rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Runs `$code` and yields its value; on non-WASM targets the elapsed time is
/// printed to stderr under the label `$name` when `$profile` is true.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started_at = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        value
    }};
}

/// WASM variant: no timing at all, the section body is evaluated as-is.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        $code
    }};
}
29
// Link pattern covering both inline links `[text](url "title")` and
// reference links `[text][ref]`.
// Flags: `s` lets `.` match newlines; `x` is verbose mode, so the whitespace and
// `#` comments inside the pattern are ignored by the regex engine.
// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
// 4/5 = double-/single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
43
// Image pattern: same shape as the link pattern but requires a leading `!`.
// Flags: `s` lets `.` match newlines; `x` is verbose mode, so the whitespace and
// `#` comments inside the pattern are ignored by the regex engine.
// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
// 4/5 = double-/single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
57
// Reference definition pattern: `[id]: url "title"` with up to three leading spaces.
// `(?m)` makes `^`/`$` match at line boundaries.
// Capture groups: 1 = reference ID, 2 = URL, 3/4 = double-/single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs have no local pattern here: they use URL_SIMPLE_REGEX imported from crate::utils::regex_cache
63
// Pattern for email addresses: local part, `@`, a domain, and a final
// dot-separated label of at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures optional leading whitespace, one or more `>` markers, and any
// whitespace that follows them, anchored at the start of the input.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
71/// Pre-computed information about a line
72#[derive(Debug, Clone)]
73pub struct LineInfo {
74    /// Byte offset where this line starts in the document
75    pub byte_offset: usize,
76    /// Length of the line in bytes (without newline)
77    pub byte_len: usize,
78    /// Number of bytes of leading whitespace (for substring extraction)
79    pub indent: usize,
80    /// Visual column width of leading whitespace (with proper tab expansion)
81    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
82    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
83    pub visual_indent: usize,
84    /// Whether the line is blank (empty or only whitespace)
85    pub is_blank: bool,
86    /// Whether this line is inside a code block
87    pub in_code_block: bool,
88    /// Whether this line is inside front matter
89    pub in_front_matter: bool,
90    /// Whether this line is inside an HTML block
91    pub in_html_block: bool,
92    /// Whether this line is inside an HTML comment
93    pub in_html_comment: bool,
94    /// List item information if this line starts a list item
95    pub list_item: Option<ListItemInfo>,
96    /// Heading information if this line is a heading
97    pub heading: Option<HeadingInfo>,
98    /// Blockquote information if this line is a blockquote
99    pub blockquote: Option<BlockquoteInfo>,
100    /// Whether this line is inside a mkdocstrings autodoc block
101    pub in_mkdocstrings: bool,
102    /// Whether this line is part of an ESM import/export block (MDX only)
103    pub in_esm_block: bool,
104    /// Whether this line is a continuation of a multi-line code span from a previous line
105    pub in_code_span_continuation: bool,
106    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
107    /// Pre-computed for consistent detection across all rules
108    pub is_horizontal_rule: bool,
109    /// Whether this line is inside a math block ($$ ... $$)
110    pub in_math_block: bool,
111}
112
113impl LineInfo {
114    /// Get the line content as a string slice from the source document
115    pub fn content<'a>(&self, source: &'a str) -> &'a str {
116        &source[self.byte_offset..self.byte_offset + self.byte_len]
117    }
118}
119
/// Details of the list marker found at the start of a list item line.
#[derive(Clone, Debug)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`.
    pub marker: String,
    /// `true` for ordered items, `false` for unordered ones.
    pub is_ordered: bool,
    /// Item number, for ordered lists.
    pub number: Option<usize>,
    /// 0-based column at which the marker begins.
    pub marker_column: usize,
    /// Column at which the content following the marker begins.
    pub content_column: usize,
}
134
/// Heading style type.
///
/// The enum is fieldless, so equality is total (`Eq`) and values can be hashed,
/// which allows it to be used as a key in `HashSet`/`HashMap`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
145
146/// Parsed link information
147#[derive(Debug, Clone)]
148pub struct ParsedLink<'a> {
149    /// Line number (1-indexed)
150    pub line: usize,
151    /// Start column (0-indexed) in the line
152    pub start_col: usize,
153    /// End column (0-indexed) in the line
154    pub end_col: usize,
155    /// Byte offset in document
156    pub byte_offset: usize,
157    /// End byte offset in document
158    pub byte_end: usize,
159    /// Link text
160    pub text: Cow<'a, str>,
161    /// Link URL or reference
162    pub url: Cow<'a, str>,
163    /// Whether this is a reference link [text][ref] vs inline [text](url)
164    pub is_reference: bool,
165    /// Reference ID for reference links
166    pub reference_id: Option<Cow<'a, str>>,
167    /// Link type from pulldown-cmark
168    pub link_type: LinkType,
169}
170
/// A reference that pulldown-cmark reported as a broken link.
#[derive(Clone, Debug)]
pub struct BrokenLinkInfo {
    /// Reference text that could not be resolved.
    pub reference: String,
    /// Byte range of the broken link in the source document.
    pub span: std::ops::Range<usize>,
}
179
/// A footnote reference found in the document (e.g. `[^1]`, `[^note]`).
#[derive(Clone, Debug)]
pub struct FootnoteRef {
    /// Footnote ID with the `^` prefix removed.
    pub id: String,
    /// 1-indexed line number.
    pub line: usize,
    /// Byte offset at which the reference starts.
    pub byte_offset: usize,
    /// Byte offset at which the reference ends.
    pub byte_end: usize,
}
192
193/// Parsed image information
194#[derive(Debug, Clone)]
195pub struct ParsedImage<'a> {
196    /// Line number (1-indexed)
197    pub line: usize,
198    /// Start column (0-indexed) in the line
199    pub start_col: usize,
200    /// End column (0-indexed) in the line
201    pub end_col: usize,
202    /// Byte offset in document
203    pub byte_offset: usize,
204    /// End byte offset in document
205    pub byte_end: usize,
206    /// Alt text
207    pub alt_text: Cow<'a, str>,
208    /// Image URL or reference
209    pub url: Cow<'a, str>,
210    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
211    pub is_reference: bool,
212    /// Reference ID for reference images
213    pub reference_id: Option<Cow<'a, str>>,
214    /// Link type from pulldown-cmark
215    pub link_type: LinkType,
216}
217
/// A reference definition of the form `[ref]: url "title"`.
#[derive(Clone, Debug)]
pub struct ReferenceDef {
    /// 1-indexed line number.
    pub line: usize,
    /// Reference ID, normalized to lowercase.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Title, when one is given.
    pub title: Option<String>,
    /// Byte offset at which the definition starts.
    pub byte_offset: usize,
    /// Byte offset at which the definition ends.
    pub byte_end: usize,
    /// Byte offset at which the title starts, quote included (if a title is present).
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends, quote included (if a title is present).
    pub title_byte_end: Option<usize>,
}
238
/// An inline code span located in the document.
#[derive(Clone, Debug)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends.
    pub end_line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the span within the document.
    pub byte_offset: usize,
    /// Byte offset at which the span ends.
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.).
    pub backtick_count: usize,
    /// Text between the backticks (the backticks themselves excluded).
    pub content: String,
}
259
260/// Information about a heading
261#[derive(Debug, Clone)]
262pub struct HeadingInfo {
263    /// Heading level (1-6 for ATX, 1-2 for Setext)
264    pub level: u8,
265    /// Style of heading
266    pub style: HeadingStyle,
267    /// The heading marker (# characters or underline)
268    pub marker: String,
269    /// Column where the marker starts (0-based)
270    pub marker_column: usize,
271    /// Column where heading text starts
272    pub content_column: usize,
273    /// The heading text (without markers and without custom ID syntax)
274    pub text: String,
275    /// Custom header ID if present (e.g., from {#custom-id} syntax)
276    pub custom_id: Option<String>,
277    /// Original heading text including custom ID syntax
278    pub raw_text: String,
279    /// Whether it has a closing sequence (for ATX)
280    pub has_closing_sequence: bool,
281    /// The closing sequence if present
282    pub closing_sequence: String,
283    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
284    /// False for malformed headings like `#NoSpace` that MD018 should flag
285    pub is_valid: bool,
286}
287
288/// A valid heading from a filtered iteration
289///
290/// Only includes headings that are CommonMark-compliant (have space after #).
291/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
292#[derive(Debug, Clone)]
293pub struct ValidHeading<'a> {
294    /// The 1-indexed line number in the document
295    pub line_num: usize,
296    /// Reference to the heading information
297    pub heading: &'a HeadingInfo,
298    /// Reference to the full line info (for rules that need additional context)
299    pub line_info: &'a LineInfo,
300}
301
302/// Iterator over valid CommonMark headings in a document
303///
304/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
305/// but should not be processed by other heading rules.
306pub struct ValidHeadingsIter<'a> {
307    lines: &'a [LineInfo],
308    current_index: usize,
309}
310
311impl<'a> ValidHeadingsIter<'a> {
312    fn new(lines: &'a [LineInfo]) -> Self {
313        Self {
314            lines,
315            current_index: 0,
316        }
317    }
318}
319
320impl<'a> Iterator for ValidHeadingsIter<'a> {
321    type Item = ValidHeading<'a>;
322
323    fn next(&mut self) -> Option<Self::Item> {
324        while self.current_index < self.lines.len() {
325            let idx = self.current_index;
326            self.current_index += 1;
327
328            let line_info = &self.lines[idx];
329            if let Some(heading) = &line_info.heading
330                && heading.is_valid
331            {
332                return Some(ValidHeading {
333                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
334                    heading,
335                    line_info,
336                });
337            }
338        }
339        None
340    }
341}
342
/// Everything recorded about a blockquote line.
#[derive(Clone, Debug)]
pub struct BlockquoteInfo {
    /// Nesting depth (1 for `>`, 2 for `>>`, etc.).
    pub nesting_level: usize,
    /// Indentation that precedes the blockquote marker.
    pub indent: String,
    /// 0-based column at which the first `>` begins.
    pub marker_column: usize,
    /// The blockquote prefix (e.g. "> ", ">> ", etc.).
    pub prefix: String,
    /// Text following the blockquote marker(s).
    pub content: String,
    /// True when no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// True when more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// True when this is an empty blockquote line that needs the MD028 fix.
    pub needs_md028_fix: bool,
}
363
/// A contiguous list in the document and the items that belong to it.
#[derive(Clone, Debug)]
pub struct ListBlock {
    /// 1-indexed line on which the list starts.
    pub start_line: usize,
    /// 1-indexed line on which the list ends.
    pub end_line: usize,
    /// Whether the list is ordered or unordered.
    pub is_ordered: bool,
    /// The consistent marker for unordered lists, if there is one.
    pub marker: Option<String>,
    /// Blockquote prefix of this list (empty when not inside a blockquote).
    pub blockquote_prefix: String,
    /// Lines within this block that are list items.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists).
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g. 3 for "1. ", 4 for "10. ").
    pub max_marker_width: usize,
}
384
385use std::sync::{Arc, OnceLock};
386
/// Map from a line's byte offset to its list item data.
///
/// Tuple layout: `(is_ordered, marker, marker_column, content_column, number)`.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
389
/// Per-character counts used for fast content analysis.
#[derive(Clone, Debug, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings).
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules).
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules).
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings).
    pub hyphen_count: usize,
    /// Number of `+` characters (lists).
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes).
    pub gt_count: usize,
    /// Number of `|` characters (tables).
    pub pipe_count: usize,
    /// Number of `[` characters (links, images).
    pub bracket_count: usize,
    /// Number of `` ` `` characters (code spans, code blocks).
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks).
    pub lt_count: usize,
    /// Number of `!` characters (images).
    pub exclamation_count: usize,
    /// Number of newline characters.
    pub newline_count: usize,
}
418
/// An HTML tag located in the document.
#[derive(Clone, Debug)]
pub struct HtmlTag {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the tag within the document.
    pub byte_offset: usize,
    /// Byte offset at which the tag ends.
    pub byte_end: usize,
    /// Tag name (e.g. "div", "img", "br").
    pub tag_name: String,
    /// True for a closing tag (`</tag>`).
    pub is_closing: bool,
    /// True for a self-closing tag (`<tag />`).
    pub is_self_closing: bool,
    /// The raw tag content.
    pub raw_content: String,
}
441
/// An emphasis span located in the document.
#[derive(Clone, Debug)]
pub struct EmphasisSpan {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the span within the document.
    pub byte_offset: usize,
    /// Byte offset at which the span ends.
    pub byte_end: usize,
    /// Emphasis marker character (`'*'` or `'_'`).
    pub marker: char,
    /// How many markers are used (1 for italic, 2 for bold, 3+ for bold+italic).
    pub marker_count: usize,
    /// Text inside the emphasis.
    pub content: String,
}
462
/// A table row located in the document.
#[derive(Clone, Debug)]
pub struct TableRow {
    /// 1-indexed line number.
    pub line: usize,
    /// True for a separator row (only `|`, `-`, `:`, and spaces).
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells).
    pub column_count: usize,
    /// Alignment info from the separator row: "left", "center", "right", "none".
    pub column_alignments: Vec<String>,
}
475
/// A bare URL (one that is not part of a link) located in the document.
#[derive(Clone, Debug)]
pub struct BareUrl {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the URL within the document.
    pub byte_offset: usize,
    /// Byte offset at which the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL ("http", "https", "ftp", "email").
    pub url_type: String,
}
494
495pub struct LintContext<'a> {
496    pub content: &'a str,
497    pub line_offsets: Vec<usize>,
498    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
499    pub lines: Vec<LineInfo>,             // Pre-computed line information
500    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
501    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
502    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
503    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
504    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
505    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
506    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
507    pub char_frequency: CharFrequency,    // Character frequency analysis
508    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
509    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
510    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
511    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
512    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
513    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
514    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
515    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
516    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
517    pub flavor: MarkdownFlavor,           // Markdown flavor being used
518    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
519}
520
/// The four consecutive pieces a blockquote line is split into.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Split `line` into leading indent, the run of `>` markers, the spaces/tabs
/// right after the markers, and the remaining content.
///
/// Returns `None` when the first character after the leading spaces/tabs is
/// not `>` (including empty and whitespace-only lines).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let raw = line.as_bytes();
    let is_space_or_tab = |byte: &&u8| matches!(**byte, b' ' | b'\t');

    // Every boundary below sits right after an ASCII byte, so it is a valid
    // UTF-8 split point.
    let indent_len = raw.iter().take_while(is_space_or_tab).count();
    let marker_len = raw[indent_len..].iter().take_while(|&&byte| byte == b'>').count();
    if marker_len == 0 {
        // No '>' directly after the indent: not a blockquote line.
        return None;
    }
    let markers_end = indent_len + marker_len;
    let gap_len = raw[markers_end..].iter().take_while(is_space_or_tab).count();

    let (indent, after_indent) = line.split_at(indent_len);
    let (markers, after_markers) = after_indent.split_at(marker_len);
    let (spaces_after, content) = after_markers.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
565
566impl<'a> LintContext<'a> {
567    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
568        #[cfg(not(target_arch = "wasm32"))]
569        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
570        #[cfg(target_arch = "wasm32")]
571        let profile = false;
572
573        let line_offsets = profile_section!("Line offsets", profile, {
574            let mut offsets = vec![0];
575            for (i, c) in content.char_indices() {
576                if c == '\n' {
577                    offsets.push(i + 1);
578                }
579            }
580            offsets
581        });
582
583        // Detect code blocks once and cache them
584        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
585
586        // Pre-compute HTML comment ranges ONCE for all operations
587        let html_comment_ranges = profile_section!(
588            "HTML comment ranges",
589            profile,
590            crate::utils::skip_context::compute_html_comment_ranges(content)
591        );
592
593        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
594        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
595            if flavor == MarkdownFlavor::MkDocs {
596                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
597            } else {
598                Vec::new()
599            }
600        });
601
602        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
603        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
604        let (mut lines, emphasis_spans) = profile_section!(
605            "Basic line info",
606            profile,
607            Self::compute_basic_line_info(
608                content,
609                &line_offsets,
610                &code_blocks,
611                flavor,
612                &html_comment_ranges,
613                &autodoc_ranges,
614            )
615        );
616
617        // Detect HTML blocks BEFORE heading detection
618        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
619
620        // Detect ESM import/export blocks in MDX files BEFORE heading detection
621        profile_section!(
622            "ESM blocks",
623            profile,
624            Self::detect_esm_blocks(content, &mut lines, flavor)
625        );
626
627        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
628        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
629
630        // Now detect headings and blockquotes
631        profile_section!(
632            "Headings & blockquotes",
633            profile,
634            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
635        );
636
637        // Parse code spans early so we can exclude them from link/image parsing
638        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
639
640        // Mark lines that are continuations of multi-line code spans
641        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
642        for span in &code_spans {
643            if span.end_line > span.line {
644                // Mark lines after the first line as continuations
645                for line_num in (span.line + 1)..=span.end_line {
646                    if let Some(line_info) = lines.get_mut(line_num - 1) {
647                        line_info.in_code_span_continuation = true;
648                    }
649                }
650            }
651        }
652
653        // Parse links, images, references, and list blocks
654        let (links, broken_links, footnote_refs) = profile_section!(
655            "Links",
656            profile,
657            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
658        );
659
660        let images = profile_section!(
661            "Images",
662            profile,
663            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
664        );
665
666        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
667
668        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
669
670        // Compute character frequency for fast content analysis
671        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
672
673        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
674        let table_blocks = profile_section!(
675            "Table blocks",
676            profile,
677            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
678                content,
679                &code_blocks,
680                &code_spans,
681                &html_comment_ranges,
682            )
683        );
684
685        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
686        let line_index = profile_section!(
687            "Line index",
688            profile,
689            crate::utils::range_utils::LineIndex::new(content)
690        );
691
692        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
693        let jinja_ranges = profile_section!(
694            "Jinja ranges",
695            profile,
696            crate::utils::jinja_utils::find_jinja_ranges(content)
697        );
698
699        Self {
700            content,
701            line_offsets,
702            code_blocks,
703            lines,
704            links,
705            images,
706            broken_links,
707            footnote_refs,
708            reference_defs,
709            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
710            list_blocks,
711            char_frequency,
712            html_tags_cache: OnceLock::new(),
713            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
714            table_rows_cache: OnceLock::new(),
715            bare_urls_cache: OnceLock::new(),
716            has_mixed_list_nesting_cache: OnceLock::new(),
717            html_comment_ranges,
718            table_blocks,
719            line_index,
720            jinja_ranges,
721            flavor,
722            source_file,
723        }
724    }
725
726    /// Get code spans - computed lazily on first access
727    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
728        Arc::clone(
729            self.code_spans_cache
730                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
731        )
732    }
733
734    /// Get HTML comment ranges - pre-computed during LintContext construction
735    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
736        &self.html_comment_ranges
737    }
738
739    /// Get HTML tags - computed lazily on first access
740    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
741        Arc::clone(self.html_tags_cache.get_or_init(|| {
742            Arc::new(Self::parse_html_tags(
743                self.content,
744                &self.lines,
745                &self.code_blocks,
746                self.flavor,
747            ))
748        }))
749    }
750
751    /// Get emphasis spans - pre-computed during construction
752    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
753        Arc::clone(
754            self.emphasis_spans_cache
755                .get()
756                .expect("emphasis_spans_cache initialized during construction"),
757        )
758    }
759
760    /// Get table rows - computed lazily on first access
761    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
762        Arc::clone(
763            self.table_rows_cache
764                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
765        )
766    }
767
768    /// Get bare URLs - computed lazily on first access
769    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
770        Arc::clone(
771            self.bare_urls_cache
772                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
773        )
774    }
775
776    /// Check if document has mixed ordered/unordered list nesting.
777    /// Result is cached after first computation (document-level invariant).
778    /// This is used by MD007 for smart style auto-detection.
779    pub fn has_mixed_list_nesting(&self) -> bool {
780        *self
781            .has_mixed_list_nesting_cache
782            .get_or_init(|| self.compute_mixed_list_nesting())
783    }
784
785    /// Internal computation for mixed list nesting (only called once per LintContext).
786    fn compute_mixed_list_nesting(&self) -> bool {
787        // Track parent list items by their marker position and type
788        // Using marker_column instead of indent because it works correctly
789        // for blockquoted content where indent doesn't account for the prefix
790        // Stack stores: (marker_column, is_ordered)
791        let mut stack: Vec<(usize, bool)> = Vec::new();
792        let mut last_was_blank = false;
793
794        for line_info in &self.lines {
795            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
796            if line_info.in_code_block
797                || line_info.in_front_matter
798                || line_info.in_mkdocstrings
799                || line_info.in_html_comment
800                || line_info.in_esm_block
801            {
802                continue;
803            }
804
805            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
806            if line_info.is_blank {
807                last_was_blank = true;
808                continue;
809            }
810
811            if let Some(list_item) = &line_info.list_item {
812                // Normalize column 1 to column 0 (consistent with MD007 check function)
813                let current_pos = if list_item.marker_column == 1 {
814                    0
815                } else {
816                    list_item.marker_column
817                };
818
819                // If there was a blank line and this item is at root level, reset stack
820                if last_was_blank && current_pos == 0 {
821                    stack.clear();
822                }
823                last_was_blank = false;
824
825                // Pop items at same or greater position (they're siblings or deeper, not parents)
826                while let Some(&(pos, _)) = stack.last() {
827                    if pos >= current_pos {
828                        stack.pop();
829                    } else {
830                        break;
831                    }
832                }
833
834                // Check if immediate parent has different type - this is mixed nesting
835                if let Some(&(_, parent_is_ordered)) = stack.last()
836                    && parent_is_ordered != list_item.is_ordered
837                {
838                    return true; // Found mixed nesting - early exit
839                }
840
841                stack.push((current_pos, list_item.is_ordered));
842            } else {
843                // Non-list line (but not blank) - could be paragraph or other content
844                last_was_blank = false;
845            }
846        }
847
848        false
849    }
850
851    /// Map a byte offset to (line, column)
852    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
853        match self.line_offsets.binary_search(&offset) {
854            Ok(line) => (line + 1, 1),
855            Err(line) => {
856                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
857                (line, offset - line_start + 1)
858            }
859        }
860    }
861
862    /// Check if a position is within a code block or code span
863    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
864        // Check code blocks first
865        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
866            return true;
867        }
868
869        // Check inline code spans (lazy load if needed)
870        self.code_spans()
871            .iter()
872            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
873    }
874
875    /// Get line information by line number (1-indexed)
876    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
877        if line_num > 0 {
878            self.lines.get(line_num - 1)
879        } else {
880            None
881        }
882    }
883
884    /// Get byte offset for a line number (1-indexed)
885    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
886        self.line_info(line_num).map(|info| info.byte_offset)
887    }
888
889    /// Get URL for a reference link/image by its ID
890    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
891        let normalized_id = ref_id.to_lowercase();
892        self.reference_defs
893            .iter()
894            .find(|def| def.id == normalized_id)
895            .map(|def| def.url.as_str())
896    }
897
898    /// Check if a line is part of a list block
899    pub fn is_in_list_block(&self, line_num: usize) -> bool {
900        self.list_blocks
901            .iter()
902            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
903    }
904
905    /// Get the list block containing a specific line
906    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
907        self.list_blocks
908            .iter()
909            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
910    }
911
912    // Compatibility methods for DocumentStructure migration
913
914    /// Check if a line is within a code block
915    pub fn is_in_code_block(&self, line_num: usize) -> bool {
916        if line_num == 0 || line_num > self.lines.len() {
917            return false;
918        }
919        self.lines[line_num - 1].in_code_block
920    }
921
922    /// Check if a line is within front matter
923    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
924        if line_num == 0 || line_num > self.lines.len() {
925            return false;
926        }
927        self.lines[line_num - 1].in_front_matter
928    }
929
930    /// Check if a line is within an HTML block
931    pub fn is_in_html_block(&self, line_num: usize) -> bool {
932        if line_num == 0 || line_num > self.lines.len() {
933            return false;
934        }
935        self.lines[line_num - 1].in_html_block
936    }
937
938    /// Check if a line and column is within a code span
939    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
940        if line_num == 0 || line_num > self.lines.len() {
941            return false;
942        }
943
944        // Use the code spans cache to check
945        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
946        // Convert col to 0-indexed for comparison
947        let col_0indexed = if col > 0 { col - 1 } else { 0 };
948        let code_spans = self.code_spans();
949        code_spans.iter().any(|span| {
950            // Check if line is within the span's line range
951            if line_num < span.line || line_num > span.end_line {
952                return false;
953            }
954
955            if span.line == span.end_line {
956                // Single-line span: check column bounds
957                col_0indexed >= span.start_col && col_0indexed < span.end_col
958            } else if line_num == span.line {
959                // First line of multi-line span: anything after start_col is in span
960                col_0indexed >= span.start_col
961            } else if line_num == span.end_line {
962                // Last line of multi-line span: anything before end_col is in span
963                col_0indexed < span.end_col
964            } else {
965                // Middle line of multi-line span: entire line is in span
966                true
967            }
968        })
969    }
970
971    /// Check if a byte offset is within a code span
972    #[inline]
973    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
974        let code_spans = self.code_spans();
975        code_spans
976            .iter()
977            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
978    }
979
980    /// Check if a byte position is within a reference definition
981    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
982    #[inline]
983    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
984        self.reference_defs
985            .iter()
986            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
987    }
988
989    /// Check if a byte position is within an HTML comment
990    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
991    /// where k is the number of HTML comments (typically very small)
992    #[inline]
993    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
994        self.html_comment_ranges
995            .iter()
996            .any(|range| byte_pos >= range.start && byte_pos < range.end)
997    }
998
999    /// Check if a byte position is within an HTML tag (including multiline tags)
1000    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1001    #[inline]
1002    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1003        self.html_tags()
1004            .iter()
1005            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1006    }
1007
1008    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1009    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1010        self.jinja_ranges
1011            .iter()
1012            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1013    }
1014
1015    /// Check if a byte position is within a link reference definition title
1016    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1017        self.reference_defs.iter().any(|def| {
1018            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1019                byte_pos >= start && byte_pos < end
1020            } else {
1021                false
1022            }
1023        })
1024    }
1025
1026    /// Check if content has any instances of a specific character (fast)
1027    pub fn has_char(&self, ch: char) -> bool {
1028        match ch {
1029            '#' => self.char_frequency.hash_count > 0,
1030            '*' => self.char_frequency.asterisk_count > 0,
1031            '_' => self.char_frequency.underscore_count > 0,
1032            '-' => self.char_frequency.hyphen_count > 0,
1033            '+' => self.char_frequency.plus_count > 0,
1034            '>' => self.char_frequency.gt_count > 0,
1035            '|' => self.char_frequency.pipe_count > 0,
1036            '[' => self.char_frequency.bracket_count > 0,
1037            '`' => self.char_frequency.backtick_count > 0,
1038            '<' => self.char_frequency.lt_count > 0,
1039            '!' => self.char_frequency.exclamation_count > 0,
1040            '\n' => self.char_frequency.newline_count > 0,
1041            _ => self.content.contains(ch), // Fallback for other characters
1042        }
1043    }
1044
1045    /// Get count of a specific character (fast)
1046    pub fn char_count(&self, ch: char) -> usize {
1047        match ch {
1048            '#' => self.char_frequency.hash_count,
1049            '*' => self.char_frequency.asterisk_count,
1050            '_' => self.char_frequency.underscore_count,
1051            '-' => self.char_frequency.hyphen_count,
1052            '+' => self.char_frequency.plus_count,
1053            '>' => self.char_frequency.gt_count,
1054            '|' => self.char_frequency.pipe_count,
1055            '[' => self.char_frequency.bracket_count,
1056            '`' => self.char_frequency.backtick_count,
1057            '<' => self.char_frequency.lt_count,
1058            '!' => self.char_frequency.exclamation_count,
1059            '\n' => self.char_frequency.newline_count,
1060            _ => self.content.matches(ch).count(), // Fallback for other characters
1061        }
1062    }
1063
1064    /// Check if content likely contains headings (fast)
1065    pub fn likely_has_headings(&self) -> bool {
1066        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1067    }
1068
1069    /// Check if content likely contains lists (fast)
1070    pub fn likely_has_lists(&self) -> bool {
1071        self.char_frequency.asterisk_count > 0
1072            || self.char_frequency.hyphen_count > 0
1073            || self.char_frequency.plus_count > 0
1074    }
1075
1076    /// Check if content likely contains emphasis (fast)
1077    pub fn likely_has_emphasis(&self) -> bool {
1078        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1079    }
1080
1081    /// Check if content likely contains tables (fast)
1082    pub fn likely_has_tables(&self) -> bool {
1083        self.char_frequency.pipe_count > 2
1084    }
1085
1086    /// Check if content likely contains blockquotes (fast)
1087    pub fn likely_has_blockquotes(&self) -> bool {
1088        self.char_frequency.gt_count > 0
1089    }
1090
1091    /// Check if content likely contains code (fast)
1092    pub fn likely_has_code(&self) -> bool {
1093        self.char_frequency.backtick_count > 0
1094    }
1095
1096    /// Check if content likely contains links or images (fast)
1097    pub fn likely_has_links_or_images(&self) -> bool {
1098        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1099    }
1100
1101    /// Check if content likely contains HTML (fast)
1102    pub fn likely_has_html(&self) -> bool {
1103        self.char_frequency.lt_count > 0
1104    }
1105
1106    /// Get HTML tags on a specific line
1107    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1108        self.html_tags()
1109            .iter()
1110            .filter(|tag| tag.line == line_num)
1111            .cloned()
1112            .collect()
1113    }
1114
1115    /// Get emphasis spans on a specific line
1116    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1117        self.emphasis_spans()
1118            .iter()
1119            .filter(|span| span.line == line_num)
1120            .cloned()
1121            .collect()
1122    }
1123
1124    /// Get table rows on a specific line
1125    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1126        self.table_rows()
1127            .iter()
1128            .filter(|row| row.line == line_num)
1129            .cloned()
1130            .collect()
1131    }
1132
1133    /// Get bare URLs on a specific line
1134    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1135        self.bare_urls()
1136            .iter()
1137            .filter(|url| url.line == line_num)
1138            .cloned()
1139            .collect()
1140    }
1141
1142    /// Find the line index for a given byte offset using binary search.
1143    /// Returns (line_index, line_number, column) where:
1144    /// - line_index is the 0-based index in the lines array
1145    /// - line_number is the 1-based line number
1146    /// - column is the byte offset within that line
1147    #[inline]
1148    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1149        // Binary search to find the line containing this byte offset
1150        let idx = match lines.binary_search_by(|line| {
1151            if byte_offset < line.byte_offset {
1152                std::cmp::Ordering::Greater
1153            } else if byte_offset > line.byte_offset + line.byte_len {
1154                std::cmp::Ordering::Less
1155            } else {
1156                std::cmp::Ordering::Equal
1157            }
1158        }) {
1159            Ok(idx) => idx,
1160            Err(idx) => idx.saturating_sub(1),
1161        };
1162
1163        let line = &lines[idx];
1164        let line_num = idx + 1;
1165        let col = byte_offset.saturating_sub(line.byte_offset);
1166
1167        (idx, line_num, col)
1168    }
1169
1170    /// Check if a byte offset is within a code span using binary search
1171    #[inline]
1172    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1173        // Since spans are sorted by byte_offset, use partition_point for binary search
1174        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1175
1176        // Check the span that starts at or before our offset
1177        if idx > 0 {
1178            let span = &code_spans[idx - 1];
1179            if offset >= span.byte_offset && offset < span.byte_end {
1180                return true;
1181            }
1182        }
1183
1184        false
1185    }
1186
1187    /// Collect byte ranges of all links using pulldown-cmark
1188    /// This is used to skip heading detection for lines that fall within link syntax
1189    /// (e.g., multiline links like `[text](url\n#fragment)`)
1190    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1191        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1192
1193        let mut link_ranges = Vec::new();
1194        let mut options = Options::empty();
1195        options.insert(Options::ENABLE_WIKILINKS);
1196        options.insert(Options::ENABLE_FOOTNOTES);
1197
1198        let parser = Parser::new_ext(content, options).into_offset_iter();
1199        let mut link_stack: Vec<usize> = Vec::new();
1200
1201        for (event, range) in parser {
1202            match event {
1203                Event::Start(Tag::Link { .. }) => {
1204                    link_stack.push(range.start);
1205                }
1206                Event::End(TagEnd::Link) => {
1207                    if let Some(start_pos) = link_stack.pop() {
1208                        link_ranges.push((start_pos, range.end));
1209                    }
1210                }
1211                _ => {}
1212            }
1213        }
1214
1215        link_ranges
1216    }
1217
1218    /// Parse all links in the content
1219    fn parse_links(
1220        content: &'a str,
1221        lines: &[LineInfo],
1222        code_blocks: &[(usize, usize)],
1223        code_spans: &[CodeSpan],
1224        flavor: MarkdownFlavor,
1225        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1226    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1227        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1228        use std::collections::HashSet;
1229
1230        let mut links = Vec::with_capacity(content.len() / 500);
1231        let mut broken_links = Vec::new();
1232        let mut footnote_refs = Vec::new();
1233
1234        // Track byte positions of links found by pulldown-cmark
1235        let mut found_positions = HashSet::new();
1236
1237        // Use pulldown-cmark's streaming parser with BrokenLink callback
1238        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1239        // This automatically handles:
1240        // - Escaped links (won't generate events)
1241        // - Links in code blocks/spans (won't generate Link events)
1242        // - Images (generates Tag::Image instead)
1243        // - Reference resolution (dest_url is already resolved!)
1244        // - Broken references (callback is invoked)
1245        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1246        let mut options = Options::empty();
1247        options.insert(Options::ENABLE_WIKILINKS);
1248        options.insert(Options::ENABLE_FOOTNOTES);
1249
1250        let parser = Parser::new_with_broken_link_callback(
1251            content,
1252            options,
1253            Some(|link: BrokenLink<'_>| {
1254                broken_links.push(BrokenLinkInfo {
1255                    reference: link.reference.to_string(),
1256                    span: link.span.clone(),
1257                });
1258                None
1259            }),
1260        )
1261        .into_offset_iter();
1262
1263        let mut link_stack: Vec<(
1264            usize,
1265            usize,
1266            pulldown_cmark::CowStr<'a>,
1267            LinkType,
1268            pulldown_cmark::CowStr<'a>,
1269        )> = Vec::new();
1270        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1271
1272        for (event, range) in parser {
1273            match event {
1274                Event::Start(Tag::Link {
1275                    link_type,
1276                    dest_url,
1277                    id,
1278                    ..
1279                }) => {
1280                    // Link start - record position, URL, and reference ID
1281                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1282                    text_chunks.clear();
1283                }
1284                Event::Text(text) if !link_stack.is_empty() => {
1285                    // Track text content with its byte range
1286                    text_chunks.push((text.to_string(), range.start, range.end));
1287                }
1288                Event::Code(code) if !link_stack.is_empty() => {
1289                    // Include inline code in link text (with backticks)
1290                    let code_text = format!("`{code}`");
1291                    text_chunks.push((code_text, range.start, range.end));
1292                }
1293                Event::End(TagEnd::Link) => {
1294                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1295                        // Skip if in HTML comment
1296                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1297                            text_chunks.clear();
1298                            continue;
1299                        }
1300
1301                        // Find line and column information
1302                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1303
1304                        // Skip if this link is on a MkDocs snippet line
1305                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1306                            text_chunks.clear();
1307                            continue;
1308                        }
1309
1310                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1311
1312                        let is_reference = matches!(
1313                            link_type,
1314                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1315                        );
1316
1317                        // Extract link text directly from source bytes to preserve escaping
1318                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1319                        let link_text = if start_pos < content.len() {
1320                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1321
1322                            // Find MATCHING ] by tracking bracket depth for nested brackets
1323                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1324                            // Brackets inside code spans (between backticks) should be ignored
1325                            let mut close_pos = None;
1326                            let mut depth = 0;
1327                            let mut in_code_span = false;
1328
1329                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1330                                // Count preceding backslashes
1331                                let mut backslash_count = 0;
1332                                let mut j = i;
1333                                while j > 0 && link_bytes[j - 1] == b'\\' {
1334                                    backslash_count += 1;
1335                                    j -= 1;
1336                                }
1337                                let is_escaped = backslash_count % 2 != 0;
1338
1339                                // Track code spans - backticks toggle in/out of code
1340                                if byte == b'`' && !is_escaped {
1341                                    in_code_span = !in_code_span;
1342                                }
1343
1344                                // Only count brackets when NOT in a code span
1345                                if !is_escaped && !in_code_span {
1346                                    if byte == b'[' {
1347                                        depth += 1;
1348                                    } else if byte == b']' {
1349                                        if depth == 0 {
1350                                            // Found the matching closing bracket
1351                                            close_pos = Some(i);
1352                                            break;
1353                                        } else {
1354                                            depth -= 1;
1355                                        }
1356                                    }
1357                                }
1358                            }
1359
1360                            if let Some(pos) = close_pos {
1361                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1362                            } else {
1363                                Cow::Borrowed("")
1364                            }
1365                        } else {
1366                            Cow::Borrowed("")
1367                        };
1368
1369                        // For reference links, use the actual reference ID from pulldown-cmark
1370                        let reference_id = if is_reference && !ref_id.is_empty() {
1371                            Some(Cow::Owned(ref_id.to_lowercase()))
1372                        } else if is_reference {
1373                            // For collapsed/shortcut references without explicit ID, use the link text
1374                            Some(Cow::Owned(link_text.to_lowercase()))
1375                        } else {
1376                            None
1377                        };
1378
1379                        // Track this position as found
1380                        found_positions.insert(start_pos);
1381
1382                        links.push(ParsedLink {
1383                            line: line_num,
1384                            start_col: col_start,
1385                            end_col: col_end,
1386                            byte_offset: start_pos,
1387                            byte_end: range.end,
1388                            text: link_text,
1389                            url: Cow::Owned(url.to_string()),
1390                            is_reference,
1391                            reference_id,
1392                            link_type,
1393                        });
1394
1395                        text_chunks.clear();
1396                    }
1397                }
1398                Event::FootnoteReference(footnote_id) => {
1399                    // Capture footnote references like [^1], [^note]
1400                    // Skip if in HTML comment
1401                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1402                        continue;
1403                    }
1404
1405                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1406                    footnote_refs.push(FootnoteRef {
1407                        id: footnote_id.to_string(),
1408                        line: line_num,
1409                        byte_offset: range.start,
1410                        byte_end: range.end,
1411                    });
1412                }
1413                _ => {}
1414            }
1415        }
1416
1417        // Also find undefined references using regex
1418        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1419        // because the reference is undefined
1420        for cap in LINK_PATTERN.captures_iter(content) {
1421            let full_match = cap.get(0).unwrap();
1422            let match_start = full_match.start();
1423            let match_end = full_match.end();
1424
1425            // Skip if this was already found by pulldown-cmark (it's a valid link)
1426            if found_positions.contains(&match_start) {
1427                continue;
1428            }
1429
1430            // Skip if escaped
1431            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1432                continue;
1433            }
1434
1435            // Skip if it's an image
1436            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1437                continue;
1438            }
1439
1440            // Skip if in code block
1441            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1442                continue;
1443            }
1444
1445            // Skip if in code span
1446            if Self::is_offset_in_code_span(code_spans, match_start) {
1447                continue;
1448            }
1449
1450            // Skip if in HTML comment
1451            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1452                continue;
1453            }
1454
1455            // Find line and column information
1456            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1457
1458            // Skip if this link is on a MkDocs snippet line
1459            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1460                continue;
1461            }
1462
1463            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1464
1465            let text = cap.get(1).map_or("", |m| m.as_str());
1466
1467            // Only process reference links (group 6)
1468            if let Some(ref_id) = cap.get(6) {
1469                let ref_id_str = ref_id.as_str();
1470                let normalized_ref = if ref_id_str.is_empty() {
1471                    Cow::Owned(text.to_lowercase()) // Implicit reference
1472                } else {
1473                    Cow::Owned(ref_id_str.to_lowercase())
1474                };
1475
1476                // This is an undefined reference (pulldown-cmark didn't parse it)
1477                links.push(ParsedLink {
1478                    line: line_num,
1479                    start_col: col_start,
1480                    end_col: col_end,
1481                    byte_offset: match_start,
1482                    byte_end: match_end,
1483                    text: Cow::Borrowed(text),
1484                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1485                    is_reference: true,
1486                    reference_id: Some(normalized_ref),
1487                    link_type: LinkType::Reference, // Undefined references are reference-style
1488                });
1489            }
1490        }
1491
1492        (links, broken_links, footnote_refs)
1493    }
1494
1495    /// Parse all images in the content
1496    fn parse_images(
1497        content: &'a str,
1498        lines: &[LineInfo],
1499        code_blocks: &[(usize, usize)],
1500        code_spans: &[CodeSpan],
1501        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1502    ) -> Vec<ParsedImage<'a>> {
1503        use crate::utils::skip_context::is_in_html_comment_ranges;
1504        use std::collections::HashSet;
1505
1506        // Pre-size based on a heuristic: images are less common than links
1507        let mut images = Vec::with_capacity(content.len() / 1000);
1508        let mut found_positions = HashSet::new();
1509
1510        // Use pulldown-cmark for parsing - more accurate and faster
1511        let parser = Parser::new(content).into_offset_iter();
1512        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1513            Vec::new();
1514        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1515
1516        for (event, range) in parser {
1517            match event {
1518                Event::Start(Tag::Image {
1519                    link_type,
1520                    dest_url,
1521                    id,
1522                    ..
1523                }) => {
1524                    image_stack.push((range.start, dest_url, link_type, id));
1525                    text_chunks.clear();
1526                }
1527                Event::Text(text) if !image_stack.is_empty() => {
1528                    text_chunks.push((text.to_string(), range.start, range.end));
1529                }
1530                Event::Code(code) if !image_stack.is_empty() => {
1531                    let code_text = format!("`{code}`");
1532                    text_chunks.push((code_text, range.start, range.end));
1533                }
1534                Event::End(TagEnd::Image) => {
1535                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1536                        // Skip if in code block
1537                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1538                            continue;
1539                        }
1540
1541                        // Skip if in code span
1542                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1543                            continue;
1544                        }
1545
1546                        // Skip if in HTML comment
1547                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1548                            continue;
1549                        }
1550
1551                        // Find line and column using binary search
1552                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1553                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1554
1555                        let is_reference = matches!(
1556                            link_type,
1557                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1558                        );
1559
1560                        // Extract alt text directly from source bytes to preserve escaping
1561                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1562                        let alt_text = if start_pos < content.len() {
1563                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1564
1565                            // Find MATCHING ] by tracking bracket depth for nested brackets
1566                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1567                            let mut close_pos = None;
1568                            let mut depth = 0;
1569
1570                            if image_bytes.len() > 2 {
1571                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1572                                    // Count preceding backslashes
1573                                    let mut backslash_count = 0;
1574                                    let mut j = i;
1575                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1576                                        backslash_count += 1;
1577                                        j -= 1;
1578                                    }
1579                                    let is_escaped = backslash_count % 2 != 0;
1580
1581                                    if !is_escaped {
1582                                        if byte == b'[' {
1583                                            depth += 1;
1584                                        } else if byte == b']' {
1585                                            if depth == 0 {
1586                                                // Found the matching closing bracket
1587                                                close_pos = Some(i);
1588                                                break;
1589                                            } else {
1590                                                depth -= 1;
1591                                            }
1592                                        }
1593                                    }
1594                                }
1595                            }
1596
1597                            if let Some(pos) = close_pos {
1598                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1599                            } else {
1600                                Cow::Borrowed("")
1601                            }
1602                        } else {
1603                            Cow::Borrowed("")
1604                        };
1605
1606                        let reference_id = if is_reference && !ref_id.is_empty() {
1607                            Some(Cow::Owned(ref_id.to_lowercase()))
1608                        } else if is_reference {
1609                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1610                        } else {
1611                            None
1612                        };
1613
1614                        found_positions.insert(start_pos);
1615                        images.push(ParsedImage {
1616                            line: line_num,
1617                            start_col: col_start,
1618                            end_col: col_end,
1619                            byte_offset: start_pos,
1620                            byte_end: range.end,
1621                            alt_text,
1622                            url: Cow::Owned(url.to_string()),
1623                            is_reference,
1624                            reference_id,
1625                            link_type,
1626                        });
1627                    }
1628                }
1629                _ => {}
1630            }
1631        }
1632
1633        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1634        for cap in IMAGE_PATTERN.captures_iter(content) {
1635            let full_match = cap.get(0).unwrap();
1636            let match_start = full_match.start();
1637            let match_end = full_match.end();
1638
1639            // Skip if already found by pulldown-cmark
1640            if found_positions.contains(&match_start) {
1641                continue;
1642            }
1643
1644            // Skip if the ! is escaped
1645            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1646                continue;
1647            }
1648
1649            // Skip if in code block, code span, or HTML comment
1650            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1651                || Self::is_offset_in_code_span(code_spans, match_start)
1652                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1653            {
1654                continue;
1655            }
1656
1657            // Only process reference images (undefined references not found by pulldown-cmark)
1658            if let Some(ref_id) = cap.get(6) {
1659                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1660                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1661                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1662                let ref_id_str = ref_id.as_str();
1663                let normalized_ref = if ref_id_str.is_empty() {
1664                    Cow::Owned(alt_text.to_lowercase())
1665                } else {
1666                    Cow::Owned(ref_id_str.to_lowercase())
1667                };
1668
1669                images.push(ParsedImage {
1670                    line: line_num,
1671                    start_col: col_start,
1672                    end_col: col_end,
1673                    byte_offset: match_start,
1674                    byte_end: match_end,
1675                    alt_text: Cow::Borrowed(alt_text),
1676                    url: Cow::Borrowed(""),
1677                    is_reference: true,
1678                    reference_id: Some(normalized_ref),
1679                    link_type: LinkType::Reference, // Undefined references are reference-style
1680                });
1681            }
1682        }
1683
1684        images
1685    }
1686
1687    /// Parse reference definitions
1688    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1689        // Pre-size based on lines count as reference definitions are line-based
1690        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1691
1692        for (line_idx, line_info) in lines.iter().enumerate() {
1693            // Skip lines in code blocks
1694            if line_info.in_code_block {
1695                continue;
1696            }
1697
1698            let line = line_info.content(content);
1699            let line_num = line_idx + 1;
1700
1701            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1702                let id_raw = cap.get(1).unwrap().as_str();
1703
1704                // Skip footnote definitions - they use [^id]: syntax and are semantically
1705                // different from reference link definitions
1706                if id_raw.starts_with('^') {
1707                    continue;
1708                }
1709
1710                let id = id_raw.to_lowercase();
1711                let url = cap.get(2).unwrap().as_str().to_string();
1712                let title_match = cap.get(3).or_else(|| cap.get(4));
1713                let title = title_match.map(|m| m.as_str().to_string());
1714
1715                // Calculate byte positions
1716                // The match starts at the beginning of the line (0) and extends to the end
1717                let match_obj = cap.get(0).unwrap();
1718                let byte_offset = line_info.byte_offset + match_obj.start();
1719                let byte_end = line_info.byte_offset + match_obj.end();
1720
1721                // Calculate title byte positions (includes the quote character before content)
1722                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1723                    // The match is the content inside quotes, so we include the quote before
1724                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1725                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1726                    (Some(start), Some(end))
1727                } else {
1728                    (None, None)
1729                };
1730
1731                refs.push(ReferenceDef {
1732                    line: line_num,
1733                    id,
1734                    url,
1735                    title,
1736                    byte_offset,
1737                    byte_end,
1738                    title_byte_start,
1739                    title_byte_end,
1740                });
1741            }
1742        }
1743
1744        refs
1745    }
1746
1747    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1748    /// Handles nested blockquotes like `> > > content`
1749    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1750    #[inline]
1751    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1752        let trimmed_start = line.trim_start();
1753        if !trimmed_start.starts_with('>') {
1754            return None;
1755        }
1756
1757        // Track total prefix length to handle nested blockquotes
1758        let mut remaining = line;
1759        let mut total_prefix_len = 0;
1760
1761        loop {
1762            let trimmed = remaining.trim_start();
1763            if !trimmed.starts_with('>') {
1764                break;
1765            }
1766
1767            // Add leading whitespace + '>' to prefix
1768            let leading_ws_len = remaining.len() - trimmed.len();
1769            total_prefix_len += leading_ws_len + 1;
1770
1771            let after_gt = &trimmed[1..];
1772
1773            // Handle optional whitespace after '>' (space or tab)
1774            if let Some(stripped) = after_gt.strip_prefix(' ') {
1775                total_prefix_len += 1;
1776                remaining = stripped;
1777            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1778                total_prefix_len += 1;
1779                remaining = stripped;
1780            } else {
1781                remaining = after_gt;
1782            }
1783        }
1784
1785        Some((&line[..total_prefix_len], remaining))
1786    }
1787
1788    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
1789    ///
1790    /// Returns a HashMap keyed by line byte offset, containing:
1791    /// `(is_ordered, marker, marker_column, content_column, number)`
1792    ///
1793    /// ## Why pulldown-cmark?
1794    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
1795    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
1796    /// This fixes issue #253 where continuation lines were falsely detected.
1797    ///
1798    /// ## Tab indentation quirk
1799    /// Pulldown-cmark reports nested list items at the newline character position
1800    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
1801    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
1802    /// We detect this and advance to the correct line.
1803    ///
1804    /// ## HashMap key strategy
1805    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
1806    /// that resolve to the same line (after newline adjustment). The first event
1807    /// for each line is authoritative.
1808    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
1809    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
1810    /// This avoids a separate parse for emphasis detection.
1811    fn detect_list_items_and_emphasis_with_pulldown(
1812        content: &str,
1813        line_offsets: &[usize],
1814        flavor: MarkdownFlavor,
1815        front_matter_end: usize,
1816        code_blocks: &[(usize, usize)],
1817    ) -> (ListItemMap, Vec<EmphasisSpan>) {
1818        use std::collections::HashMap;
1819
1820        let mut list_items = HashMap::new();
1821        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1822
1823        let mut options = Options::empty();
1824        options.insert(Options::ENABLE_TABLES);
1825        options.insert(Options::ENABLE_FOOTNOTES);
1826        options.insert(Options::ENABLE_STRIKETHROUGH);
1827        options.insert(Options::ENABLE_TASKLISTS);
1828        // Always enable GFM features for consistency with existing behavior
1829        options.insert(Options::ENABLE_GFM);
1830
1831        // Suppress unused variable warning
1832        let _ = flavor;
1833
1834        let parser = Parser::new_ext(content, options).into_offset_iter();
1835        let mut list_depth: usize = 0;
1836        let mut list_stack: Vec<bool> = Vec::new();
1837
1838        for (event, range) in parser {
1839            match event {
1840                // Capture emphasis spans (for MD030's emphasis detection)
1841                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1842                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1843                        2
1844                    } else {
1845                        1
1846                    };
1847                    let match_start = range.start;
1848                    let match_end = range.end;
1849
1850                    // Skip if in code block
1851                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1852                        // Determine marker character by looking at the content at the start
1853                        let marker = content[match_start..].chars().next().unwrap_or('*');
1854                        if marker == '*' || marker == '_' {
1855                            // Extract content between markers
1856                            let content_start = match_start + marker_count;
1857                            let content_end = if match_end >= marker_count {
1858                                match_end - marker_count
1859                            } else {
1860                                match_end
1861                            };
1862                            let content_part = if content_start < content_end && content_end <= content.len() {
1863                                &content[content_start..content_end]
1864                            } else {
1865                                ""
1866                            };
1867
1868                            // Find which line this emphasis is on using line_offsets
1869                            let line_idx = match line_offsets.binary_search(&match_start) {
1870                                Ok(idx) => idx,
1871                                Err(idx) => idx.saturating_sub(1),
1872                            };
1873                            let line_num = line_idx + 1;
1874                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1875                            let col_start = match_start - line_start;
1876                            let col_end = match_end - line_start;
1877
1878                            emphasis_spans.push(EmphasisSpan {
1879                                line: line_num,
1880                                start_col: col_start,
1881                                end_col: col_end,
1882                                byte_offset: match_start,
1883                                byte_end: match_end,
1884                                marker,
1885                                marker_count,
1886                                content: content_part.to_string(),
1887                            });
1888                        }
1889                    }
1890                }
1891                Event::Start(Tag::List(start_number)) => {
1892                    list_depth += 1;
1893                    list_stack.push(start_number.is_some());
1894                }
1895                Event::End(TagEnd::List(_)) => {
1896                    list_depth = list_depth.saturating_sub(1);
1897                    list_stack.pop();
1898                }
1899                Event::Start(Tag::Item) if list_depth > 0 => {
1900                    // Get the ordered state for the CURRENT (innermost) list
1901                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1902                    // Find which line this byte offset corresponds to
1903                    let item_start = range.start;
1904
1905                    // Binary search to find the line number
1906                    let mut line_idx = match line_offsets.binary_search(&item_start) {
1907                        Ok(idx) => idx,
1908                        Err(idx) => idx.saturating_sub(1),
1909                    };
1910
1911                    // Pulldown-cmark reports nested list items at the newline before the item
1912                    // when using tab indentation (e.g., "* Item\n\t- Nested").
1913                    // Advance to the actual content line in this case.
1914                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1915                        line_idx += 1;
1916                    }
1917
1918                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
1919                    if front_matter_end > 0 && line_idx < front_matter_end {
1920                        continue;
1921                    }
1922
1923                    if line_idx < line_offsets.len() {
1924                        let line_start_byte = line_offsets[line_idx];
1925                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1926                        let line = &content[line_start_byte..line_end.min(content.len())];
1927
1928                        // Strip trailing newline
1929                        let line = line
1930                            .strip_suffix('\n')
1931                            .or_else(|| line.strip_suffix("\r\n"))
1932                            .unwrap_or(line);
1933
1934                        // Strip blockquote prefix if present
1935                        let blockquote_parse = Self::parse_blockquote_prefix(line);
1936                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1937                            (prefix.len(), content)
1938                        } else {
1939                            (0, line)
1940                        };
1941
1942                        // Parse the list marker from the actual line
1943                        if current_list_is_ordered {
1944                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1945                                Self::parse_ordered_list(line_to_parse)
1946                            {
1947                                let marker = format!("{number_str}{delimiter}");
1948                                let marker_column = blockquote_prefix_len + leading_spaces.len();
1949                                let content_column = marker_column + marker.len() + spacing.len();
1950                                let number = number_str.parse().ok();
1951
1952                                list_items.entry(line_start_byte).or_insert((
1953                                    true,
1954                                    marker,
1955                                    marker_column,
1956                                    content_column,
1957                                    number,
1958                                ));
1959                            }
1960                        } else if let Some((leading_spaces, marker, spacing, _content)) =
1961                            Self::parse_unordered_list(line_to_parse)
1962                        {
1963                            let marker_column = blockquote_prefix_len + leading_spaces.len();
1964                            let content_column = marker_column + 1 + spacing.len();
1965
1966                            list_items.entry(line_start_byte).or_insert((
1967                                false,
1968                                marker.to_string(),
1969                                marker_column,
1970                                content_column,
1971                                None,
1972                            ));
1973                        }
1974                    }
1975                }
1976                _ => {}
1977            }
1978        }
1979
1980        (list_items, emphasis_spans)
1981    }
1982
1983    /// Fast unordered list parser - replaces regex for 5-10x speedup
1984    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1985    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1986    #[inline]
1987    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1988        let bytes = line.as_bytes();
1989        let mut i = 0;
1990
1991        // Skip leading whitespace
1992        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1993            i += 1;
1994        }
1995
1996        // Check for marker
1997        if i >= bytes.len() {
1998            return None;
1999        }
2000        let marker = bytes[i] as char;
2001        if marker != '-' && marker != '*' && marker != '+' {
2002            return None;
2003        }
2004        let marker_pos = i;
2005        i += 1;
2006
2007        // Collect spacing after marker (space or tab only)
2008        let spacing_start = i;
2009        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2010            i += 1;
2011        }
2012
2013        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2014    }
2015
2016    /// Fast ordered list parser - replaces regex for 5-10x speedup
2017    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2018    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2019    #[inline]
2020    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2021        let bytes = line.as_bytes();
2022        let mut i = 0;
2023
2024        // Skip leading whitespace
2025        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2026            i += 1;
2027        }
2028
2029        // Collect digits
2030        let number_start = i;
2031        while i < bytes.len() && bytes[i].is_ascii_digit() {
2032            i += 1;
2033        }
2034        if i == number_start {
2035            return None; // No digits found
2036        }
2037
2038        // Check for delimiter
2039        if i >= bytes.len() {
2040            return None;
2041        }
2042        let delimiter = bytes[i] as char;
2043        if delimiter != '.' && delimiter != ')' {
2044            return None;
2045        }
2046        let delimiter_pos = i;
2047        i += 1;
2048
2049        // Collect spacing after delimiter (space or tab only)
2050        let spacing_start = i;
2051        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2052            i += 1;
2053        }
2054
2055        Some((
2056            &line[..number_start],
2057            &line[number_start..delimiter_pos],
2058            delimiter,
2059            &line[spacing_start..i],
2060            &line[i..],
2061        ))
2062    }
2063
2064    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2065    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2066    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2067        let num_lines = line_offsets.len();
2068        let mut in_code_block = vec![false; num_lines];
2069
2070        // For each code block, mark all lines within it
2071        for &(start, end) in code_blocks {
2072            // Ensure we're at valid UTF-8 boundaries
2073            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2074                let mut boundary = start;
2075                while boundary > 0 && !content.is_char_boundary(boundary) {
2076                    boundary -= 1;
2077                }
2078                boundary
2079            } else {
2080                start
2081            };
2082
2083            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2084                let mut boundary = end;
2085                while boundary < content.len() && !content.is_char_boundary(boundary) {
2086                    boundary += 1;
2087                }
2088                boundary
2089            } else {
2090                end.min(content.len())
2091            };
2092
2093            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2094            // That function now has proper list context awareness (see code_block_utils.rs)
2095            // and correctly distinguishes between:
2096            // - Fenced code blocks (``` or ~~~)
2097            // - Indented code blocks at document level (4 spaces + blank line before)
2098            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2099            //
2100            // We no longer need to re-validate here. The original validation logic
2101            // was causing false positives by marking list continuation paragraphs as
2102            // code blocks when they have 4 spaces of indentation.
2103
2104            // Use binary search to find the first and last line indices
2105            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2106            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2107            //
2108            // Find the line that CONTAINS safe_start: the line with the largest
2109            // start offset that is <= safe_start. partition_point gives us the
2110            // first line that starts AFTER safe_start, so we subtract 1.
2111            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2112            let first_line = first_line_after.saturating_sub(1);
2113            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2114
2115            // Mark all lines in the range at once
2116            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2117                *flag = true;
2118            }
2119        }
2120
2121        in_code_block
2122    }
2123
2124    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2125    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2126    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2127        let content_lines: Vec<&str> = content.lines().collect();
2128        let num_lines = content_lines.len();
2129        let mut in_math_block = vec![false; num_lines];
2130
2131        let mut inside_math = false;
2132
2133        for (i, line) in content_lines.iter().enumerate() {
2134            // Skip lines that are in code blocks - math delimiters inside code are literal
2135            if code_block_map.get(i).copied().unwrap_or(false) {
2136                continue;
2137            }
2138
2139            let trimmed = line.trim();
2140
2141            // Check for math block delimiter ($$)
2142            // A line with just $$ toggles the math block state
2143            if trimmed == "$$" {
2144                if inside_math {
2145                    // Closing delimiter - this line is still part of the math block
2146                    in_math_block[i] = true;
2147                    inside_math = false;
2148                } else {
2149                    // Opening delimiter - this line starts the math block
2150                    in_math_block[i] = true;
2151                    inside_math = true;
2152                }
2153            } else if inside_math {
2154                // Content inside math block
2155                in_math_block[i] = true;
2156            }
2157        }
2158
2159        in_math_block
2160    }
2161
2162    /// Pre-compute basic line information (without headings/blockquotes)
2163    /// Also returns emphasis spans detected during the pulldown-cmark parse
2164    fn compute_basic_line_info(
2165        content: &str,
2166        line_offsets: &[usize],
2167        code_blocks: &[(usize, usize)],
2168        flavor: MarkdownFlavor,
2169        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2170        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2171    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2172        let content_lines: Vec<&str> = content.lines().collect();
2173        let mut lines = Vec::with_capacity(content_lines.len());
2174
2175        // Pre-compute which lines are in code blocks
2176        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2177
2178        // Pre-compute which lines are in math blocks ($$ ... $$)
2179        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2180
2181        // Detect front matter boundaries FIRST, before any other parsing
2182        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2183        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2184
2185        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2186        // (context-aware, eliminates false positives)
2187        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2188            content,
2189            line_offsets,
2190            flavor,
2191            front_matter_end,
2192            code_blocks,
2193        );
2194
2195        for (i, line) in content_lines.iter().enumerate() {
2196            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2197            let indent = line.len() - line.trim_start().len();
2198            // Compute visual indent with proper CommonMark tab expansion
2199            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2200
2201            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2202            let blockquote_parse = Self::parse_blockquote_prefix(line);
2203
2204            // For blank detection, consider blockquote context
2205            let is_blank = if let Some((_, content)) = blockquote_parse {
2206                // In blockquote context, check if content after prefix is blank
2207                content.trim().is_empty()
2208            } else {
2209                line.trim().is_empty()
2210            };
2211
2212            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2213            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2214
2215            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2216            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2217                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2218            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2219            // This ensures content after `-->` on the same line is not incorrectly skipped
2220            let line_end_offset = byte_offset + line.len();
2221            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2222                html_comment_ranges,
2223                byte_offset,
2224                line_end_offset,
2225            );
2226            // Use pulldown-cmark's list detection for context-aware parsing
2227            // This eliminates false positives on continuation lines (issue #253)
2228            let list_item =
2229                list_item_map
2230                    .get(&byte_offset)
2231                    .map(
2232                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2233                            marker: marker.clone(),
2234                            is_ordered: *is_ordered,
2235                            number: *number,
2236                            marker_column: *marker_column,
2237                            content_column: *content_column,
2238                        },
2239                    );
2240
2241            // Detect horizontal rules (only outside code blocks and frontmatter)
2242            // Uses CommonMark-compliant check including leading indentation validation
2243            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2244            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2245
2246            // Get math block status for this line
2247            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2248
2249            lines.push(LineInfo {
2250                byte_offset,
2251                byte_len: line.len(),
2252                indent,
2253                visual_indent,
2254                is_blank,
2255                in_code_block,
2256                in_front_matter,
2257                in_html_block: false, // Will be populated after line creation
2258                in_html_comment,
2259                list_item,
2260                heading: None,    // Will be populated in second pass for Setext headings
2261                blockquote: None, // Will be populated after line creation
2262                in_mkdocstrings,
2263                in_esm_block: false, // Will be populated after line creation for MDX files
2264                in_code_span_continuation: false, // Will be populated after code spans are parsed
2265                is_horizontal_rule: is_hr,
2266                in_math_block,
2267            });
2268        }
2269
2270        (lines, emphasis_spans)
2271    }
2272
2273    /// Detect headings and blockquotes (called after HTML block detection)
2274    fn detect_headings_and_blockquotes(
2275        content: &str,
2276        lines: &mut [LineInfo],
2277        flavor: MarkdownFlavor,
2278        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2279        link_byte_ranges: &[(usize, usize)],
2280    ) {
2281        // Regex for heading detection
2282        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2283            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2284        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2285            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2286
2287        let content_lines: Vec<&str> = content.lines().collect();
2288
2289        // Detect front matter boundaries to skip those lines
2290        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2291
2292        // Detect headings (including Setext which needs look-ahead) and blockquotes
2293        for i in 0..lines.len() {
2294            let line = content_lines[i];
2295
2296            // Detect blockquotes FIRST, before any skip conditions.
2297            // A line can be both a blockquote AND contain a code block inside it.
2298            // We need to know about the blockquote marker regardless of code block status.
2299            // Skip only frontmatter lines - those are never blockquotes.
2300            if !(front_matter_end > 0 && i < front_matter_end)
2301                && let Some(bq) = parse_blockquote_detailed(line)
2302            {
2303                let nesting_level = bq.markers.len();
2304                let marker_column = bq.indent.len();
2305                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2306                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2307                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2308                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2309
2310                lines[i].blockquote = Some(BlockquoteInfo {
2311                    nesting_level,
2312                    indent: bq.indent.to_string(),
2313                    marker_column,
2314                    prefix,
2315                    content: bq.content.to_string(),
2316                    has_no_space_after_marker: has_no_space,
2317                    has_multiple_spaces_after_marker: has_multiple_spaces,
2318                    needs_md028_fix,
2319                });
2320            }
2321
2322            // Now apply skip conditions for heading detection
2323            if lines[i].in_code_block {
2324                continue;
2325            }
2326
2327            // Skip lines in front matter
2328            if front_matter_end > 0 && i < front_matter_end {
2329                continue;
2330            }
2331
2332            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2333            if lines[i].in_html_block {
2334                continue;
2335            }
2336
2337            // Skip heading detection for blank lines
2338            if lines[i].is_blank {
2339                continue;
2340            }
2341
2342            // Check for ATX headings (but skip MkDocs snippet lines)
2343            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2344            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2345                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2346                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2347            } else {
2348                false
2349            };
2350
2351            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2352                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2353                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2354                    continue;
2355                }
2356                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2357                // This prevents false positives where `#fragment` is detected as a heading
2358                let line_offset = lines[i].byte_offset;
2359                if link_byte_ranges
2360                    .iter()
2361                    .any(|&(start, end)| line_offset > start && line_offset < end)
2362                {
2363                    continue;
2364                }
2365                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2366                let hashes = caps.get(2).map_or("", |m| m.as_str());
2367                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2368                let rest = caps.get(4).map_or("", |m| m.as_str());
2369
2370                let level = hashes.len() as u8;
2371                let marker_column = leading_spaces.len();
2372
2373                // Check for closing sequence, but handle custom IDs that might come after
2374                let (text, has_closing, closing_seq) = {
2375                    // First check if there's a custom ID at the end
2376                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2377                        // Check if this looks like a valid custom ID (ends with })
2378                        if rest[id_start..].trim_end().ends_with('}') {
2379                            // Split off the custom ID
2380                            (&rest[..id_start], &rest[id_start..])
2381                        } else {
2382                            (rest, "")
2383                        }
2384                    } else {
2385                        (rest, "")
2386                    };
2387
2388                    // Now look for closing hashes in the part before the custom ID
2389                    let trimmed_rest = rest_without_id.trim_end();
2390                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2391                        // Find the start of the hash sequence by walking backwards
2392                        // Use char_indices to get byte positions at char boundaries
2393                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2394
2395                        // Find which char index corresponds to last_hash_byte_pos
2396                        let last_hash_char_idx = char_positions
2397                            .iter()
2398                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2399
2400                        if let Some(mut char_idx) = last_hash_char_idx {
2401                            // Walk backwards to find start of hash sequence
2402                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2403                                char_idx -= 1;
2404                            }
2405
2406                            // Get the byte position of the start of hashes
2407                            let start_of_hashes = char_positions[char_idx].0;
2408
2409                            // Check if there's at least one space before the closing hashes
2410                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2411
2412                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2413                            let potential_closing = &trimmed_rest[start_of_hashes..];
2414                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2415
2416                            if is_all_hashes && has_space_before {
2417                                // This is a closing sequence
2418                                let closing_hashes = potential_closing.to_string();
2419                                // The text is everything before the closing hashes
2420                                // Don't include the custom ID here - it will be extracted later
2421                                let text_part = if !custom_id_part.is_empty() {
2422                                    // If we have a custom ID, append it back to get the full rest
2423                                    // This allows the extract_header_id function to handle it properly
2424                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2425                                } else {
2426                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2427                                };
2428                                (text_part, true, closing_hashes)
2429                            } else {
2430                                // Not a valid closing sequence, return the full content
2431                                (rest.to_string(), false, String::new())
2432                            }
2433                        } else {
2434                            // Couldn't find char boundary, return the full content
2435                            (rest.to_string(), false, String::new())
2436                        }
2437                    } else {
2438                        // No hashes found, return the full content
2439                        (rest.to_string(), false, String::new())
2440                    }
2441                };
2442
2443                let content_column = marker_column + hashes.len() + spaces_after.len();
2444
2445                // Extract custom header ID if present
2446                let raw_text = text.trim().to_string();
2447                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2448
2449                // If no custom ID was found on the header line, check the next line for standalone attr-list
2450                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2451                    let next_line = content_lines[i + 1];
2452                    if !lines[i + 1].in_code_block
2453                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2454                        && let Some(next_line_id) =
2455                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2456                    {
2457                        custom_id = Some(next_line_id);
2458                    }
2459                }
2460
2461                // ATX heading is "valid" for processing by heading rules if:
2462                // 1. Has space after # (CommonMark compliant): `# Heading`
2463                // 2. Is empty (just hashes): `#`
2464                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2465                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2466                //
2467                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2468                // - `#tag` - single # with lowercase (social hashtag)
2469                // - `#123` - single # with number (GitHub issue ref)
2470                let is_valid = !spaces_after.is_empty()
2471                    || rest.is_empty()
2472                    || level > 1
2473                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2474
2475                lines[i].heading = Some(HeadingInfo {
2476                    level,
2477                    style: HeadingStyle::ATX,
2478                    marker: hashes.to_string(),
2479                    marker_column,
2480                    content_column,
2481                    text: clean_text,
2482                    custom_id,
2483                    raw_text,
2484                    has_closing_sequence: has_closing,
2485                    closing_sequence: closing_seq,
2486                    is_valid,
2487                });
2488            }
2489            // Check for Setext headings (need to look at next line)
2490            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2491                let next_line = content_lines[i + 1];
2492                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2493                    // Skip if next line is front matter delimiter
2494                    if front_matter_end > 0 && i < front_matter_end {
2495                        continue;
2496                    }
2497
2498                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2499                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2500                    {
2501                        continue;
2502                    }
2503
2504                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2505                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2506                    let content_line = line.trim();
2507
2508                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2509                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2510                        continue;
2511                    }
2512
2513                    // Skip underscore thematic breaks (___)
2514                    if content_line.starts_with('_') {
2515                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2516                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2517                            continue;
2518                        }
2519                    }
2520
2521                    // Skip numbered lists (1. Item, 2. Item, etc.)
2522                    if let Some(first_char) = content_line.chars().next()
2523                        && first_char.is_ascii_digit()
2524                    {
2525                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2526                        if num_end < content_line.len() {
2527                            let next = content_line.chars().nth(num_end);
2528                            if next == Some('.') || next == Some(')') {
2529                                continue;
2530                            }
2531                        }
2532                    }
2533
2534                    // Skip ATX headings
2535                    if ATX_HEADING_REGEX.is_match(line) {
2536                        continue;
2537                    }
2538
2539                    // Skip blockquotes
2540                    if content_line.starts_with('>') {
2541                        continue;
2542                    }
2543
2544                    // Skip code fences
2545                    let trimmed_start = line.trim_start();
2546                    if trimmed_start.len() >= 3 {
2547                        let first_three: String = trimmed_start.chars().take(3).collect();
2548                        if first_three == "```" || first_three == "~~~" {
2549                            continue;
2550                        }
2551                    }
2552
2553                    // Skip HTML blocks
2554                    if content_line.starts_with('<') {
2555                        continue;
2556                    }
2557
2558                    let underline = next_line.trim();
2559
2560                    let level = if underline.starts_with('=') { 1 } else { 2 };
2561                    let style = if level == 1 {
2562                        HeadingStyle::Setext1
2563                    } else {
2564                        HeadingStyle::Setext2
2565                    };
2566
2567                    // Extract custom header ID if present
2568                    let raw_text = line.trim().to_string();
2569                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2570
2571                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2572                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2573                        let attr_line = content_lines[i + 2];
2574                        if !lines[i + 2].in_code_block
2575                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2576                            && let Some(attr_line_id) =
2577                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2578                        {
2579                            custom_id = Some(attr_line_id);
2580                        }
2581                    }
2582
2583                    lines[i].heading = Some(HeadingInfo {
2584                        level,
2585                        style,
2586                        marker: underline.to_string(),
2587                        marker_column: next_line.len() - next_line.trim_start().len(),
2588                        content_column: lines[i].indent,
2589                        text: clean_text,
2590                        custom_id,
2591                        raw_text,
2592                        has_closing_sequence: false,
2593                        closing_sequence: String::new(),
2594                        is_valid: true, // Setext headings are always valid
2595                    });
2596                }
2597            }
2598        }
2599    }
2600
2601    /// Detect HTML blocks in the content
2602    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2603        // HTML block elements that trigger block context
2604        // Includes HTML5 media, embedded content, and interactive elements
2605        const BLOCK_ELEMENTS: &[&str] = &[
2606            "address",
2607            "article",
2608            "aside",
2609            "audio",
2610            "blockquote",
2611            "canvas",
2612            "details",
2613            "dialog",
2614            "dd",
2615            "div",
2616            "dl",
2617            "dt",
2618            "embed",
2619            "fieldset",
2620            "figcaption",
2621            "figure",
2622            "footer",
2623            "form",
2624            "h1",
2625            "h2",
2626            "h3",
2627            "h4",
2628            "h5",
2629            "h6",
2630            "header",
2631            "hr",
2632            "iframe",
2633            "li",
2634            "main",
2635            "menu",
2636            "nav",
2637            "noscript",
2638            "object",
2639            "ol",
2640            "p",
2641            "picture",
2642            "pre",
2643            "script",
2644            "search",
2645            "section",
2646            "source",
2647            "style",
2648            "summary",
2649            "svg",
2650            "table",
2651            "tbody",
2652            "td",
2653            "template",
2654            "textarea",
2655            "tfoot",
2656            "th",
2657            "thead",
2658            "tr",
2659            "track",
2660            "ul",
2661            "video",
2662        ];
2663
2664        let mut i = 0;
2665        while i < lines.len() {
2666            // Skip if already in code block or front matter
2667            if lines[i].in_code_block || lines[i].in_front_matter {
2668                i += 1;
2669                continue;
2670            }
2671
2672            let trimmed = lines[i].content(content).trim_start();
2673
2674            // Check if line starts with an HTML tag
2675            if trimmed.starts_with('<') && trimmed.len() > 1 {
2676                // Extract tag name safely
2677                let after_bracket = &trimmed[1..];
2678                let is_closing = after_bracket.starts_with('/');
2679                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2680
2681                // Extract tag name (stop at space, >, /, or end of string)
2682                let tag_name = tag_start
2683                    .chars()
2684                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2685                    .collect::<String>()
2686                    .to_lowercase();
2687
2688                // Check if it's a block element
2689                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2690                    // Mark this line as in HTML block
2691                    lines[i].in_html_block = true;
2692
2693                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2694                    // This avoids complex nesting logic that might cause infinite loops
2695                    if !is_closing {
2696                        let closing_tag = format!("</{tag_name}>");
2697                        // style and script tags can contain blank lines (CSS/JS formatting)
2698                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2699                        let mut j = i + 1;
2700                        let mut found_closing_tag = false;
2701                        while j < lines.len() && j < i + 100 {
2702                            // Limit search to 100 lines
2703                            // Stop at blank lines (except for style/script tags)
2704                            if !allow_blank_lines && lines[j].is_blank {
2705                                break;
2706                            }
2707
2708                            lines[j].in_html_block = true;
2709
2710                            // Check if this line contains the closing tag
2711                            if lines[j].content(content).contains(&closing_tag) {
2712                                found_closing_tag = true;
2713                            }
2714
2715                            // After finding closing tag, continue marking lines as
2716                            // in_html_block until blank line (per CommonMark spec)
2717                            if found_closing_tag {
2718                                j += 1;
2719                                // Continue marking subsequent lines until blank
2720                                while j < lines.len() && j < i + 100 {
2721                                    if lines[j].is_blank {
2722                                        break;
2723                                    }
2724                                    lines[j].in_html_block = true;
2725                                    j += 1;
2726                                }
2727                                break;
2728                            }
2729                            j += 1;
2730                        }
2731                    }
2732                }
2733            }
2734
2735            i += 1;
2736        }
2737    }
2738
2739    /// Detect ESM import/export blocks in MDX files
2740    /// ESM blocks consist of contiguous import/export statements at the top of the file
2741    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2742        // Only process MDX files
2743        if !flavor.supports_esm_blocks() {
2744            return;
2745        }
2746
2747        let mut in_multiline_comment = false;
2748
2749        for line in lines.iter_mut() {
2750            // Skip blank lines and HTML comments
2751            if line.is_blank || line.in_html_comment {
2752                continue;
2753            }
2754
2755            let trimmed = line.content(content).trim_start();
2756
2757            // Handle continuation of multi-line JS comments
2758            if in_multiline_comment {
2759                if trimmed.contains("*/") {
2760                    in_multiline_comment = false;
2761                }
2762                continue;
2763            }
2764
2765            // Skip single-line JS comments (// and ///)
2766            if trimmed.starts_with("//") {
2767                continue;
2768            }
2769
2770            // Handle start of multi-line JS comment
2771            if trimmed.starts_with("/*") {
2772                if !trimmed.contains("*/") {
2773                    in_multiline_comment = true;
2774                }
2775                continue;
2776            }
2777
2778            // Check if line starts with import or export
2779            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2780                line.in_esm_block = true;
2781            } else {
2782                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2783                break;
2784            }
2785        }
2786    }
2787
2788    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2789    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2790        let mut code_spans = Vec::new();
2791
2792        // Quick check - if no backticks, no code spans
2793        if !content.contains('`') {
2794            return code_spans;
2795        }
2796
2797        // Use pulldown-cmark's streaming parser with byte offsets
2798        let parser = Parser::new(content).into_offset_iter();
2799
2800        for (event, range) in parser {
2801            if let Event::Code(_) = event {
2802                let start_pos = range.start;
2803                let end_pos = range.end;
2804
2805                // The range includes the backticks, extract the actual content
2806                let full_span = &content[start_pos..end_pos];
2807                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2808
2809                // Extract content between backticks, preserving spaces
2810                let content_start = start_pos + backtick_count;
2811                let content_end = end_pos - backtick_count;
2812                let span_content = if content_start < content_end {
2813                    content[content_start..content_end].to_string()
2814                } else {
2815                    String::new()
2816                };
2817
2818                // Use binary search to find line number - O(log n) instead of O(n)
2819                // Find the rightmost line whose byte_offset <= start_pos
2820                let line_idx = lines
2821                    .partition_point(|line| line.byte_offset <= start_pos)
2822                    .saturating_sub(1);
2823                let line_num = line_idx + 1;
2824                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2825
2826                // Find end column using binary search
2827                let end_line_idx = lines
2828                    .partition_point(|line| line.byte_offset <= end_pos)
2829                    .saturating_sub(1);
2830                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2831
2832                // Convert byte offsets to character positions for correct Unicode handling
2833                // This ensures consistency with warning.column which uses character positions
2834                let line_content = lines[line_idx].content(content);
2835                let col_start = if byte_col_start <= line_content.len() {
2836                    line_content[..byte_col_start].chars().count()
2837                } else {
2838                    line_content.chars().count()
2839                };
2840
2841                let end_line_content = lines[end_line_idx].content(content);
2842                let col_end = if byte_col_end <= end_line_content.len() {
2843                    end_line_content[..byte_col_end].chars().count()
2844                } else {
2845                    end_line_content.chars().count()
2846                };
2847
2848                code_spans.push(CodeSpan {
2849                    line: line_num,
2850                    end_line: end_line_idx + 1,
2851                    start_col: col_start,
2852                    end_col: col_end,
2853                    byte_offset: start_pos,
2854                    byte_end: end_pos,
2855                    backtick_count,
2856                    content: span_content,
2857                });
2858            }
2859        }
2860
2861        // Sort by position to ensure consistent ordering
2862        code_spans.sort_by_key(|span| span.byte_offset);
2863
2864        code_spans
2865    }
2866
2867    /// Parse all list blocks in the content (legacy line-by-line approach)
2868    ///
2869    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2870    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2871    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2872    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2873    ///   treated as list continuation (based on the list marker width)
2874    ///
2875    /// When a new list item is encountered, we check if list-breaking content was seen
2876    /// since the last item. If so, we start a new list block.
2877    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2878        // Minimum indentation for unordered list continuation per CommonMark spec
2879        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2880
2881        /// Initialize or reset the forward-scanning tracking state.
2882        /// This helper eliminates code duplication across three initialization sites.
2883        #[inline]
2884        fn reset_tracking_state(
2885            list_item: &ListItemInfo,
2886            has_list_breaking_content: &mut bool,
2887            min_continuation: &mut usize,
2888        ) {
2889            *has_list_breaking_content = false;
2890            let marker_width = if list_item.is_ordered {
2891                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2892            } else {
2893                list_item.marker.len()
2894            };
2895            *min_continuation = if list_item.is_ordered {
2896                marker_width
2897            } else {
2898                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2899            };
2900        }
2901
2902        // Pre-size based on lines that could be list items
2903        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2904        let mut current_block: Option<ListBlock> = None;
2905        let mut last_list_item_line = 0;
2906        let mut current_indent_level = 0;
2907        let mut last_marker_width = 0;
2908
2909        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2910        let mut has_list_breaking_content_since_last_item = false;
2911        let mut min_continuation_for_tracking = 0;
2912
2913        for (line_idx, line_info) in lines.iter().enumerate() {
2914            let line_num = line_idx + 1;
2915
2916            // Enhanced code block handling using Design #3's context analysis
2917            if line_info.in_code_block {
2918                if let Some(ref mut block) = current_block {
2919                    // Calculate minimum indentation for list continuation
2920                    let min_continuation_indent =
2921                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2922
2923                    // Analyze code block context using the three-tier classification
2924                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2925
2926                    match context {
2927                        CodeBlockContext::Indented => {
2928                            // Code block is properly indented - continues the list
2929                            block.end_line = line_num;
2930                            continue;
2931                        }
2932                        CodeBlockContext::Standalone => {
2933                            // Code block separates lists - end current block
2934                            let completed_block = current_block.take().unwrap();
2935                            list_blocks.push(completed_block);
2936                            continue;
2937                        }
2938                        CodeBlockContext::Adjacent => {
2939                            // Edge case - use conservative behavior (continue list)
2940                            block.end_line = line_num;
2941                            continue;
2942                        }
2943                    }
2944                } else {
2945                    // No current list block - skip code block lines
2946                    continue;
2947                }
2948            }
2949
2950            // Extract blockquote prefix if any
2951            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2952                caps.get(0).unwrap().as_str().to_string()
2953            } else {
2954                String::new()
2955            };
2956
2957            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2958            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2959            if let Some(ref block) = current_block
2960                && line_info.list_item.is_none()
2961                && !line_info.is_blank
2962                && !line_info.in_code_span_continuation
2963            {
2964                let line_content = line_info.content(content).trim();
2965
2966                // Check for structural separators that break lists
2967                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2968                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2969                // as they indicate improper indentation rather than lazy continuation.
2970                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2971
2972                // Check if blockquote context changes (different prefix than current block)
2973                // Lines within the SAME blockquote context don't break lists
2974                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
2975
2976                let breaks_list = line_info.heading.is_some()
2977                    || line_content.starts_with("---")
2978                    || line_content.starts_with("***")
2979                    || line_content.starts_with("___")
2980                    || crate::utils::skip_context::is_table_line(line_content)
2981                    || blockquote_prefix_changes
2982                    || (line_info.indent > 0
2983                        && line_info.indent < min_continuation_for_tracking
2984                        && !is_lazy_continuation);
2985
2986                if breaks_list {
2987                    has_list_breaking_content_since_last_item = true;
2988                }
2989            }
2990
2991            // If this line is a code span continuation within an active list block,
2992            // extend the block's end_line to include this line (maintains list continuity)
2993            if line_info.in_code_span_continuation
2994                && line_info.list_item.is_none()
2995                && let Some(ref mut block) = current_block
2996            {
2997                block.end_line = line_num;
2998            }
2999
3000            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3001            // properly indented lines within the list). This ensures the workaround at line 2448
3002            // works correctly when there are multiple continuation lines before a nested list item.
3003            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3004            // For blockquote lines, compute effective indent after stripping the prefix
3005            let effective_continuation_indent = if let Some(ref block) = current_block {
3006                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3007                let line_content = line_info.content(content);
3008                let line_bq_level = line_content
3009                    .chars()
3010                    .take_while(|c| *c == '>' || c.is_whitespace())
3011                    .filter(|&c| c == '>')
3012                    .count();
3013                if line_bq_level > 0 && line_bq_level == block_bq_level {
3014                    // Compute indent after blockquote markers
3015                    let mut pos = 0;
3016                    let mut found_markers = 0;
3017                    for c in line_content.chars() {
3018                        pos += c.len_utf8();
3019                        if c == '>' {
3020                            found_markers += 1;
3021                            if found_markers == line_bq_level {
3022                                if line_content.get(pos..pos + 1) == Some(" ") {
3023                                    pos += 1;
3024                                }
3025                                break;
3026                            }
3027                        }
3028                    }
3029                    let after_bq = &line_content[pos..];
3030                    after_bq.len() - after_bq.trim_start().len()
3031                } else {
3032                    line_info.indent
3033                }
3034            } else {
3035                line_info.indent
3036            };
3037            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3038                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3039                if block_bq_level > 0 {
3040                    if block.is_ordered { last_marker_width } else { 2 }
3041                } else {
3042                    min_continuation_for_tracking
3043                }
3044            } else {
3045                min_continuation_for_tracking
3046            };
3047            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3048                || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
3049
3050            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3051                eprintln!(
3052                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3053                    line_num,
3054                    effective_continuation_indent,
3055                    adjusted_min_continuation_for_tracking,
3056                    is_valid_continuation,
3057                    line_info.in_code_span_continuation,
3058                    line_info.in_code_block,
3059                    current_block.is_some()
3060                );
3061            }
3062
3063            if !line_info.in_code_span_continuation
3064                && line_info.list_item.is_none()
3065                && !line_info.is_blank
3066                && !line_info.in_code_block
3067                && is_valid_continuation
3068                && let Some(ref mut block) = current_block
3069            {
3070                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3071                    eprintln!(
3072                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3073                        line_num, block.end_line, line_num
3074                    );
3075                }
3076                block.end_line = line_num;
3077            }
3078
3079            // Check if this line is a list item
3080            if let Some(list_item) = &line_info.list_item {
3081                // Calculate nesting level based on indentation
3082                let item_indent = list_item.marker_column;
3083                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3084
3085                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3086                    eprintln!(
3087                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3088                        line_num, list_item.marker, item_indent
3089                    );
3090                }
3091
3092                if let Some(ref mut block) = current_block {
3093                    // Check if this continues the current block
3094                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3095                    // or a continuation at the same or lower level
3096                    let is_nested = nesting > block.nesting_level;
3097                    let same_type =
3098                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3099                    let same_context = block.blockquote_prefix == blockquote_prefix;
3100                    // Allow one blank line after last item, or lines immediately after block content
3101                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3102
3103                    // For unordered lists, also check marker consistency
3104                    let marker_compatible =
3105                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3106
3107                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3108                    // This eliminates the quadratic bottleneck from issue #148
3109                    let has_non_list_content = has_list_breaking_content_since_last_item;
3110
3111                    // A list continues if:
3112                    // 1. It's a nested item (indented more than the parent), OR
3113                    // 2. It's the same type at the same level with reasonable distance
3114                    let mut continues_list = if is_nested {
3115                        // Nested items always continue the list if they're in the same context
3116                        same_context && reasonable_distance && !has_non_list_content
3117                    } else {
3118                        // Same-level items need to match type and markers
3119                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3120                    };
3121
3122                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3123                        eprintln!(
3124                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3125                            line_num,
3126                            continues_list,
3127                            is_nested,
3128                            same_type,
3129                            same_context,
3130                            reasonable_distance,
3131                            marker_compatible,
3132                            has_non_list_content,
3133                            last_list_item_line,
3134                            block.end_line
3135                        );
3136                    }
3137
3138                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3139                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3140                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
3141                        // Check if the previous line was a list item or a continuation of a list item
3142                        // (including lazy continuation lines)
3143                        if block.item_lines.contains(&(line_num - 1)) {
3144                            // They're consecutive list items - force them to be in the same list
3145                            continues_list = true;
3146                        } else {
3147                            // Previous line is a continuation line within this block
3148                            // (e.g., lazy continuation with indent=0)
3149                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3150                            continues_list = true;
3151                        }
3152                    }
3153
3154                    if continues_list {
3155                        // Extend current block
3156                        block.end_line = line_num;
3157                        block.item_lines.push(line_num);
3158
3159                        // Update max marker width
3160                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3161                            list_item.marker.len() + 1
3162                        } else {
3163                            list_item.marker.len()
3164                        });
3165
3166                        // Update marker consistency for unordered lists
3167                        if !block.is_ordered
3168                            && block.marker.is_some()
3169                            && block.marker.as_ref() != Some(&list_item.marker)
3170                        {
3171                            // Mixed markers, clear the marker field
3172                            block.marker = None;
3173                        }
3174
3175                        // Reset tracked state for issue #148 optimization
3176                        reset_tracking_state(
3177                            list_item,
3178                            &mut has_list_breaking_content_since_last_item,
3179                            &mut min_continuation_for_tracking,
3180                        );
3181                    } else {
3182                        // End current block and start a new one
3183
3184                        list_blocks.push(block.clone());
3185
3186                        *block = ListBlock {
3187                            start_line: line_num,
3188                            end_line: line_num,
3189                            is_ordered: list_item.is_ordered,
3190                            marker: if list_item.is_ordered {
3191                                None
3192                            } else {
3193                                Some(list_item.marker.clone())
3194                            },
3195                            blockquote_prefix: blockquote_prefix.clone(),
3196                            item_lines: vec![line_num],
3197                            nesting_level: nesting,
3198                            max_marker_width: if list_item.is_ordered {
3199                                list_item.marker.len() + 1
3200                            } else {
3201                                list_item.marker.len()
3202                            },
3203                        };
3204
3205                        // Initialize tracked state for new block (issue #148 optimization)
3206                        reset_tracking_state(
3207                            list_item,
3208                            &mut has_list_breaking_content_since_last_item,
3209                            &mut min_continuation_for_tracking,
3210                        );
3211                    }
3212                } else {
3213                    // Start a new block
3214                    current_block = Some(ListBlock {
3215                        start_line: line_num,
3216                        end_line: line_num,
3217                        is_ordered: list_item.is_ordered,
3218                        marker: if list_item.is_ordered {
3219                            None
3220                        } else {
3221                            Some(list_item.marker.clone())
3222                        },
3223                        blockquote_prefix,
3224                        item_lines: vec![line_num],
3225                        nesting_level: nesting,
3226                        max_marker_width: list_item.marker.len(),
3227                    });
3228
3229                    // Initialize tracked state for new block (issue #148 optimization)
3230                    reset_tracking_state(
3231                        list_item,
3232                        &mut has_list_breaking_content_since_last_item,
3233                        &mut min_continuation_for_tracking,
3234                    );
3235                }
3236
3237                last_list_item_line = line_num;
3238                current_indent_level = item_indent;
3239                last_marker_width = if list_item.is_ordered {
3240                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3241                } else {
3242                    list_item.marker.len()
3243                };
3244            } else if let Some(ref mut block) = current_block {
3245                // Not a list item - check if it continues the current block
3246                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3247                    eprintln!(
3248                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3249                        line_num, line_info.is_blank
3250                    );
3251                }
3252
3253                // For MD032 compatibility, we use a simple approach:
3254                // - Indented lines continue the list
3255                // - Blank lines followed by indented content continue the list
3256                // - Everything else ends the list
3257
3258                // Check if the last line in the list block ended with a backslash (hard line break)
3259                // This handles cases where list items use backslash for hard line breaks
3260                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3261                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3262                } else {
3263                    false
3264                };
3265
3266                // Calculate minimum indentation for list continuation
3267                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3268                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3269                let min_continuation_indent = if block.is_ordered {
3270                    current_indent_level + last_marker_width
3271                } else {
3272                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3273                };
3274
3275                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3276                    // Indented line or backslash continuation continues the list
3277                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3278                        eprintln!(
3279                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3280                            line_num, line_info.indent, min_continuation_indent
3281                        );
3282                    }
3283                    block.end_line = line_num;
3284                } else if line_info.is_blank {
3285                    // Blank line - check if it's internal to the list or ending it
3286                    // We only include blank lines that are followed by more list content
3287                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3288                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3289                    }
3290                    let mut check_idx = line_idx + 1;
3291                    let mut found_continuation = false;
3292
3293                    // Skip additional blank lines
3294                    while check_idx < lines.len() && lines[check_idx].is_blank {
3295                        check_idx += 1;
3296                    }
3297
3298                    if check_idx < lines.len() {
3299                        let next_line = &lines[check_idx];
3300                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3301                        let next_content = next_line.content(content);
3302                        // Use blockquote level (count of >) to compare, not the full prefix
3303                        // This avoids issues where the regex captures extra whitespace
3304                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3305                        let next_bq_level_for_indent = next_content
3306                            .chars()
3307                            .take_while(|c| *c == '>' || c.is_whitespace())
3308                            .filter(|&c| c == '>')
3309                            .count();
3310                        let effective_indent =
3311                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3312                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3313                                // Find position after ">" and one space
3314                                let mut pos = 0;
3315                                let mut found_markers = 0;
3316                                for c in next_content.chars() {
3317                                    pos += c.len_utf8();
3318                                    if c == '>' {
3319                                        found_markers += 1;
3320                                        if found_markers == next_bq_level_for_indent {
3321                                            // Skip optional space after last >
3322                                            if next_content.get(pos..pos + 1) == Some(" ") {
3323                                                pos += 1;
3324                                            }
3325                                            break;
3326                                        }
3327                                    }
3328                                }
3329                                let after_blockquote_marker = &next_content[pos..];
3330                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3331                            } else {
3332                                next_line.indent
3333                            };
3334                        // Also adjust min_continuation_indent for blockquote lists
3335                        // The marker_column includes blockquote prefix, so subtract it
3336                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3337                            // For blockquote lists, the continuation is relative to blockquote content
3338                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3339                            if block.is_ordered { last_marker_width } else { 2 }
3340                        } else {
3341                            min_continuation_indent
3342                        };
3343                        // Check if followed by indented content (list continuation)
3344                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3345                            eprintln!(
3346                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3347                                line_num,
3348                                check_idx + 1,
3349                                effective_indent,
3350                                adjusted_min_continuation,
3351                                next_line.list_item.is_some(),
3352                                next_line.in_code_block
3353                            );
3354                        }
3355                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3356                            found_continuation = true;
3357                        }
3358                        // Check if followed by another list item at the same level
3359                        else if !next_line.in_code_block
3360                            && next_line.list_item.is_some()
3361                            && let Some(item) = &next_line.list_item
3362                        {
3363                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3364                                .find(next_line.content(content))
3365                                .map_or(String::new(), |m| m.as_str().to_string());
3366                            if item.marker_column == current_indent_level
3367                                && item.is_ordered == block.is_ordered
3368                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3369                            {
3370                                // Check if there was meaningful content between the list items (unused now)
3371                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3372                                // Pre-compute block's blockquote level for use in closures
3373                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3374                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3375                                    if let Some(between_line) = lines.get(idx) {
3376                                        let between_content = between_line.content(content);
3377                                        let trimmed = between_content.trim();
3378                                        // Skip empty lines
3379                                        if trimmed.is_empty() {
3380                                            return false;
3381                                        }
3382                                        // Check for meaningful content
3383                                        let line_indent = between_content.len() - between_content.trim_start().len();
3384
3385                                        // Check if blockquote level changed (not just if line starts with ">")
3386                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3387                                            .find(between_content)
3388                                            .map_or(String::new(), |m| m.as_str().to_string());
3389                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3390                                        let blockquote_level_changed =
3391                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
3392
3393                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
3394                                        if trimmed.starts_with("```")
3395                                            || trimmed.starts_with("~~~")
3396                                            || trimmed.starts_with("---")
3397                                            || trimmed.starts_with("***")
3398                                            || trimmed.starts_with("___")
3399                                            || blockquote_level_changed
3400                                            || crate::utils::skip_context::is_table_line(trimmed)
3401                                            || between_line.heading.is_some()
3402                                        {
3403                                            return true; // These are structural separators - meaningful content that breaks lists
3404                                        }
3405
3406                                        // Only properly indented content continues the list
3407                                        line_indent >= min_continuation_indent
3408                                    } else {
3409                                        false
3410                                    }
3411                                });
3412
3413                                if block.is_ordered {
3414                                    // For ordered lists: don't continue if there are structural separators
3415                                    // Check if there are structural separators between the list items
3416                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3417                                        if let Some(between_line) = lines.get(idx) {
3418                                            let between_content = between_line.content(content);
3419                                            let trimmed = between_content.trim();
3420                                            if trimmed.is_empty() {
3421                                                return false;
3422                                            }
3423                                            // Check if blockquote level changed (not just if line starts with ">")
3424                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3425                                                .find(between_content)
3426                                                .map_or(String::new(), |m| m.as_str().to_string());
3427                                            let between_bq_level =
3428                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
3429                                            let blockquote_level_changed =
3430                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
3431                                            // Check for structural separators that break lists
3432                                            trimmed.starts_with("```")
3433                                                || trimmed.starts_with("~~~")
3434                                                || trimmed.starts_with("---")
3435                                                || trimmed.starts_with("***")
3436                                                || trimmed.starts_with("___")
3437                                                || blockquote_level_changed
3438                                                || crate::utils::skip_context::is_table_line(trimmed)
3439                                                || between_line.heading.is_some()
3440                                        } else {
3441                                            false
3442                                        }
3443                                    });
3444                                    found_continuation = !has_structural_separators;
3445                                } else {
3446                                    // For unordered lists: also check for structural separators
3447                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3448                                        if let Some(between_line) = lines.get(idx) {
3449                                            let between_content = between_line.content(content);
3450                                            let trimmed = between_content.trim();
3451                                            if trimmed.is_empty() {
3452                                                return false;
3453                                            }
3454                                            // Check if blockquote level changed (not just if line starts with ">")
3455                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3456                                                .find(between_content)
3457                                                .map_or(String::new(), |m| m.as_str().to_string());
3458                                            let between_bq_level =
3459                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
3460                                            let blockquote_level_changed =
3461                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
3462                                            // Check for structural separators that break lists
3463                                            trimmed.starts_with("```")
3464                                                || trimmed.starts_with("~~~")
3465                                                || trimmed.starts_with("---")
3466                                                || trimmed.starts_with("***")
3467                                                || trimmed.starts_with("___")
3468                                                || blockquote_level_changed
3469                                                || crate::utils::skip_context::is_table_line(trimmed)
3470                                                || between_line.heading.is_some()
3471                                        } else {
3472                                            false
3473                                        }
3474                                    });
3475                                    found_continuation = !has_structural_separators;
3476                                }
3477                            }
3478                        }
3479                    }
3480
3481                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3482                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3483                    }
3484                    if found_continuation {
3485                        // Include the blank line in the block
3486                        block.end_line = line_num;
3487                    } else {
3488                        // Blank line ends the list - don't include it
3489                        list_blocks.push(block.clone());
3490                        current_block = None;
3491                    }
3492                } else {
3493                    // Check for lazy continuation - non-indented line immediately after a list item
3494                    // But only if the line has sufficient indentation for the list type
3495                    let min_required_indent = if block.is_ordered {
3496                        current_indent_level + last_marker_width
3497                    } else {
3498                        current_indent_level + 2
3499                    };
3500
3501                    // For lazy continuation to apply, the line must either:
3502                    // 1. Have no indentation (true lazy continuation)
3503                    // 2. Have sufficient indentation for the list type
3504                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3505                    let line_content = line_info.content(content).trim();
3506
3507                    // Check for table-like patterns
3508                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3509
3510                    // Check if blockquote level changed (not just if line starts with ">")
3511                    // Lines within the same blockquote level are NOT structural separators
3512                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3513                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3514                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3515
3516                    let is_structural_separator = line_info.heading.is_some()
3517                        || line_content.starts_with("```")
3518                        || line_content.starts_with("~~~")
3519                        || line_content.starts_with("---")
3520                        || line_content.starts_with("***")
3521                        || line_content.starts_with("___")
3522                        || blockquote_level_changed
3523                        || looks_like_table;
3524
3525                    // Allow lazy continuation if we're still within the same list block
3526                    // (not just immediately after a list item)
3527                    let is_lazy_continuation = !is_structural_separator
3528                        && !line_info.is_blank
3529                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3530
3531                    if is_lazy_continuation {
3532                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3533                        // it's probably not a continuation
3534                        // BUT: for blockquote lines with sufficient effective indent, always treat as continuation
3535                        let line_content_raw = line_info.content(content);
3536                        let block_bq_level_lazy = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3537                        let line_bq_level_lazy = line_content_raw
3538                            .chars()
3539                            .take_while(|c| *c == '>' || c.is_whitespace())
3540                            .filter(|&c| c == '>')
3541                            .count();
3542                        let has_proper_blockquote_indent =
3543                            if line_bq_level_lazy > 0 && line_bq_level_lazy == block_bq_level_lazy {
3544                                // Compute effective indent after blockquote markers
3545                                let mut pos = 0;
3546                                let mut found_markers = 0;
3547                                for c in line_content_raw.chars() {
3548                                    pos += c.len_utf8();
3549                                    if c == '>' {
3550                                        found_markers += 1;
3551                                        if found_markers == line_bq_level_lazy {
3552                                            if line_content_raw.get(pos..pos + 1) == Some(" ") {
3553                                                pos += 1;
3554                                            }
3555                                            break;
3556                                        }
3557                                    }
3558                                }
3559                                let after_bq = &line_content_raw[pos..];
3560                                let effective_indent_lazy = after_bq.len() - after_bq.trim_start().len();
3561                                let min_required_for_bq = if block.is_ordered { last_marker_width } else { 2 };
3562                                effective_indent_lazy >= min_required_for_bq
3563                            } else {
3564                                false
3565                            };
3566
3567                        // If it has proper blockquote indent, it's a continuation regardless of uppercase
3568                        if has_proper_blockquote_indent {
3569                            block.end_line = line_num;
3570                        } else {
3571                            let content_to_check = if !blockquote_prefix.is_empty() {
3572                                // Strip blockquote prefix to check the actual content
3573                                line_info
3574                                    .content(content)
3575                                    .strip_prefix(&blockquote_prefix)
3576                                    .unwrap_or(line_info.content(content))
3577                                    .trim()
3578                            } else {
3579                                line_info.content(content).trim()
3580                            };
3581
3582                            let starts_with_uppercase =
3583                                content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3584
3585                            // If it starts with uppercase and the previous line ended with punctuation,
3586                            // it's likely a new paragraph, not a continuation
3587                            if starts_with_uppercase && last_list_item_line > 0 {
3588                                // This looks like a new paragraph
3589                                list_blocks.push(block.clone());
3590                                current_block = None;
3591                            } else {
3592                                // This is a lazy continuation line
3593                                block.end_line = line_num;
3594                            }
3595                        }
3596                    } else {
3597                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3598                        list_blocks.push(block.clone());
3599                        current_block = None;
3600                    }
3601                }
3602            }
3603        }
3604
3605        // Don't forget the last block
3606        if let Some(block) = current_block {
3607            list_blocks.push(block);
3608        }
3609
3610        // Merge adjacent blocks that should be one
3611        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3612
3613        list_blocks
3614    }
3615
3616    /// Compute character frequency for fast content analysis
3617    fn compute_char_frequency(content: &str) -> CharFrequency {
3618        let mut frequency = CharFrequency::default();
3619
3620        for ch in content.chars() {
3621            match ch {
3622                '#' => frequency.hash_count += 1,
3623                '*' => frequency.asterisk_count += 1,
3624                '_' => frequency.underscore_count += 1,
3625                '-' => frequency.hyphen_count += 1,
3626                '+' => frequency.plus_count += 1,
3627                '>' => frequency.gt_count += 1,
3628                '|' => frequency.pipe_count += 1,
3629                '[' => frequency.bracket_count += 1,
3630                '`' => frequency.backtick_count += 1,
3631                '<' => frequency.lt_count += 1,
3632                '!' => frequency.exclamation_count += 1,
3633                '\n' => frequency.newline_count += 1,
3634                _ => {}
3635            }
3636        }
3637
3638        frequency
3639    }
3640
3641    /// Parse HTML tags in the content
3642    fn parse_html_tags(
3643        content: &str,
3644        lines: &[LineInfo],
3645        code_blocks: &[(usize, usize)],
3646        flavor: MarkdownFlavor,
3647    ) -> Vec<HtmlTag> {
3648        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3649            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3650
3651        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3652
3653        for cap in HTML_TAG_REGEX.captures_iter(content) {
3654            let full_match = cap.get(0).unwrap();
3655            let match_start = full_match.start();
3656            let match_end = full_match.end();
3657
3658            // Skip if in code block
3659            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3660                continue;
3661            }
3662
3663            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3664            let tag_name_original = cap.get(2).unwrap().as_str();
3665            let tag_name = tag_name_original.to_lowercase();
3666            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3667
3668            // Skip JSX components in MDX files (tags starting with uppercase letter)
3669            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3670            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3671                continue;
3672            }
3673
3674            // Find which line this tag is on
3675            let mut line_num = 1;
3676            let mut col_start = match_start;
3677            let mut col_end = match_end;
3678            for (idx, line_info) in lines.iter().enumerate() {
3679                if match_start >= line_info.byte_offset {
3680                    line_num = idx + 1;
3681                    col_start = match_start - line_info.byte_offset;
3682                    col_end = match_end - line_info.byte_offset;
3683                } else {
3684                    break;
3685                }
3686            }
3687
3688            html_tags.push(HtmlTag {
3689                line: line_num,
3690                start_col: col_start,
3691                end_col: col_end,
3692                byte_offset: match_start,
3693                byte_end: match_end,
3694                tag_name,
3695                is_closing,
3696                is_self_closing,
3697                raw_content: full_match.as_str().to_string(),
3698            });
3699        }
3700
3701        html_tags
3702    }
3703
3704    /// Parse table rows in the content
3705    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3706        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3707
3708        for (line_idx, line_info) in lines.iter().enumerate() {
3709            // Skip lines in code blocks or blank lines
3710            if line_info.in_code_block || line_info.is_blank {
3711                continue;
3712            }
3713
3714            let line = line_info.content(content);
3715            let line_num = line_idx + 1;
3716
3717            // Check if this line contains pipes (potential table row)
3718            if !line.contains('|') {
3719                continue;
3720            }
3721
3722            // Count columns by splitting on pipes
3723            let parts: Vec<&str> = line.split('|').collect();
3724            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3725
3726            // Check if this is a separator row
3727            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3728            let mut column_alignments = Vec::new();
3729
3730            if is_separator {
3731                for part in &parts[1..parts.len() - 1] {
3732                    // Skip first and last empty parts
3733                    let trimmed = part.trim();
3734                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3735                        "center".to_string()
3736                    } else if trimmed.ends_with(':') {
3737                        "right".to_string()
3738                    } else if trimmed.starts_with(':') {
3739                        "left".to_string()
3740                    } else {
3741                        "none".to_string()
3742                    };
3743                    column_alignments.push(alignment);
3744                }
3745            }
3746
3747            table_rows.push(TableRow {
3748                line: line_num,
3749                is_separator,
3750                column_count,
3751                column_alignments,
3752            });
3753        }
3754
3755        table_rows
3756    }
3757
3758    /// Parse bare URLs and emails in the content
3759    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3760        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3761
3762        // Check for bare URLs (not in angle brackets or markdown links)
3763        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3764            let full_match = cap.get(0).unwrap();
3765            let match_start = full_match.start();
3766            let match_end = full_match.end();
3767
3768            // Skip if in code block
3769            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3770                continue;
3771            }
3772
3773            // Skip if already in angle brackets or markdown links
3774            let preceding_char = if match_start > 0 {
3775                content.chars().nth(match_start - 1)
3776            } else {
3777                None
3778            };
3779            let following_char = content.chars().nth(match_end);
3780
3781            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3782                continue;
3783            }
3784            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3785                continue;
3786            }
3787
3788            let url = full_match.as_str();
3789            let url_type = if url.starts_with("https://") {
3790                "https"
3791            } else if url.starts_with("http://") {
3792                "http"
3793            } else if url.starts_with("ftp://") {
3794                "ftp"
3795            } else {
3796                "other"
3797            };
3798
3799            // Find which line this URL is on
3800            let mut line_num = 1;
3801            let mut col_start = match_start;
3802            let mut col_end = match_end;
3803            for (idx, line_info) in lines.iter().enumerate() {
3804                if match_start >= line_info.byte_offset {
3805                    line_num = idx + 1;
3806                    col_start = match_start - line_info.byte_offset;
3807                    col_end = match_end - line_info.byte_offset;
3808                } else {
3809                    break;
3810                }
3811            }
3812
3813            bare_urls.push(BareUrl {
3814                line: line_num,
3815                start_col: col_start,
3816                end_col: col_end,
3817                byte_offset: match_start,
3818                byte_end: match_end,
3819                url: url.to_string(),
3820                url_type: url_type.to_string(),
3821            });
3822        }
3823
3824        // Check for bare email addresses
3825        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3826            let full_match = cap.get(0).unwrap();
3827            let match_start = full_match.start();
3828            let match_end = full_match.end();
3829
3830            // Skip if in code block
3831            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3832                continue;
3833            }
3834
3835            // Skip if already in angle brackets or markdown links
3836            let preceding_char = if match_start > 0 {
3837                content.chars().nth(match_start - 1)
3838            } else {
3839                None
3840            };
3841            let following_char = content.chars().nth(match_end);
3842
3843            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3844                continue;
3845            }
3846            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3847                continue;
3848            }
3849
3850            let email = full_match.as_str();
3851
3852            // Find which line this email is on
3853            let mut line_num = 1;
3854            let mut col_start = match_start;
3855            let mut col_end = match_end;
3856            for (idx, line_info) in lines.iter().enumerate() {
3857                if match_start >= line_info.byte_offset {
3858                    line_num = idx + 1;
3859                    col_start = match_start - line_info.byte_offset;
3860                    col_end = match_end - line_info.byte_offset;
3861                } else {
3862                    break;
3863                }
3864            }
3865
3866            bare_urls.push(BareUrl {
3867                line: line_num,
3868                start_col: col_start,
3869                end_col: col_end,
3870                byte_offset: match_start,
3871                byte_end: match_end,
3872                url: email.to_string(),
3873                url_type: "email".to_string(),
3874            });
3875        }
3876
3877        bare_urls
3878    }
3879
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is built by `ValidHeadingsIter::new` from this context's per-line
    /// data and borrows it, so it cannot outlive `self`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
3903
3904    /// Check if the document contains any valid CommonMark headings
3905    ///
3906    /// Returns `true` if there is at least one heading with proper space after `#`.
3907    #[must_use]
3908    pub fn has_valid_headings(&self) -> bool {
3909        self.lines
3910            .iter()
3911            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3912    }
3913}
3914
3915/// Merge adjacent list blocks that should be treated as one
3916fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3917    if list_blocks.len() < 2 {
3918        return;
3919    }
3920
3921    let mut merger = ListBlockMerger::new(content, lines);
3922    *list_blocks = merger.merge(list_blocks);
3923}
3924
/// Helper struct to manage the complex logic of merging list blocks
///
/// Holds borrowed views of the document so the merge predicates can inspect the
/// lines that sit between two candidate blocks.
struct ListBlockMerger<'a> {
    /// Full document text; passed to `LineInfo::content` to obtain a line's text
    content: &'a str,
    /// Per-line info, looked up with `line_num - 1` (block line numbers are 1-based)
    lines: &'a [LineInfo],
}
3930
3931impl<'a> ListBlockMerger<'a> {
3932    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3933        Self { content, lines }
3934    }
3935
3936    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3937        let mut merged = Vec::with_capacity(list_blocks.len());
3938        let mut current = list_blocks[0].clone();
3939
3940        for next in list_blocks.iter().skip(1) {
3941            if self.should_merge_blocks(&current, next) {
3942                current = self.merge_two_blocks(current, next);
3943            } else {
3944                merged.push(current);
3945                current = next.clone();
3946            }
3947        }
3948
3949        merged.push(current);
3950        merged
3951    }
3952
3953    /// Determine if two adjacent list blocks should be merged
3954    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3955        // Basic compatibility checks
3956        if !self.blocks_are_compatible(current, next) {
3957            return false;
3958        }
3959
3960        // Check spacing and content between blocks
3961        let spacing = self.analyze_spacing_between(current, next);
3962        match spacing {
3963            BlockSpacing::Consecutive => true,
3964            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3965            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3966                self.can_merge_with_content_between(current, next)
3967            }
3968        }
3969    }
3970
3971    /// Check if blocks have compatible structure for merging
3972    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3973        current.is_ordered == next.is_ordered
3974            && current.blockquote_prefix == next.blockquote_prefix
3975            && current.nesting_level == next.nesting_level
3976    }
3977
3978    /// Analyze the spacing between two list blocks
3979    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3980        let gap = next.start_line - current.end_line;
3981
3982        match gap {
3983            1 => BlockSpacing::Consecutive,
3984            2 => BlockSpacing::SingleBlank,
3985            _ if gap > 2 => {
3986                if self.has_only_blank_lines_between(current, next) {
3987                    BlockSpacing::MultipleBlanks
3988                } else {
3989                    BlockSpacing::ContentBetween
3990                }
3991            }
3992            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3993        }
3994    }
3995
3996    /// Check if unordered lists can be merged with a single blank line between
3997    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3998        // Check if there are structural separators between the blocks
3999        // If has_meaningful_content_between returns true, it means there are structural separators
4000        if has_meaningful_content_between(self.content, current, next, self.lines) {
4001            return false; // Structural separators prevent merging
4002        }
4003
4004        // Only merge unordered lists with same marker across single blank
4005        !current.is_ordered && current.marker == next.marker
4006    }
4007
4008    /// Check if ordered lists can be merged when there's content between them
4009    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4010        // Do not merge lists if there are structural separators between them
4011        if has_meaningful_content_between(self.content, current, next, self.lines) {
4012            return false; // Structural separators prevent merging
4013        }
4014
4015        // Only consider merging ordered lists if there's no structural content between
4016        current.is_ordered && next.is_ordered
4017    }
4018
4019    /// Check if there are only blank lines between blocks
4020    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4021        for line_num in (current.end_line + 1)..next.start_line {
4022            if let Some(line_info) = self.lines.get(line_num - 1)
4023                && !line_info.content(self.content).trim().is_empty()
4024            {
4025                return false;
4026            }
4027        }
4028        true
4029    }
4030
4031    /// Merge two compatible list blocks into one
4032    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4033        current.end_line = next.end_line;
4034        current.item_lines.extend_from_slice(&next.item_lines);
4035
4036        // Update max marker width
4037        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4038
4039        // Handle marker consistency for unordered lists
4040        if !current.is_ordered && self.markers_differ(&current, next) {
4041            current.marker = None; // Mixed markers
4042        }
4043
4044        current
4045    }
4046
4047    /// Check if two blocks have different markers
4048    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4049        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4050    }
4051}
4052
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the line gap
/// between the end of one block and the start of the next.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks
    Consecutive,
    /// One blank line between blocks
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks
    ContentBetween,
}
4061
4062/// Check if there's meaningful content (not just blank lines) between two list blocks
4063fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4064    // Check lines between current.end_line and next.start_line
4065    for line_num in (current.end_line + 1)..next.start_line {
4066        if let Some(line_info) = lines.get(line_num - 1) {
4067            // Convert to 0-indexed
4068            let trimmed = line_info.content(content).trim();
4069
4070            // Skip empty lines
4071            if trimmed.is_empty() {
4072                continue;
4073            }
4074
4075            // Check for structural separators that should separate lists (CommonMark compliant)
4076
4077            // Headings separate lists
4078            if line_info.heading.is_some() {
4079                return true; // Has meaningful content - headings separate lists
4080            }
4081
4082            // Horizontal rules separate lists (---, ***, ___)
4083            if is_horizontal_rule(trimmed) {
4084                return true; // Has meaningful content - horizontal rules separate lists
4085            }
4086
4087            // Tables separate lists
4088            if crate::utils::skip_context::is_table_line(trimmed) {
4089                return true; // Has meaningful content - tables separate lists
4090            }
4091
4092            // Blockquotes separate lists
4093            if trimmed.starts_with('>') {
4094                return true; // Has meaningful content - blockquotes separate lists
4095            }
4096
4097            // Code block fences separate lists (unless properly indented as list content)
4098            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4099                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4100
4101                // Check if this code block is properly indented as list continuation
4102                let min_continuation_indent = if current.is_ordered {
4103                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4104                } else {
4105                    current.nesting_level + 2
4106                };
4107
4108                if line_indent < min_continuation_indent {
4109                    // This is a standalone code block that separates lists
4110                    return true; // Has meaningful content - standalone code blocks separate lists
4111                }
4112            }
4113
4114            // Check if this line has proper indentation for list continuation
4115            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4116
4117            // Calculate minimum indentation needed to be list continuation
4118            let min_indent = if current.is_ordered {
4119                current.nesting_level + current.max_marker_width
4120            } else {
4121                current.nesting_level + 2
4122            };
4123
4124            // If the line is not indented enough to be list continuation, it's meaningful content
4125            if line_indent < min_indent {
4126                return true; // Has meaningful content - content not indented as list continuation
4127            }
4128
4129            // If we reach here, the line is properly indented as list continuation
4130            // Continue checking other lines
4131        }
4132    }
4133
4134    // Only blank lines or properly indented list continuation content between blocks
4135    false
4136}
4137
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces between characters
/// - No other characters allowed
///
/// Only literal spaces are accepted as indentation. A tab anywhere in the
/// leading whitespace (including after one to three spaces), or any other
/// leading character that is not a rule marker, makes the line a non-match.
/// Trailing whitespace is ignored.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // Strip spaces only: a tab (or any other whitespace) left at the front of
    // `after_indent` is then rejected by the content check, because the first
    // character must be a rule marker.
    let after_indent = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_indent.len();
    if leading_spaces > 3 {
        return false;
    }

    is_horizontal_rule_content(after_indent.trim_end())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// Returns `true` when `trimmed` starts with `-`, `*`, or `_`, contains at
/// least three occurrences of that same character, and every other character
/// is a space or a tab. No indentation check is performed here.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next() else {
        return false;
    };
    if !matches!(marker, '-' | '*' | '_') {
        return false;
    }

    // Count markers in a single pass without allocating; bail out on the first
    // character that is neither the marker nor inner whitespace.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }
    marker_count >= 3
}
4178
/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// The argument is forwarded unchanged, so no indentation check happens here;
/// use `is_horizontal_rule_line` when the raw line's leading whitespace matters.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4183
/// Unit tests for `LintContext`: offset/line bookkeeping, per-line info,
/// list item detection, MDX ESM block flags, blockquote parsing, and the
/// exclusion of footnote definitions from reference definitions.
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty input yields a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A single line without a trailing newline maps offsets to 1-based (line, col).
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and offset-to-position conversion across several lines,
    /// including a blank line.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    /// Per-line metadata (content, byte offset, indent, blank flag) and the
    /// 1-based helper accessors.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Test helper methods (both take 1-based line numbers)
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// List item detection for unordered and ordered markers.
    /// Lines 4 ("   2) Nested ordered") and 5 (blank) are present in the input
    /// but not asserted on.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets at line starts, just past each character, and at end of content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }

    /// In MDX flavor, leading `import`/`export` lines are flagged as ESM blocks
    /// while the following blank, heading, and text lines are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are not flagged when the flavor is Standard.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }

    /// Blockquote lines whose content is heavily indented after `>` are still
    /// recorded as blockquotes, with the inner content stripped of that padding.
    #[test]
    fn test_blockquote_with_indented_content() {
        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
        // The content inside the blockquote may also be detected as a code block (which is correct),
        // but for MD046 purposes, we need to know the line is inside a blockquote.
        let content = r#"# Heading

>      -S socket-path
>                    More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 3 (index 2) should be detected as blockquote
        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        // Line 4 (index 3) should also be blockquote
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // Verify blockquote content is correctly parsed
        // Note: spaces_after includes the spaces between `>` and content
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        // 6 spaces after the `>` marker
        assert!(bq3.has_multiple_spaces_after_marker);

        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }

    /// Footnote definitions (`[^id]: ...`) are excluded from `reference_defs`;
    /// a regular `[id]: url "title"` definition in the same document is kept.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should only have one reference definition (the regular one)
        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The only reference def should be the regular one
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }

    /// A footnote whose body is an inline link produces no reference definition.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        // Regression test for issue #286: footnote containing an inline link
        // was incorrectly parsed as a reference definition with URL "[link](url)"
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should have no reference definitions
        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }

    /// Numeric, named, single-character, hyphenated, and mixed footnote ids are
    /// all skipped; only the two plain reference definitions remain.
    #[test]
    fn test_various_footnote_formats_excluded() {
        // Test various footnote ID formats are all excluded
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should only have the two regular reference definitions
        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
}