rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Time an expression and optionally report how long it took.
///
/// `profile_section!(name, profile, code)` evaluates `code` exactly once and yields its
/// value. On non-WASM targets the elapsed wall-clock time is printed to stderr as
/// `[PROFILE] <name>: <duration>` when `profile` is true.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// WASM variant of `profile_section!`: no timing, the expression is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Link pattern covering both inline links `[text](url "title")` and reference links `[text][ref]`.
// Flags: (?s) makes `.` match newlines; (?x) allows the whitespace and `#` comments used below.
// Capture groups: 1 = link text, 2 = angle-bracketed URL, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern: same shape as LINK_PATTERN but requires a leading `!`.
// Flags: (?s) makes `.` match newlines; (?x) allows the whitespace and `#` comments used below.
// Capture groups: 1 = alt text, 2 = angle-bracketed URL, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[id]: url "title"` at the start of a line.
// (?m) anchors `^`/`$` at line boundaries; up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (no whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs have no local pattern: the centralized `URL_SIMPLE_REGEX` imported from `regex_cache` is used instead
65
// Pattern for email addresses: local part, `@`, domain, and a final
// alphabetic label of at least two characters after the last dot.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures the whole prefix: optional leading whitespace, one or more `>`,
// and any whitespace that follows.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a single line of the document.
///
/// The line text itself is not stored; it is addressed by `byte_offset` and `byte_len`
/// and can be sliced out of the source with [`LineInfo::content`].
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (true for every line after the first line of such a span)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132    /// Get the line content as a string slice from the source document
133    pub fn content<'a>(&self, source: &'a str) -> &'a str {
134        &source[self.byte_offset..self.byte_offset + self.byte_len]
135    }
136}
137
/// Information about a list item, attached to the line on which the item starts
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists (`None` otherwise)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
152
/// Heading style type
///
/// A fieldless enum, so equality is total: `Eq` is derived alongside `PartialEq`
/// to let the type be used wherever full equality is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
163
/// Parsed link information
///
/// Text fields are `Cow`s tied to the document lifetime `'a`, so they may borrow
/// from the source instead of allocating.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
188
/// Information about a broken link reported by pulldown-cmark
/// (a reference that could not be resolved to a definition)
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
197
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `1` for `[^1]`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
210
/// Parsed image information
///
/// Mirrors [`ParsedLink`] field for field, with `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
235
/// Reference definition `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
256
/// Parsed code span information
///
/// A span may cover several lines; `line` and `end_line` differ in that case.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
277
/// Parsed math span information (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
298
/// Information about a heading
///
/// Malformed ATX headings are also recorded, with `is_valid` set to false;
/// use `is_valid` to tell them apart from CommonMark-compliant headings.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
326
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
/// Yielded by [`ValidHeadingsIter`]; both references borrow from the iterated line slice.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
340
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order
    lines: &'a [LineInfo],
    /// Index (0-based) of the next line to inspect
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351    fn new(lines: &'a [LineInfo]) -> Self {
352        Self {
353            lines,
354            current_index: 0,
355        }
356    }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360    type Item = ValidHeading<'a>;
361
362    fn next(&mut self) -> Option<Self::Item> {
363        while self.current_index < self.lines.len() {
364            let idx = self.current_index;
365            self.current_index += 1;
366
367            let line_info = &self.lines[idx];
368            if let Some(heading) = &line_info.heading
369                && heading.is_valid
370            {
371                return Some(ValidHeading {
372                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
373                    heading,
374                    line_info,
375                });
376            }
377        }
378        None
379    }
380}
381
/// Information about a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
402
/// Information about a list block (a group of list items treated as one list)
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like start_line/end_line; confirm against parse_list_blocks
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// List item data keyed by the byte offset of the line the item starts on.
/// Tuple layout: (is_ordered, marker, marker_column, content_column, number)
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// List of `(start, end)` byte ranges, as produced by JSX expression and MDX comment detection
type ByteRanges = Vec<(usize, usize)>;
431
/// Character frequency data for fast content analysis
///
/// Each field counts one Markdown-significant character; the parenthesised note
/// names the constructs that character can introduce. All counts default to zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
460
/// Pre-parsed HTML tag information
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
483
/// Pre-parsed emphasis span information
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
504
/// Pre-parsed table row information
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one entry per column;
    /// each entry is one of "left", "center", "right", "none"
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
517
/// Pre-parsed bare URL information (not in links)
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string (for `url_type` "email" this is presumably the address itself; confirm)
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
536
/// Shared, pre-computed view of one Markdown document that lint rules read from.
///
/// Built once by `LintContext::new`. Most fields are filled eagerly there; the
/// `OnceLock` caches are either pre-populated by `new` (code spans, emphasis spans)
/// or left empty to be filled on first access.
pub struct LintContext<'a> {
    pub content: &'a str, // The document text all offsets refer to
    pub line_offsets: Vec<usize>, // Byte offset at which each line starts (first entry is 0)
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (pre-populated by `new`)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Emphasis spans (pre-populated by `new`)
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
}
569
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
/// Concatenating them in field order reproduces the original line.
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`
    indent: &'a str,
    /// The uninterrupted run of `>` characters
    markers: &'a str,
    /// Spaces/tabs directly following the markers
    spaces_after: &'a str,
    /// Everything after that whitespace
    content: &'a str,
}

/// Split a line into blockquote indent, markers, trailing whitespace and content.
///
/// Returns `None` when the first non-blank character is not `>` (including empty or
/// whitespace-only lines). Only a contiguous run of `>` counts as markers, so for
/// `> > text` the second `>` is part of `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    const BLANKS: &[char] = &[' ', '\t'];

    // Peel the prefix off from the left, one component at a time.
    let from_markers = line.trim_start_matches(BLANKS);
    let from_spaces = from_markers.trim_start_matches('>');
    if from_spaces.len() == from_markers.len() {
        // Nothing was stripped, so there is no '>' marker here.
        return None;
    }
    let from_content = from_spaces.trim_start_matches(BLANKS);

    // Convert the remaining-suffix lengths back into split positions within `line`.
    let indent_end = line.len() - from_markers.len();
    let markers_end = line.len() - from_spaces.len();
    let spaces_end = line.len() - from_content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content: &line[spaces_end..],
    })
}
614
615impl<'a> LintContext<'a> {
616    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617        #[cfg(not(target_arch = "wasm32"))]
618        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619        #[cfg(target_arch = "wasm32")]
620        let profile = false;
621
622        let line_offsets = profile_section!("Line offsets", profile, {
623            let mut offsets = vec![0];
624            for (i, c) in content.char_indices() {
625                if c == '\n' {
626                    offsets.push(i + 1);
627                }
628            }
629            offsets
630        });
631
632        // Detect code blocks once and cache them
633        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
634
635        // Pre-compute HTML comment ranges ONCE for all operations
636        let html_comment_ranges = profile_section!(
637            "HTML comment ranges",
638            profile,
639            crate::utils::skip_context::compute_html_comment_ranges(content)
640        );
641
642        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
643        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
644            if flavor == MarkdownFlavor::MkDocs {
645                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
646            } else {
647                Vec::new()
648            }
649        });
650
651        // Pre-compute Quarto div block ranges for Quarto flavor
652        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
653            if flavor == MarkdownFlavor::Quarto {
654                crate::utils::quarto_divs::detect_div_block_ranges(content)
655            } else {
656                Vec::new()
657            }
658        });
659
660        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
661        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
662        let (mut lines, emphasis_spans) = profile_section!(
663            "Basic line info",
664            profile,
665            Self::compute_basic_line_info(
666                content,
667                &line_offsets,
668                &code_blocks,
669                flavor,
670                &html_comment_ranges,
671                &autodoc_ranges,
672                &quarto_div_ranges,
673            )
674        );
675
676        // Detect HTML blocks BEFORE heading detection
677        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
678
679        // Detect ESM import/export blocks in MDX files BEFORE heading detection
680        profile_section!(
681            "ESM blocks",
682            profile,
683            Self::detect_esm_blocks(content, &mut lines, flavor)
684        );
685
686        // Detect JSX expressions and MDX comments in MDX files
687        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
688            "JSX/MDX detection",
689            profile,
690            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
691        );
692
693        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
694        profile_section!(
695            "MkDocs constructs",
696            profile,
697            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
698        );
699
700        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
701        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
702
703        // Now detect headings and blockquotes
704        profile_section!(
705            "Headings & blockquotes",
706            profile,
707            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
708        );
709
710        // Parse code spans early so we can exclude them from link/image parsing
711        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
712
713        // Mark lines that are continuations of multi-line code spans
714        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
715        for span in &code_spans {
716            if span.end_line > span.line {
717                // Mark lines after the first line as continuations
718                for line_num in (span.line + 1)..=span.end_line {
719                    if let Some(line_info) = lines.get_mut(line_num - 1) {
720                        line_info.in_code_span_continuation = true;
721                    }
722                }
723            }
724        }
725
726        // Parse links, images, references, and list blocks
727        let (links, broken_links, footnote_refs) = profile_section!(
728            "Links",
729            profile,
730            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
731        );
732
733        let images = profile_section!(
734            "Images",
735            profile,
736            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
737        );
738
739        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
740
741        // Build O(1) lookup map for reference definitions by lowercase ID
742        let reference_defs_map: HashMap<String, usize> = reference_defs
743            .iter()
744            .enumerate()
745            .map(|(idx, def)| (def.id.to_lowercase(), idx))
746            .collect();
747
748        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
749
750        // Compute character frequency for fast content analysis
751        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
752
753        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
754        let table_blocks = profile_section!(
755            "Table blocks",
756            profile,
757            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
758                content,
759                &code_blocks,
760                &code_spans,
761                &html_comment_ranges,
762            )
763        );
764
765        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
766        let line_index = profile_section!(
767            "Line index",
768            profile,
769            crate::utils::range_utils::LineIndex::new(content)
770        );
771
772        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
773        let jinja_ranges = profile_section!(
774            "Jinja ranges",
775            profile,
776            crate::utils::jinja_utils::find_jinja_ranges(content)
777        );
778
779        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
780        let citation_ranges = profile_section!("Citation ranges", profile, {
781            if flavor == MarkdownFlavor::Quarto {
782                crate::utils::quarto_divs::find_citation_ranges(content)
783            } else {
784                Vec::new()
785            }
786        });
787
788        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
789        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
790            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
791            let mut ranges = Vec::new();
792            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
793                ranges.push((mat.start(), mat.end()));
794            }
795            ranges
796        });
797
798        Self {
799            content,
800            line_offsets,
801            code_blocks,
802            lines,
803            links,
804            images,
805            broken_links,
806            footnote_refs,
807            reference_defs,
808            reference_defs_map,
809            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
810            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
811            list_blocks,
812            char_frequency,
813            html_tags_cache: OnceLock::new(),
814            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
815            table_rows_cache: OnceLock::new(),
816            bare_urls_cache: OnceLock::new(),
817            has_mixed_list_nesting_cache: OnceLock::new(),
818            html_comment_ranges,
819            table_blocks,
820            line_index,
821            jinja_ranges,
822            flavor,
823            source_file,
824            jsx_expression_ranges,
825            mdx_comment_ranges,
826            citation_ranges,
827            shortcode_ranges,
828            inline_config: InlineConfig::from_content(content),
829        }
830    }
831
832    /// Check if a rule is disabled at a specific line number (1-indexed)
833    ///
834    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
835    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
836    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
837        self.inline_config.is_rule_disabled(rule_name, line_number)
838    }
839
840    /// Get code spans - computed lazily on first access
841    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
842        Arc::clone(
843            self.code_spans_cache
844                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
845        )
846    }
847
848    /// Get math spans - computed lazily on first access
849    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
850        Arc::clone(
851            self.math_spans_cache
852                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
853        )
854    }
855
856    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
857    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
858        let math_spans = self.math_spans();
859        math_spans
860            .iter()
861            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
862    }
863
864    /// Get HTML comment ranges - pre-computed during LintContext construction
865    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
866        &self.html_comment_ranges
867    }
868
869    /// Get HTML tags - computed lazily on first access
870    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
871        Arc::clone(self.html_tags_cache.get_or_init(|| {
872            Arc::new(Self::parse_html_tags(
873                self.content,
874                &self.lines,
875                &self.code_blocks,
876                self.flavor,
877            ))
878        }))
879    }
880
881    /// Get emphasis spans - pre-computed during construction
882    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
883        Arc::clone(
884            self.emphasis_spans_cache
885                .get()
886                .expect("emphasis_spans_cache initialized during construction"),
887        )
888    }
889
890    /// Get table rows - computed lazily on first access
891    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
892        Arc::clone(
893            self.table_rows_cache
894                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
895        )
896    }
897
898    /// Get bare URLs - computed lazily on first access
899    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
900        Arc::clone(
901            self.bare_urls_cache
902                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
903        )
904    }
905
906    /// Check if document has mixed ordered/unordered list nesting.
907    /// Result is cached after first computation (document-level invariant).
908    /// This is used by MD007 for smart style auto-detection.
909    pub fn has_mixed_list_nesting(&self) -> bool {
910        *self
911            .has_mixed_list_nesting_cache
912            .get_or_init(|| self.compute_mixed_list_nesting())
913    }
914
915    /// Internal computation for mixed list nesting (only called once per LintContext).
916    fn compute_mixed_list_nesting(&self) -> bool {
917        // Track parent list items by their marker position and type
918        // Using marker_column instead of indent because it works correctly
919        // for blockquoted content where indent doesn't account for the prefix
920        // Stack stores: (marker_column, is_ordered)
921        let mut stack: Vec<(usize, bool)> = Vec::new();
922        let mut last_was_blank = false;
923
924        for line_info in &self.lines {
925            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
926            if line_info.in_code_block
927                || line_info.in_front_matter
928                || line_info.in_mkdocstrings
929                || line_info.in_html_comment
930                || line_info.in_esm_block
931            {
932                continue;
933            }
934
935            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
936            if line_info.is_blank {
937                last_was_blank = true;
938                continue;
939            }
940
941            if let Some(list_item) = &line_info.list_item {
942                // Normalize column 1 to column 0 (consistent with MD007 check function)
943                let current_pos = if list_item.marker_column == 1 {
944                    0
945                } else {
946                    list_item.marker_column
947                };
948
949                // If there was a blank line and this item is at root level, reset stack
950                if last_was_blank && current_pos == 0 {
951                    stack.clear();
952                }
953                last_was_blank = false;
954
955                // Pop items at same or greater position (they're siblings or deeper, not parents)
956                while let Some(&(pos, _)) = stack.last() {
957                    if pos >= current_pos {
958                        stack.pop();
959                    } else {
960                        break;
961                    }
962                }
963
964                // Check if immediate parent has different type - this is mixed nesting
965                if let Some(&(_, parent_is_ordered)) = stack.last()
966                    && parent_is_ordered != list_item.is_ordered
967                {
968                    return true; // Found mixed nesting - early exit
969                }
970
971                stack.push((current_pos, list_item.is_ordered));
972            } else {
973                // Non-list line (but not blank) - could be paragraph or other content
974                last_was_blank = false;
975            }
976        }
977
978        false
979    }
980
981    /// Map a byte offset to (line, column)
982    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
983        match self.line_offsets.binary_search(&offset) {
984            Ok(line) => (line + 1, 1),
985            Err(line) => {
986                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
987                (line, offset - line_start + 1)
988            }
989        }
990    }
991
992    /// Check if a position is within a code block or code span
993    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
994        // Check code blocks first
995        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
996            return true;
997        }
998
999        // Check inline code spans (lazy load if needed)
1000        self.code_spans()
1001            .iter()
1002            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1003    }
1004
1005    /// Get line information by line number (1-indexed)
1006    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1007        if line_num > 0 {
1008            self.lines.get(line_num - 1)
1009        } else {
1010            None
1011        }
1012    }
1013
1014    /// Get byte offset for a line number (1-indexed)
1015    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1016        self.line_info(line_num).map(|info| info.byte_offset)
1017    }
1018
1019    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1020    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1021        let normalized_id = ref_id.to_lowercase();
1022        self.reference_defs_map
1023            .get(&normalized_id)
1024            .map(|&idx| self.reference_defs[idx].url.as_str())
1025    }
1026
1027    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1028    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1029        let normalized_id = ref_id.to_lowercase();
1030        self.reference_defs_map
1031            .get(&normalized_id)
1032            .map(|&idx| &self.reference_defs[idx])
1033    }
1034
1035    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1036    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1037        let normalized_id = ref_id.to_lowercase();
1038        self.reference_defs_map.contains_key(&normalized_id)
1039    }
1040
1041    /// Check if a line is part of a list block
1042    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1043        self.list_blocks
1044            .iter()
1045            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1046    }
1047
1048    /// Get the list block containing a specific line
1049    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1050        self.list_blocks
1051            .iter()
1052            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1053    }
1054
1055    // Compatibility methods for DocumentStructure migration
1056
1057    /// Check if a line is within a code block
1058    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1059        if line_num == 0 || line_num > self.lines.len() {
1060            return false;
1061        }
1062        self.lines[line_num - 1].in_code_block
1063    }
1064
1065    /// Check if a line is within front matter
1066    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1067        if line_num == 0 || line_num > self.lines.len() {
1068            return false;
1069        }
1070        self.lines[line_num - 1].in_front_matter
1071    }
1072
1073    /// Check if a line is within an HTML block
1074    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1075        if line_num == 0 || line_num > self.lines.len() {
1076            return false;
1077        }
1078        self.lines[line_num - 1].in_html_block
1079    }
1080
1081    /// Check if a line and column is within a code span
1082    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1083        if line_num == 0 || line_num > self.lines.len() {
1084            return false;
1085        }
1086
1087        // Use the code spans cache to check
1088        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1089        // Convert col to 0-indexed for comparison
1090        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1091        let code_spans = self.code_spans();
1092        code_spans.iter().any(|span| {
1093            // Check if line is within the span's line range
1094            if line_num < span.line || line_num > span.end_line {
1095                return false;
1096            }
1097
1098            if span.line == span.end_line {
1099                // Single-line span: check column bounds
1100                col_0indexed >= span.start_col && col_0indexed < span.end_col
1101            } else if line_num == span.line {
1102                // First line of multi-line span: anything after start_col is in span
1103                col_0indexed >= span.start_col
1104            } else if line_num == span.end_line {
1105                // Last line of multi-line span: anything before end_col is in span
1106                col_0indexed < span.end_col
1107            } else {
1108                // Middle line of multi-line span: entire line is in span
1109                true
1110            }
1111        })
1112    }
1113
1114    /// Check if a byte offset is within a code span
1115    #[inline]
1116    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1117        let code_spans = self.code_spans();
1118        code_spans
1119            .iter()
1120            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1121    }
1122
1123    /// Check if a byte position is within a reference definition
1124    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1125    #[inline]
1126    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1127        self.reference_defs
1128            .iter()
1129            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1130    }
1131
1132    /// Check if a byte position is within an HTML comment
1133    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1134    /// where k is the number of HTML comments (typically very small)
1135    #[inline]
1136    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1137        self.html_comment_ranges
1138            .iter()
1139            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1140    }
1141
1142    /// Check if a byte position is within an HTML tag (including multiline tags)
1143    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1144    #[inline]
1145    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1146        self.html_tags()
1147            .iter()
1148            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1149    }
1150
1151    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1152    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1153        self.jinja_ranges
1154            .iter()
1155            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1156    }
1157
1158    /// Check if a byte position is within a JSX expression (MDX: {expression})
1159    #[inline]
1160    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1161        self.jsx_expression_ranges
1162            .iter()
1163            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1164    }
1165
1166    /// Check if a byte position is within an MDX comment ({/* ... */})
1167    #[inline]
1168    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1169        self.mdx_comment_ranges
1170            .iter()
1171            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1172    }
1173
1174    /// Get all JSX expression byte ranges
1175    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1176        &self.jsx_expression_ranges
1177    }
1178
1179    /// Get all MDX comment byte ranges
1180    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1181        &self.mdx_comment_ranges
1182    }
1183
1184    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1185    /// Only active in Quarto flavor
1186    #[inline]
1187    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1188        self.citation_ranges
1189            .iter()
1190            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1191    }
1192
1193    /// Get all citation byte ranges (Quarto flavor only)
1194    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1195        &self.citation_ranges
1196    }
1197
1198    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1199    #[inline]
1200    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1201        self.shortcode_ranges
1202            .iter()
1203            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1204    }
1205
1206    /// Get all shortcode byte ranges
1207    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1208        &self.shortcode_ranges
1209    }
1210
1211    /// Check if a byte position is within a link reference definition title
1212    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1213        self.reference_defs.iter().any(|def| {
1214            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1215                byte_pos >= start && byte_pos < end
1216            } else {
1217                false
1218            }
1219        })
1220    }
1221
1222    /// Check if content has any instances of a specific character (fast)
1223    pub fn has_char(&self, ch: char) -> bool {
1224        match ch {
1225            '#' => self.char_frequency.hash_count > 0,
1226            '*' => self.char_frequency.asterisk_count > 0,
1227            '_' => self.char_frequency.underscore_count > 0,
1228            '-' => self.char_frequency.hyphen_count > 0,
1229            '+' => self.char_frequency.plus_count > 0,
1230            '>' => self.char_frequency.gt_count > 0,
1231            '|' => self.char_frequency.pipe_count > 0,
1232            '[' => self.char_frequency.bracket_count > 0,
1233            '`' => self.char_frequency.backtick_count > 0,
1234            '<' => self.char_frequency.lt_count > 0,
1235            '!' => self.char_frequency.exclamation_count > 0,
1236            '\n' => self.char_frequency.newline_count > 0,
1237            _ => self.content.contains(ch), // Fallback for other characters
1238        }
1239    }
1240
1241    /// Get count of a specific character (fast)
1242    pub fn char_count(&self, ch: char) -> usize {
1243        match ch {
1244            '#' => self.char_frequency.hash_count,
1245            '*' => self.char_frequency.asterisk_count,
1246            '_' => self.char_frequency.underscore_count,
1247            '-' => self.char_frequency.hyphen_count,
1248            '+' => self.char_frequency.plus_count,
1249            '>' => self.char_frequency.gt_count,
1250            '|' => self.char_frequency.pipe_count,
1251            '[' => self.char_frequency.bracket_count,
1252            '`' => self.char_frequency.backtick_count,
1253            '<' => self.char_frequency.lt_count,
1254            '!' => self.char_frequency.exclamation_count,
1255            '\n' => self.char_frequency.newline_count,
1256            _ => self.content.matches(ch).count(), // Fallback for other characters
1257        }
1258    }
1259
1260    /// Check if content likely contains headings (fast)
1261    pub fn likely_has_headings(&self) -> bool {
1262        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1263    }
1264
1265    /// Check if content likely contains lists (fast)
1266    pub fn likely_has_lists(&self) -> bool {
1267        self.char_frequency.asterisk_count > 0
1268            || self.char_frequency.hyphen_count > 0
1269            || self.char_frequency.plus_count > 0
1270    }
1271
1272    /// Check if content likely contains emphasis (fast)
1273    pub fn likely_has_emphasis(&self) -> bool {
1274        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1275    }
1276
1277    /// Check if content likely contains tables (fast)
1278    pub fn likely_has_tables(&self) -> bool {
1279        self.char_frequency.pipe_count > 2
1280    }
1281
1282    /// Check if content likely contains blockquotes (fast)
1283    pub fn likely_has_blockquotes(&self) -> bool {
1284        self.char_frequency.gt_count > 0
1285    }
1286
1287    /// Check if content likely contains code (fast)
1288    pub fn likely_has_code(&self) -> bool {
1289        self.char_frequency.backtick_count > 0
1290    }
1291
1292    /// Check if content likely contains links or images (fast)
1293    pub fn likely_has_links_or_images(&self) -> bool {
1294        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1295    }
1296
1297    /// Check if content likely contains HTML (fast)
1298    pub fn likely_has_html(&self) -> bool {
1299        self.char_frequency.lt_count > 0
1300    }
1301
1302    /// Get the blockquote prefix for inserting a blank line at the given line index.
1303    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1304    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1305    /// Returns an empty string if the line is not inside a blockquote.
1306    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1307        if let Some(line_info) = self.lines.get(line_idx)
1308            && let Some(ref bq) = line_info.blockquote
1309        {
1310            bq.prefix.trim_end().to_string()
1311        } else {
1312            String::new()
1313        }
1314    }
1315
1316    /// Get HTML tags on a specific line
1317    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1318        self.html_tags()
1319            .iter()
1320            .filter(|tag| tag.line == line_num)
1321            .cloned()
1322            .collect()
1323    }
1324
1325    /// Get emphasis spans on a specific line
1326    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1327        self.emphasis_spans()
1328            .iter()
1329            .filter(|span| span.line == line_num)
1330            .cloned()
1331            .collect()
1332    }
1333
1334    /// Get table rows on a specific line
1335    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1336        self.table_rows()
1337            .iter()
1338            .filter(|row| row.line == line_num)
1339            .cloned()
1340            .collect()
1341    }
1342
1343    /// Get bare URLs on a specific line
1344    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1345        self.bare_urls()
1346            .iter()
1347            .filter(|url| url.line == line_num)
1348            .cloned()
1349            .collect()
1350    }
1351
1352    /// Find the line index for a given byte offset using binary search.
1353    /// Returns (line_index, line_number, column) where:
1354    /// - line_index is the 0-based index in the lines array
1355    /// - line_number is the 1-based line number
1356    /// - column is the byte offset within that line
1357    #[inline]
1358    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1359        // Binary search to find the line containing this byte offset
1360        let idx = match lines.binary_search_by(|line| {
1361            if byte_offset < line.byte_offset {
1362                std::cmp::Ordering::Greater
1363            } else if byte_offset > line.byte_offset + line.byte_len {
1364                std::cmp::Ordering::Less
1365            } else {
1366                std::cmp::Ordering::Equal
1367            }
1368        }) {
1369            Ok(idx) => idx,
1370            Err(idx) => idx.saturating_sub(1),
1371        };
1372
1373        let line = &lines[idx];
1374        let line_num = idx + 1;
1375        let col = byte_offset.saturating_sub(line.byte_offset);
1376
1377        (idx, line_num, col)
1378    }
1379
1380    /// Check if a byte offset is within a code span using binary search
1381    #[inline]
1382    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1383        // Since spans are sorted by byte_offset, use partition_point for binary search
1384        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1385
1386        // Check the span that starts at or before our offset
1387        if idx > 0 {
1388            let span = &code_spans[idx - 1];
1389            if offset >= span.byte_offset && offset < span.byte_end {
1390                return true;
1391            }
1392        }
1393
1394        false
1395    }
1396
1397    /// Collect byte ranges of all links using pulldown-cmark
1398    /// This is used to skip heading detection for lines that fall within link syntax
1399    /// (e.g., multiline links like `[text](url\n#fragment)`)
1400    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1401        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1402
1403        let mut link_ranges = Vec::new();
1404        let mut options = Options::empty();
1405        options.insert(Options::ENABLE_WIKILINKS);
1406        options.insert(Options::ENABLE_FOOTNOTES);
1407
1408        let parser = Parser::new_ext(content, options).into_offset_iter();
1409        let mut link_stack: Vec<usize> = Vec::new();
1410
1411        for (event, range) in parser {
1412            match event {
1413                Event::Start(Tag::Link { .. }) => {
1414                    link_stack.push(range.start);
1415                }
1416                Event::End(TagEnd::Link) => {
1417                    if let Some(start_pos) = link_stack.pop() {
1418                        link_ranges.push((start_pos, range.end));
1419                    }
1420                }
1421                _ => {}
1422            }
1423        }
1424
1425        link_ranges
1426    }
1427
1428    /// Parse all links in the content
1429    fn parse_links(
1430        content: &'a str,
1431        lines: &[LineInfo],
1432        code_blocks: &[(usize, usize)],
1433        code_spans: &[CodeSpan],
1434        flavor: MarkdownFlavor,
1435        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1436    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1437        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1438        use std::collections::HashSet;
1439
1440        let mut links = Vec::with_capacity(content.len() / 500);
1441        let mut broken_links = Vec::new();
1442        let mut footnote_refs = Vec::new();
1443
1444        // Track byte positions of links found by pulldown-cmark
1445        let mut found_positions = HashSet::new();
1446
1447        // Use pulldown-cmark's streaming parser with BrokenLink callback
1448        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1449        // This automatically handles:
1450        // - Escaped links (won't generate events)
1451        // - Links in code blocks/spans (won't generate Link events)
1452        // - Images (generates Tag::Image instead)
1453        // - Reference resolution (dest_url is already resolved!)
1454        // - Broken references (callback is invoked)
1455        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1456        let mut options = Options::empty();
1457        options.insert(Options::ENABLE_WIKILINKS);
1458        options.insert(Options::ENABLE_FOOTNOTES);
1459
1460        let parser = Parser::new_with_broken_link_callback(
1461            content,
1462            options,
1463            Some(|link: BrokenLink<'_>| {
1464                broken_links.push(BrokenLinkInfo {
1465                    reference: link.reference.to_string(),
1466                    span: link.span.clone(),
1467                });
1468                None
1469            }),
1470        )
1471        .into_offset_iter();
1472
1473        let mut link_stack: Vec<(
1474            usize,
1475            usize,
1476            pulldown_cmark::CowStr<'a>,
1477            LinkType,
1478            pulldown_cmark::CowStr<'a>,
1479        )> = Vec::new();
1480        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1481
1482        for (event, range) in parser {
1483            match event {
1484                Event::Start(Tag::Link {
1485                    link_type,
1486                    dest_url,
1487                    id,
1488                    ..
1489                }) => {
1490                    // Link start - record position, URL, and reference ID
1491                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1492                    text_chunks.clear();
1493                }
1494                Event::Text(text) if !link_stack.is_empty() => {
1495                    // Track text content with its byte range
1496                    text_chunks.push((text.to_string(), range.start, range.end));
1497                }
1498                Event::Code(code) if !link_stack.is_empty() => {
1499                    // Include inline code in link text (with backticks)
1500                    let code_text = format!("`{code}`");
1501                    text_chunks.push((code_text, range.start, range.end));
1502                }
1503                Event::End(TagEnd::Link) => {
1504                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1505                        // Skip if in HTML comment
1506                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1507                            text_chunks.clear();
1508                            continue;
1509                        }
1510
1511                        // Find line and column information
1512                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1513
1514                        // Skip if this link is on a MkDocs snippet line
1515                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1516                            text_chunks.clear();
1517                            continue;
1518                        }
1519
1520                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1521
1522                        let is_reference = matches!(
1523                            link_type,
1524                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1525                        );
1526
1527                        // Extract link text directly from source bytes to preserve escaping
1528                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1529                        let link_text = if start_pos < content.len() {
1530                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1531
1532                            // Find MATCHING ] by tracking bracket depth for nested brackets
1533                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1534                            // Brackets inside code spans (between backticks) should be ignored
1535                            let mut close_pos = None;
1536                            let mut depth = 0;
1537                            let mut in_code_span = false;
1538
1539                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1540                                // Count preceding backslashes
1541                                let mut backslash_count = 0;
1542                                let mut j = i;
1543                                while j > 0 && link_bytes[j - 1] == b'\\' {
1544                                    backslash_count += 1;
1545                                    j -= 1;
1546                                }
1547                                let is_escaped = backslash_count % 2 != 0;
1548
1549                                // Track code spans - backticks toggle in/out of code
1550                                if byte == b'`' && !is_escaped {
1551                                    in_code_span = !in_code_span;
1552                                }
1553
1554                                // Only count brackets when NOT in a code span
1555                                if !is_escaped && !in_code_span {
1556                                    if byte == b'[' {
1557                                        depth += 1;
1558                                    } else if byte == b']' {
1559                                        if depth == 0 {
1560                                            // Found the matching closing bracket
1561                                            close_pos = Some(i);
1562                                            break;
1563                                        } else {
1564                                            depth -= 1;
1565                                        }
1566                                    }
1567                                }
1568                            }
1569
1570                            if let Some(pos) = close_pos {
1571                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1572                            } else {
1573                                Cow::Borrowed("")
1574                            }
1575                        } else {
1576                            Cow::Borrowed("")
1577                        };
1578
1579                        // For reference links, use the actual reference ID from pulldown-cmark
1580                        let reference_id = if is_reference && !ref_id.is_empty() {
1581                            Some(Cow::Owned(ref_id.to_lowercase()))
1582                        } else if is_reference {
1583                            // For collapsed/shortcut references without explicit ID, use the link text
1584                            Some(Cow::Owned(link_text.to_lowercase()))
1585                        } else {
1586                            None
1587                        };
1588
1589                        // Track this position as found
1590                        found_positions.insert(start_pos);
1591
1592                        links.push(ParsedLink {
1593                            line: line_num,
1594                            start_col: col_start,
1595                            end_col: col_end,
1596                            byte_offset: start_pos,
1597                            byte_end: range.end,
1598                            text: link_text,
1599                            url: Cow::Owned(url.to_string()),
1600                            is_reference,
1601                            reference_id,
1602                            link_type,
1603                        });
1604
1605                        text_chunks.clear();
1606                    }
1607                }
1608                Event::FootnoteReference(footnote_id) => {
1609                    // Capture footnote references like [^1], [^note]
1610                    // Skip if in HTML comment
1611                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1612                        continue;
1613                    }
1614
1615                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1616                    footnote_refs.push(FootnoteRef {
1617                        id: footnote_id.to_string(),
1618                        line: line_num,
1619                        byte_offset: range.start,
1620                        byte_end: range.end,
1621                    });
1622                }
1623                _ => {}
1624            }
1625        }
1626
1627        // Also find undefined references using regex
1628        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1629        // because the reference is undefined
1630        for cap in LINK_PATTERN.captures_iter(content) {
1631            let full_match = cap.get(0).unwrap();
1632            let match_start = full_match.start();
1633            let match_end = full_match.end();
1634
1635            // Skip if this was already found by pulldown-cmark (it's a valid link)
1636            if found_positions.contains(&match_start) {
1637                continue;
1638            }
1639
1640            // Skip if escaped
1641            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1642                continue;
1643            }
1644
1645            // Skip if it's an image
1646            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1647                continue;
1648            }
1649
1650            // Skip if in code block
1651            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1652                continue;
1653            }
1654
1655            // Skip if in code span
1656            if Self::is_offset_in_code_span(code_spans, match_start) {
1657                continue;
1658            }
1659
1660            // Skip if in HTML comment
1661            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1662                continue;
1663            }
1664
1665            // Find line and column information
1666            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1667
1668            // Skip if this link is on a MkDocs snippet line
1669            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1670                continue;
1671            }
1672
1673            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1674
1675            let text = cap.get(1).map_or("", |m| m.as_str());
1676
1677            // Only process reference links (group 6)
1678            if let Some(ref_id) = cap.get(6) {
1679                let ref_id_str = ref_id.as_str();
1680                let normalized_ref = if ref_id_str.is_empty() {
1681                    Cow::Owned(text.to_lowercase()) // Implicit reference
1682                } else {
1683                    Cow::Owned(ref_id_str.to_lowercase())
1684                };
1685
1686                // This is an undefined reference (pulldown-cmark didn't parse it)
1687                links.push(ParsedLink {
1688                    line: line_num,
1689                    start_col: col_start,
1690                    end_col: col_end,
1691                    byte_offset: match_start,
1692                    byte_end: match_end,
1693                    text: Cow::Borrowed(text),
1694                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1695                    is_reference: true,
1696                    reference_id: Some(normalized_ref),
1697                    link_type: LinkType::Reference, // Undefined references are reference-style
1698                });
1699            }
1700        }
1701
1702        (links, broken_links, footnote_refs)
1703    }
1704
1705    /// Parse all images in the content
1706    fn parse_images(
1707        content: &'a str,
1708        lines: &[LineInfo],
1709        code_blocks: &[(usize, usize)],
1710        code_spans: &[CodeSpan],
1711        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1712    ) -> Vec<ParsedImage<'a>> {
1713        use crate::utils::skip_context::is_in_html_comment_ranges;
1714        use std::collections::HashSet;
1715
1716        // Pre-size based on a heuristic: images are less common than links
1717        let mut images = Vec::with_capacity(content.len() / 1000);
1718        let mut found_positions = HashSet::new();
1719
1720        // Use pulldown-cmark for parsing - more accurate and faster
1721        let parser = Parser::new(content).into_offset_iter();
1722        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1723            Vec::new();
1724        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1725
1726        for (event, range) in parser {
1727            match event {
1728                Event::Start(Tag::Image {
1729                    link_type,
1730                    dest_url,
1731                    id,
1732                    ..
1733                }) => {
1734                    image_stack.push((range.start, dest_url, link_type, id));
1735                    text_chunks.clear();
1736                }
1737                Event::Text(text) if !image_stack.is_empty() => {
1738                    text_chunks.push((text.to_string(), range.start, range.end));
1739                }
1740                Event::Code(code) if !image_stack.is_empty() => {
1741                    let code_text = format!("`{code}`");
1742                    text_chunks.push((code_text, range.start, range.end));
1743                }
1744                Event::End(TagEnd::Image) => {
1745                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1746                        // Skip if in code block
1747                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1748                            continue;
1749                        }
1750
1751                        // Skip if in code span
1752                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1753                            continue;
1754                        }
1755
1756                        // Skip if in HTML comment
1757                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1758                            continue;
1759                        }
1760
1761                        // Find line and column using binary search
1762                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1763                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1764
1765                        let is_reference = matches!(
1766                            link_type,
1767                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1768                        );
1769
1770                        // Extract alt text directly from source bytes to preserve escaping
1771                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1772                        let alt_text = if start_pos < content.len() {
1773                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1774
1775                            // Find MATCHING ] by tracking bracket depth for nested brackets
1776                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1777                            let mut close_pos = None;
1778                            let mut depth = 0;
1779
1780                            if image_bytes.len() > 2 {
1781                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1782                                    // Count preceding backslashes
1783                                    let mut backslash_count = 0;
1784                                    let mut j = i;
1785                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1786                                        backslash_count += 1;
1787                                        j -= 1;
1788                                    }
1789                                    let is_escaped = backslash_count % 2 != 0;
1790
1791                                    if !is_escaped {
1792                                        if byte == b'[' {
1793                                            depth += 1;
1794                                        } else if byte == b']' {
1795                                            if depth == 0 {
1796                                                // Found the matching closing bracket
1797                                                close_pos = Some(i);
1798                                                break;
1799                                            } else {
1800                                                depth -= 1;
1801                                            }
1802                                        }
1803                                    }
1804                                }
1805                            }
1806
1807                            if let Some(pos) = close_pos {
1808                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1809                            } else {
1810                                Cow::Borrowed("")
1811                            }
1812                        } else {
1813                            Cow::Borrowed("")
1814                        };
1815
1816                        let reference_id = if is_reference && !ref_id.is_empty() {
1817                            Some(Cow::Owned(ref_id.to_lowercase()))
1818                        } else if is_reference {
1819                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1820                        } else {
1821                            None
1822                        };
1823
1824                        found_positions.insert(start_pos);
1825                        images.push(ParsedImage {
1826                            line: line_num,
1827                            start_col: col_start,
1828                            end_col: col_end,
1829                            byte_offset: start_pos,
1830                            byte_end: range.end,
1831                            alt_text,
1832                            url: Cow::Owned(url.to_string()),
1833                            is_reference,
1834                            reference_id,
1835                            link_type,
1836                        });
1837                    }
1838                }
1839                _ => {}
1840            }
1841        }
1842
1843        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1844        for cap in IMAGE_PATTERN.captures_iter(content) {
1845            let full_match = cap.get(0).unwrap();
1846            let match_start = full_match.start();
1847            let match_end = full_match.end();
1848
1849            // Skip if already found by pulldown-cmark
1850            if found_positions.contains(&match_start) {
1851                continue;
1852            }
1853
1854            // Skip if the ! is escaped
1855            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1856                continue;
1857            }
1858
1859            // Skip if in code block, code span, or HTML comment
1860            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1861                || Self::is_offset_in_code_span(code_spans, match_start)
1862                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1863            {
1864                continue;
1865            }
1866
1867            // Only process reference images (undefined references not found by pulldown-cmark)
1868            if let Some(ref_id) = cap.get(6) {
1869                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1870                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1871                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1872                let ref_id_str = ref_id.as_str();
1873                let normalized_ref = if ref_id_str.is_empty() {
1874                    Cow::Owned(alt_text.to_lowercase())
1875                } else {
1876                    Cow::Owned(ref_id_str.to_lowercase())
1877                };
1878
1879                images.push(ParsedImage {
1880                    line: line_num,
1881                    start_col: col_start,
1882                    end_col: col_end,
1883                    byte_offset: match_start,
1884                    byte_end: match_end,
1885                    alt_text: Cow::Borrowed(alt_text),
1886                    url: Cow::Borrowed(""),
1887                    is_reference: true,
1888                    reference_id: Some(normalized_ref),
1889                    link_type: LinkType::Reference, // Undefined references are reference-style
1890                });
1891            }
1892        }
1893
1894        images
1895    }
1896
1897    /// Parse reference definitions
1898    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1899        // Pre-size based on lines count as reference definitions are line-based
1900        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1901
1902        for (line_idx, line_info) in lines.iter().enumerate() {
1903            // Skip lines in code blocks
1904            if line_info.in_code_block {
1905                continue;
1906            }
1907
1908            let line = line_info.content(content);
1909            let line_num = line_idx + 1;
1910
1911            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1912                let id_raw = cap.get(1).unwrap().as_str();
1913
1914                // Skip footnote definitions - they use [^id]: syntax and are semantically
1915                // different from reference link definitions
1916                if id_raw.starts_with('^') {
1917                    continue;
1918                }
1919
1920                let id = id_raw.to_lowercase();
1921                let url = cap.get(2).unwrap().as_str().to_string();
1922                let title_match = cap.get(3).or_else(|| cap.get(4));
1923                let title = title_match.map(|m| m.as_str().to_string());
1924
1925                // Calculate byte positions
1926                // The match starts at the beginning of the line (0) and extends to the end
1927                let match_obj = cap.get(0).unwrap();
1928                let byte_offset = line_info.byte_offset + match_obj.start();
1929                let byte_end = line_info.byte_offset + match_obj.end();
1930
1931                // Calculate title byte positions (includes the quote character before content)
1932                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1933                    // The match is the content inside quotes, so we include the quote before
1934                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1935                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1936                    (Some(start), Some(end))
1937                } else {
1938                    (None, None)
1939                };
1940
1941                refs.push(ReferenceDef {
1942                    line: line_num,
1943                    id,
1944                    url,
1945                    title,
1946                    byte_offset,
1947                    byte_end,
1948                    title_byte_start,
1949                    title_byte_end,
1950                });
1951            }
1952        }
1953
1954        refs
1955    }
1956
1957    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1958    /// Handles nested blockquotes like `> > > content`
1959    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1960    #[inline]
1961    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1962        let trimmed_start = line.trim_start();
1963        if !trimmed_start.starts_with('>') {
1964            return None;
1965        }
1966
1967        // Track total prefix length to handle nested blockquotes
1968        let mut remaining = line;
1969        let mut total_prefix_len = 0;
1970
1971        loop {
1972            let trimmed = remaining.trim_start();
1973            if !trimmed.starts_with('>') {
1974                break;
1975            }
1976
1977            // Add leading whitespace + '>' to prefix
1978            let leading_ws_len = remaining.len() - trimmed.len();
1979            total_prefix_len += leading_ws_len + 1;
1980
1981            let after_gt = &trimmed[1..];
1982
1983            // Handle optional whitespace after '>' (space or tab)
1984            if let Some(stripped) = after_gt.strip_prefix(' ') {
1985                total_prefix_len += 1;
1986                remaining = stripped;
1987            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1988                total_prefix_len += 1;
1989                remaining = stripped;
1990            } else {
1991                remaining = after_gt;
1992            }
1993        }
1994
1995        Some((&line[..total_prefix_len], remaining))
1996    }
1997
1998    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
1999    ///
2000    /// Returns a HashMap keyed by line byte offset, containing:
2001    /// `(is_ordered, marker, marker_column, content_column, number)`
2002    ///
2003    /// ## Why pulldown-cmark?
2004    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2005    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2006    /// This fixes issue #253 where continuation lines were falsely detected.
2007    ///
2008    /// ## Tab indentation quirk
2009    /// Pulldown-cmark reports nested list items at the newline character position
2010    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2011    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2012    /// We detect this and advance to the correct line.
2013    ///
2014    /// ## HashMap key strategy
2015    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2016    /// that resolve to the same line (after newline adjustment). The first event
2017    /// for each line is authoritative.
2018    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2019    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2020    /// This avoids a separate parse for emphasis detection.
2021    fn detect_list_items_and_emphasis_with_pulldown(
2022        content: &str,
2023        line_offsets: &[usize],
2024        flavor: MarkdownFlavor,
2025        front_matter_end: usize,
2026        code_blocks: &[(usize, usize)],
2027    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2028        use std::collections::HashMap;
2029
2030        let mut list_items = HashMap::new();
2031        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2032
2033        let mut options = Options::empty();
2034        options.insert(Options::ENABLE_TABLES);
2035        options.insert(Options::ENABLE_FOOTNOTES);
2036        options.insert(Options::ENABLE_STRIKETHROUGH);
2037        options.insert(Options::ENABLE_TASKLISTS);
2038        // Always enable GFM features for consistency with existing behavior
2039        options.insert(Options::ENABLE_GFM);
2040
2041        // Suppress unused variable warning
2042        let _ = flavor;
2043
2044        let parser = Parser::new_ext(content, options).into_offset_iter();
2045        let mut list_depth: usize = 0;
2046        let mut list_stack: Vec<bool> = Vec::new();
2047
2048        for (event, range) in parser {
2049            match event {
2050                // Capture emphasis spans (for MD030's emphasis detection)
2051                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2052                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2053                        2
2054                    } else {
2055                        1
2056                    };
2057                    let match_start = range.start;
2058                    let match_end = range.end;
2059
2060                    // Skip if in code block
2061                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2062                        // Determine marker character by looking at the content at the start
2063                        let marker = content[match_start..].chars().next().unwrap_or('*');
2064                        if marker == '*' || marker == '_' {
2065                            // Extract content between markers
2066                            let content_start = match_start + marker_count;
2067                            let content_end = if match_end >= marker_count {
2068                                match_end - marker_count
2069                            } else {
2070                                match_end
2071                            };
2072                            let content_part = if content_start < content_end && content_end <= content.len() {
2073                                &content[content_start..content_end]
2074                            } else {
2075                                ""
2076                            };
2077
2078                            // Find which line this emphasis is on using line_offsets
2079                            let line_idx = match line_offsets.binary_search(&match_start) {
2080                                Ok(idx) => idx,
2081                                Err(idx) => idx.saturating_sub(1),
2082                            };
2083                            let line_num = line_idx + 1;
2084                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2085                            let col_start = match_start - line_start;
2086                            let col_end = match_end - line_start;
2087
2088                            emphasis_spans.push(EmphasisSpan {
2089                                line: line_num,
2090                                start_col: col_start,
2091                                end_col: col_end,
2092                                byte_offset: match_start,
2093                                byte_end: match_end,
2094                                marker,
2095                                marker_count,
2096                                content: content_part.to_string(),
2097                            });
2098                        }
2099                    }
2100                }
2101                Event::Start(Tag::List(start_number)) => {
2102                    list_depth += 1;
2103                    list_stack.push(start_number.is_some());
2104                }
2105                Event::End(TagEnd::List(_)) => {
2106                    list_depth = list_depth.saturating_sub(1);
2107                    list_stack.pop();
2108                }
2109                Event::Start(Tag::Item) if list_depth > 0 => {
2110                    // Get the ordered state for the CURRENT (innermost) list
2111                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2112                    // Find which line this byte offset corresponds to
2113                    let item_start = range.start;
2114
2115                    // Binary search to find the line number
2116                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2117                        Ok(idx) => idx,
2118                        Err(idx) => idx.saturating_sub(1),
2119                    };
2120
2121                    // Pulldown-cmark reports nested list items at the newline before the item
2122                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2123                    // Advance to the actual content line in this case.
2124                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2125                        line_idx += 1;
2126                    }
2127
2128                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2129                    if front_matter_end > 0 && line_idx < front_matter_end {
2130                        continue;
2131                    }
2132
2133                    if line_idx < line_offsets.len() {
2134                        let line_start_byte = line_offsets[line_idx];
2135                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2136                        let line = &content[line_start_byte..line_end.min(content.len())];
2137
2138                        // Strip trailing newline
2139                        let line = line
2140                            .strip_suffix('\n')
2141                            .or_else(|| line.strip_suffix("\r\n"))
2142                            .unwrap_or(line);
2143
2144                        // Strip blockquote prefix if present
2145                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2146                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2147                            (prefix.len(), content)
2148                        } else {
2149                            (0, line)
2150                        };
2151
2152                        // Parse the list marker from the actual line
2153                        if current_list_is_ordered {
2154                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2155                                Self::parse_ordered_list(line_to_parse)
2156                            {
2157                                let marker = format!("{number_str}{delimiter}");
2158                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2159                                let content_column = marker_column + marker.len() + spacing.len();
2160                                let number = number_str.parse().ok();
2161
2162                                list_items.entry(line_start_byte).or_insert((
2163                                    true,
2164                                    marker,
2165                                    marker_column,
2166                                    content_column,
2167                                    number,
2168                                ));
2169                            }
2170                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2171                            Self::parse_unordered_list(line_to_parse)
2172                        {
2173                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2174                            let content_column = marker_column + 1 + spacing.len();
2175
2176                            list_items.entry(line_start_byte).or_insert((
2177                                false,
2178                                marker.to_string(),
2179                                marker_column,
2180                                content_column,
2181                                None,
2182                            ));
2183                        }
2184                    }
2185                }
2186                _ => {}
2187            }
2188        }
2189
2190        (list_items, emphasis_spans)
2191    }
2192
/// Hand-written scanner for bullet list markers, used in place of a regex.
///
/// Accepted shape: optional leading spaces/tabs, one of `-`, `*`, `+`, optional
/// spaces/tabs, then anything.
///
/// Returns `Some((leading_ws, marker, spacing, content))` where every `&str` is a
/// slice of `line`, or `None` when the first non-blank byte is not a bullet marker
/// (including when the line is empty or whitespace-only).
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_gap = |b: u8| b == b' ' || b == b'\t';
    let raw = line.as_bytes();

    // Length of the indentation run; also the index of the would-be marker.
    let indent_len = raw.iter().take_while(|&&b| is_gap(b)).count();

    // `?` covers the "ran out of input" case.
    let marker = char::from(*raw.get(indent_len)?);
    if !matches!(marker, '-' | '*' | '+') {
        return None;
    }

    // The marker is a single ASCII byte, so the spacing run begins right after it.
    let gap_start = indent_len + 1;
    let gap_len = raw[gap_start..].iter().take_while(|&&b| is_gap(b)).count();
    let content_start = gap_start + gap_len;

    // Everything consumed so far is ASCII, so these offsets are char boundaries.
    Some((
        &line[..indent_len],
        marker,
        &line[gap_start..content_start],
        &line[content_start..],
    ))
}
2225
/// Hand-written scanner for ordered list markers, used in place of a regex.
///
/// Accepted shape: optional leading spaces/tabs, one or more ASCII digits, a `.` or
/// `)` delimiter, optional spaces/tabs, then anything.
///
/// Returns `Some((leading_ws, number_str, delimiter, spacing, content))` where every
/// `&str` is a slice of `line`, or `None` when there are no digits or the digits are
/// not immediately followed by a delimiter.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    /// Number of leading bytes of `bytes` accepted by `keep`.
    fn run_len(bytes: &[u8], keep: impl Fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| keep(b)).count()
    }
    let is_gap = |b: u8| b == b' ' || b == b'\t';

    let raw = line.as_bytes();

    let digits_start = run_len(raw, is_gap);
    let digits_end = digits_start + run_len(&raw[digits_start..], |b| b.is_ascii_digit());
    if digits_end == digits_start {
        // No number at all.
        return None;
    }

    // The byte right after the number must be the delimiter.
    let delimiter = match raw.get(digits_end) {
        Some(b'.') => '.',
        Some(b')') => ')',
        _ => return None,
    };

    let gap_start = digits_end + 1;
    let content_start = gap_start + run_len(&raw[gap_start..], is_gap);

    // Only ASCII bytes were consumed, so every offset is a char boundary.
    Some((
        &line[..digits_start],
        &line[digits_start..digits_end],
        delimiter,
        &line[gap_start..content_start],
        &line[content_start..],
    ))
}
2273
/// Build a per-line lookup table telling whether each line lies in a code block.
///
/// `line_offsets` holds the starting byte offset of every line (sorted ascending) and
/// `code_blocks` holds `(start, end)` byte ranges. The result has one entry per
/// element of `line_offsets`; entry `i` is `true` when line `i` is covered by at
/// least one range.
///
/// The ranges are taken at face value: no re-validation of what counts as a code
/// block happens here, so list continuation paragraphs that the detector left out
/// stay unmarked.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_bytes = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(raw_start, raw_end) in code_blocks {
        // Snap the start backwards onto a UTF-8 char boundary.
        let mut block_start = raw_start;
        while block_start > 0 && !content.is_char_boundary(block_start) {
            block_start -= 1;
        }

        // Clamp the end to the content, then snap it forwards onto a char boundary.
        let mut block_end = raw_end.min(total_bytes);
        while block_end < total_bytes && !content.is_char_boundary(block_end) {
            block_end += 1;
        }

        // Binary searches over the sorted offsets:
        // - the line containing `block_start` is the last one starting at or before it,
        //   i.e. one before the first line that starts after it;
        // - the exclusive upper bound is the first line starting at or after `block_end`.
        let first_line = line_offsets
            .partition_point(|&offset| offset <= block_start)
            .saturating_sub(1);
        let last_line = line_offsets.partition_point(|&offset| offset < block_end);

        // `get_mut` yields `None` for an inverted range, in which case nothing is marked.
        if let Some(covered) = flags.get_mut(first_line..last_line) {
            covered.fill(true);
        }
    }

    flags
}
2333
/// Build a per-line lookup table telling whether each line belongs to a `$$ ... $$`
/// math block, in a single pass over the lines of `content`.
///
/// A line whose trimmed text is exactly `$$` toggles the math state and is itself
/// counted as part of the block (both the opening and the closing delimiter).
/// Lines flagged in `code_block_map` are never marked and never toggle the state,
/// because `$$` inside code is literal text. Indices missing from `code_block_map`
/// are treated as "not in a code block".
fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
    let mut math_open = false;

    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            if code_block_map.get(idx).copied().unwrap_or(false) {
                return false;
            }

            let is_delimiter = text.trim() == "$$";
            // Delimiters always belong to the block; other lines only while it is open.
            let in_block = is_delimiter || math_open;
            if is_delimiter {
                math_open = !math_open;
            }
            in_block
        })
        .collect()
}
2371
2372    /// Pre-compute basic line information (without headings/blockquotes)
2373    /// Also returns emphasis spans detected during the pulldown-cmark parse
2374    fn compute_basic_line_info(
2375        content: &str,
2376        line_offsets: &[usize],
2377        code_blocks: &[(usize, usize)],
2378        flavor: MarkdownFlavor,
2379        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2380        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2381        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2382    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2383        let content_lines: Vec<&str> = content.lines().collect();
2384        let mut lines = Vec::with_capacity(content_lines.len());
2385
2386        // Pre-compute which lines are in code blocks
2387        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2388
2389        // Pre-compute which lines are in math blocks ($$ ... $$)
2390        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2391
2392        // Detect front matter boundaries FIRST, before any other parsing
2393        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2394        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2395
2396        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2397        // (context-aware, eliminates false positives)
2398        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2399            content,
2400            line_offsets,
2401            flavor,
2402            front_matter_end,
2403            code_blocks,
2404        );
2405
2406        for (i, line) in content_lines.iter().enumerate() {
2407            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2408            let indent = line.len() - line.trim_start().len();
2409            // Compute visual indent with proper CommonMark tab expansion
2410            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2411
2412            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2413            let blockquote_parse = Self::parse_blockquote_prefix(line);
2414
2415            // For blank detection, consider blockquote context
2416            let is_blank = if let Some((_, content)) = blockquote_parse {
2417                // In blockquote context, check if content after prefix is blank
2418                content.trim().is_empty()
2419            } else {
2420                line.trim().is_empty()
2421            };
2422
2423            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2424            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2425
2426            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2427            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2428                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2429            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2430            // This ensures content after `-->` on the same line is not incorrectly skipped
2431            let line_end_offset = byte_offset + line.len();
2432            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2433                html_comment_ranges,
2434                byte_offset,
2435                line_end_offset,
2436            );
2437            // Use pulldown-cmark's list detection for context-aware parsing
2438            // This eliminates false positives on continuation lines (issue #253)
2439            let list_item =
2440                list_item_map
2441                    .get(&byte_offset)
2442                    .map(
2443                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2444                            marker: marker.clone(),
2445                            is_ordered: *is_ordered,
2446                            number: *number,
2447                            marker_column: *marker_column,
2448                            content_column: *content_column,
2449                        },
2450                    );
2451
2452            // Detect horizontal rules (only outside code blocks and frontmatter)
2453            // Uses CommonMark-compliant check including leading indentation validation
2454            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2455            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2456
2457            // Get math block status for this line
2458            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2459
2460            // Check if line is inside a Quarto div block
2461            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2462                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2463
2464            lines.push(LineInfo {
2465                byte_offset,
2466                byte_len: line.len(),
2467                indent,
2468                visual_indent,
2469                is_blank,
2470                in_code_block,
2471                in_front_matter,
2472                in_html_block: false, // Will be populated after line creation
2473                in_html_comment,
2474                list_item,
2475                heading: None,    // Will be populated in second pass for Setext headings
2476                blockquote: None, // Will be populated after line creation
2477                in_mkdocstrings,
2478                in_esm_block: false, // Will be populated after line creation for MDX files
2479                in_code_span_continuation: false, // Will be populated after code spans are parsed
2480                is_horizontal_rule: is_hr,
2481                in_math_block,
2482                in_quarto_div,
2483                in_jsx_expression: false,  // Will be populated for MDX files
2484                in_mdx_comment: false,     // Will be populated for MDX files
2485                in_jsx_component: false,   // Will be populated for MDX files
2486                in_jsx_fragment: false,    // Will be populated for MDX files
2487                in_admonition: false,      // Will be populated for MkDocs files
2488                in_content_tab: false,     // Will be populated for MkDocs files
2489                in_definition_list: false, // Will be populated for MkDocs files
2490            });
2491        }
2492
2493        (lines, emphasis_spans)
2494    }
2495
2496    /// Detect headings and blockquotes (called after HTML block detection)
2497    fn detect_headings_and_blockquotes(
2498        content: &str,
2499        lines: &mut [LineInfo],
2500        flavor: MarkdownFlavor,
2501        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2502        link_byte_ranges: &[(usize, usize)],
2503    ) {
2504        // Regex for heading detection
2505        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2506            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2507        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2508            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2509
2510        let content_lines: Vec<&str> = content.lines().collect();
2511
2512        // Detect front matter boundaries to skip those lines
2513        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2514
2515        // Detect headings (including Setext which needs look-ahead) and blockquotes
2516        for i in 0..lines.len() {
2517            let line = content_lines[i];
2518
2519            // Detect blockquotes FIRST, before any skip conditions.
2520            // A line can be both a blockquote AND contain a code block inside it.
2521            // We need to know about the blockquote marker regardless of code block status.
2522            // Skip only frontmatter lines - those are never blockquotes.
2523            if !(front_matter_end > 0 && i < front_matter_end)
2524                && let Some(bq) = parse_blockquote_detailed(line)
2525            {
2526                let nesting_level = bq.markers.len();
2527                let marker_column = bq.indent.len();
2528                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2529                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2530                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2531                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2532
2533                lines[i].blockquote = Some(BlockquoteInfo {
2534                    nesting_level,
2535                    indent: bq.indent.to_string(),
2536                    marker_column,
2537                    prefix,
2538                    content: bq.content.to_string(),
2539                    has_no_space_after_marker: has_no_space,
2540                    has_multiple_spaces_after_marker: has_multiple_spaces,
2541                    needs_md028_fix,
2542                });
2543
2544                // Update is_horizontal_rule for blockquote content
2545                // The original detection doesn't strip blockquote prefix, so we need to check here
2546                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2547                    lines[i].is_horizontal_rule = true;
2548                }
2549            }
2550
2551            // Now apply skip conditions for heading detection
2552            if lines[i].in_code_block {
2553                continue;
2554            }
2555
2556            // Skip lines in front matter
2557            if front_matter_end > 0 && i < front_matter_end {
2558                continue;
2559            }
2560
2561            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2562            if lines[i].in_html_block {
2563                continue;
2564            }
2565
2566            // Skip heading detection for blank lines
2567            if lines[i].is_blank {
2568                continue;
2569            }
2570
2571            // Check for ATX headings (but skip MkDocs snippet lines)
2572            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2573            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2574                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2575                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2576            } else {
2577                false
2578            };
2579
2580            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2581                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2582                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2583                    continue;
2584                }
2585                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2586                // This prevents false positives where `#fragment` is detected as a heading
2587                let line_offset = lines[i].byte_offset;
2588                if link_byte_ranges
2589                    .iter()
2590                    .any(|&(start, end)| line_offset > start && line_offset < end)
2591                {
2592                    continue;
2593                }
2594                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2595                let hashes = caps.get(2).map_or("", |m| m.as_str());
2596                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2597                let rest = caps.get(4).map_or("", |m| m.as_str());
2598
2599                let level = hashes.len() as u8;
2600                let marker_column = leading_spaces.len();
2601
2602                // Check for closing sequence, but handle custom IDs that might come after
2603                let (text, has_closing, closing_seq) = {
2604                    // First check if there's a custom ID at the end
2605                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2606                        // Check if this looks like a valid custom ID (ends with })
2607                        if rest[id_start..].trim_end().ends_with('}') {
2608                            // Split off the custom ID
2609                            (&rest[..id_start], &rest[id_start..])
2610                        } else {
2611                            (rest, "")
2612                        }
2613                    } else {
2614                        (rest, "")
2615                    };
2616
2617                    // Now look for closing hashes in the part before the custom ID
2618                    let trimmed_rest = rest_without_id.trim_end();
2619                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2620                        // Find the start of the hash sequence by walking backwards
2621                        // Use char_indices to get byte positions at char boundaries
2622                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2623
2624                        // Find which char index corresponds to last_hash_byte_pos
2625                        let last_hash_char_idx = char_positions
2626                            .iter()
2627                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2628
2629                        if let Some(mut char_idx) = last_hash_char_idx {
2630                            // Walk backwards to find start of hash sequence
2631                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2632                                char_idx -= 1;
2633                            }
2634
2635                            // Get the byte position of the start of hashes
2636                            let start_of_hashes = char_positions[char_idx].0;
2637
2638                            // Check if there's at least one space before the closing hashes
2639                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2640
2641                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2642                            let potential_closing = &trimmed_rest[start_of_hashes..];
2643                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2644
2645                            if is_all_hashes && has_space_before {
2646                                // This is a closing sequence
2647                                let closing_hashes = potential_closing.to_string();
2648                                // The text is everything before the closing hashes
2649                                // Don't include the custom ID here - it will be extracted later
2650                                let text_part = if !custom_id_part.is_empty() {
2651                                    // If we have a custom ID, append it back to get the full rest
2652                                    // This allows the extract_header_id function to handle it properly
2653                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2654                                } else {
2655                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2656                                };
2657                                (text_part, true, closing_hashes)
2658                            } else {
2659                                // Not a valid closing sequence, return the full content
2660                                (rest.to_string(), false, String::new())
2661                            }
2662                        } else {
2663                            // Couldn't find char boundary, return the full content
2664                            (rest.to_string(), false, String::new())
2665                        }
2666                    } else {
2667                        // No hashes found, return the full content
2668                        (rest.to_string(), false, String::new())
2669                    }
2670                };
2671
2672                let content_column = marker_column + hashes.len() + spaces_after.len();
2673
2674                // Extract custom header ID if present
2675                let raw_text = text.trim().to_string();
2676                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2677
2678                // If no custom ID was found on the header line, check the next line for standalone attr-list
2679                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2680                    let next_line = content_lines[i + 1];
2681                    if !lines[i + 1].in_code_block
2682                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2683                        && let Some(next_line_id) =
2684                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2685                    {
2686                        custom_id = Some(next_line_id);
2687                    }
2688                }
2689
2690                // ATX heading is "valid" for processing by heading rules if:
2691                // 1. Has space after # (CommonMark compliant): `# Heading`
2692                // 2. Is empty (just hashes): `#`
2693                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2694                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2695                //
2696                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2697                // - `#tag` - single # with lowercase (social hashtag)
2698                // - `#123` - single # with number (GitHub issue ref)
2699                let is_valid = !spaces_after.is_empty()
2700                    || rest.is_empty()
2701                    || level > 1
2702                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2703
2704                lines[i].heading = Some(HeadingInfo {
2705                    level,
2706                    style: HeadingStyle::ATX,
2707                    marker: hashes.to_string(),
2708                    marker_column,
2709                    content_column,
2710                    text: clean_text,
2711                    custom_id,
2712                    raw_text,
2713                    has_closing_sequence: has_closing,
2714                    closing_sequence: closing_seq,
2715                    is_valid,
2716                });
2717            }
2718            // Check for Setext headings (need to look at next line)
2719            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2720                let next_line = content_lines[i + 1];
2721                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2722                    // Skip if next line is front matter delimiter
2723                    if front_matter_end > 0 && i < front_matter_end {
2724                        continue;
2725                    }
2726
2727                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2728                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2729                    {
2730                        continue;
2731                    }
2732
2733                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2734                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2735                    let content_line = line.trim();
2736
2737                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2738                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2739                        continue;
2740                    }
2741
2742                    // Skip underscore thematic breaks (___)
2743                    if content_line.starts_with('_') {
2744                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2745                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2746                            continue;
2747                        }
2748                    }
2749
2750                    // Skip numbered lists (1. Item, 2. Item, etc.)
2751                    if let Some(first_char) = content_line.chars().next()
2752                        && first_char.is_ascii_digit()
2753                    {
2754                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2755                        if num_end < content_line.len() {
2756                            let next = content_line.chars().nth(num_end);
2757                            if next == Some('.') || next == Some(')') {
2758                                continue;
2759                            }
2760                        }
2761                    }
2762
2763                    // Skip ATX headings
2764                    if ATX_HEADING_REGEX.is_match(line) {
2765                        continue;
2766                    }
2767
2768                    // Skip blockquotes
2769                    if content_line.starts_with('>') {
2770                        continue;
2771                    }
2772
2773                    // Skip code fences
2774                    let trimmed_start = line.trim_start();
2775                    if trimmed_start.len() >= 3 {
2776                        let first_three: String = trimmed_start.chars().take(3).collect();
2777                        if first_three == "```" || first_three == "~~~" {
2778                            continue;
2779                        }
2780                    }
2781
2782                    // Skip HTML blocks
2783                    if content_line.starts_with('<') {
2784                        continue;
2785                    }
2786
2787                    let underline = next_line.trim();
2788
2789                    let level = if underline.starts_with('=') { 1 } else { 2 };
2790                    let style = if level == 1 {
2791                        HeadingStyle::Setext1
2792                    } else {
2793                        HeadingStyle::Setext2
2794                    };
2795
2796                    // Extract custom header ID if present
2797                    let raw_text = line.trim().to_string();
2798                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2799
2800                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2801                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2802                        let attr_line = content_lines[i + 2];
2803                        if !lines[i + 2].in_code_block
2804                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2805                            && let Some(attr_line_id) =
2806                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2807                        {
2808                            custom_id = Some(attr_line_id);
2809                        }
2810                    }
2811
2812                    lines[i].heading = Some(HeadingInfo {
2813                        level,
2814                        style,
2815                        marker: underline.to_string(),
2816                        marker_column: next_line.len() - next_line.trim_start().len(),
2817                        content_column: lines[i].indent,
2818                        text: clean_text,
2819                        custom_id,
2820                        raw_text,
2821                        has_closing_sequence: false,
2822                        closing_sequence: String::new(),
2823                        is_valid: true, // Setext headings are always valid
2824                    });
2825                }
2826            }
2827        }
2828    }
2829
2830    /// Detect HTML blocks in the content
2831    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2832        // HTML block elements that trigger block context
2833        // Includes HTML5 media, embedded content, and interactive elements
2834        const BLOCK_ELEMENTS: &[&str] = &[
2835            "address",
2836            "article",
2837            "aside",
2838            "audio",
2839            "blockquote",
2840            "canvas",
2841            "details",
2842            "dialog",
2843            "dd",
2844            "div",
2845            "dl",
2846            "dt",
2847            "embed",
2848            "fieldset",
2849            "figcaption",
2850            "figure",
2851            "footer",
2852            "form",
2853            "h1",
2854            "h2",
2855            "h3",
2856            "h4",
2857            "h5",
2858            "h6",
2859            "header",
2860            "hr",
2861            "iframe",
2862            "li",
2863            "main",
2864            "menu",
2865            "nav",
2866            "noscript",
2867            "object",
2868            "ol",
2869            "p",
2870            "picture",
2871            "pre",
2872            "script",
2873            "search",
2874            "section",
2875            "source",
2876            "style",
2877            "summary",
2878            "svg",
2879            "table",
2880            "tbody",
2881            "td",
2882            "template",
2883            "textarea",
2884            "tfoot",
2885            "th",
2886            "thead",
2887            "tr",
2888            "track",
2889            "ul",
2890            "video",
2891        ];
2892
2893        let mut i = 0;
2894        while i < lines.len() {
2895            // Skip if already in code block or front matter
2896            if lines[i].in_code_block || lines[i].in_front_matter {
2897                i += 1;
2898                continue;
2899            }
2900
2901            let trimmed = lines[i].content(content).trim_start();
2902
2903            // Check if line starts with an HTML tag
2904            if trimmed.starts_with('<') && trimmed.len() > 1 {
2905                // Extract tag name safely
2906                let after_bracket = &trimmed[1..];
2907                let is_closing = after_bracket.starts_with('/');
2908                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2909
2910                // Extract tag name (stop at space, >, /, or end of string)
2911                let tag_name = tag_start
2912                    .chars()
2913                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2914                    .collect::<String>()
2915                    .to_lowercase();
2916
2917                // Check if it's a block element
2918                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2919                    // Mark this line as in HTML block
2920                    lines[i].in_html_block = true;
2921
2922                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2923                    // This avoids complex nesting logic that might cause infinite loops
2924                    if !is_closing {
2925                        let closing_tag = format!("</{tag_name}>");
2926                        // style and script tags can contain blank lines (CSS/JS formatting)
2927                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2928                        let mut j = i + 1;
2929                        let mut found_closing_tag = false;
2930                        while j < lines.len() && j < i + 100 {
2931                            // Limit search to 100 lines
2932                            // Stop at blank lines (except for style/script tags)
2933                            if !allow_blank_lines && lines[j].is_blank {
2934                                break;
2935                            }
2936
2937                            lines[j].in_html_block = true;
2938
2939                            // Check if this line contains the closing tag
2940                            if lines[j].content(content).contains(&closing_tag) {
2941                                found_closing_tag = true;
2942                            }
2943
2944                            // After finding closing tag, continue marking lines as
2945                            // in_html_block until blank line (per CommonMark spec)
2946                            if found_closing_tag {
2947                                j += 1;
2948                                // Continue marking subsequent lines until blank
2949                                while j < lines.len() && j < i + 100 {
2950                                    if lines[j].is_blank {
2951                                        break;
2952                                    }
2953                                    lines[j].in_html_block = true;
2954                                    j += 1;
2955                                }
2956                                break;
2957                            }
2958                            j += 1;
2959                        }
2960                    }
2961                }
2962            }
2963
2964            i += 1;
2965        }
2966    }
2967
2968    /// Detect ESM import/export blocks anywhere in MDX files
2969    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2970    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2971        // Only process MDX files
2972        if !flavor.supports_esm_blocks() {
2973            return;
2974        }
2975
2976        let mut in_multiline_import = false;
2977
2978        for line in lines.iter_mut() {
2979            // Skip code blocks, front matter, and HTML comments
2980            if line.in_code_block || line.in_front_matter || line.in_html_comment {
2981                in_multiline_import = false;
2982                continue;
2983            }
2984
2985            let line_content = line.content(content);
2986            let trimmed = line_content.trim();
2987
2988            // Handle continuation of multi-line import/export
2989            if in_multiline_import {
2990                line.in_esm_block = true;
2991                // Check if this line completes the statement
2992                // Multi-line import ends when we see the closing quote + optional semicolon
2993                if trimmed.ends_with('\'')
2994                    || trimmed.ends_with('"')
2995                    || trimmed.ends_with("';")
2996                    || trimmed.ends_with("\";")
2997                    || line_content.contains(';')
2998                {
2999                    in_multiline_import = false;
3000                }
3001                continue;
3002            }
3003
3004            // Skip blank lines
3005            if line.is_blank {
3006                continue;
3007            }
3008
3009            // Check if line starts with import or export
3010            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3011                line.in_esm_block = true;
3012
3013                // Determine if this is a complete single-line statement or starts a multi-line one
3014                // Multi-line imports look like:
3015                //   import {
3016                //     Foo,
3017                //     Bar
3018                //   } from 'module'
3019                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3020                let is_import = trimmed.starts_with("import ");
3021
3022                // Check for simple complete statements
3023                let is_complete =
3024                    // Ends with semicolon
3025                    trimmed.ends_with(';')
3026                    // import/export with from clause that ends with quote
3027                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3028                    // Simple export (export const/let/var/function/class without from)
3029                    || (!is_import && !trimmed.contains(" from ") && (
3030                        trimmed.starts_with("export const ")
3031                        || trimmed.starts_with("export let ")
3032                        || trimmed.starts_with("export var ")
3033                        || trimmed.starts_with("export function ")
3034                        || trimmed.starts_with("export class ")
3035                        || trimmed.starts_with("export default ")
3036                    ));
3037
3038                if !is_complete && is_import {
3039                    // Only imports can span multiple lines in the typical case
3040                    // Check if it looks like the start of a multi-line import
3041                    // e.g., "import {" or "import type {"
3042                    if trimmed.contains('{') && !trimmed.contains('}') {
3043                        in_multiline_import = true;
3044                    }
3045                }
3046            }
3047        }
3048    }
3049
3050    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3051    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3052    fn detect_jsx_and_mdx_comments(
3053        content: &str,
3054        lines: &mut [LineInfo],
3055        flavor: MarkdownFlavor,
3056        code_blocks: &[(usize, usize)],
3057    ) -> (ByteRanges, ByteRanges) {
3058        // Only process MDX files
3059        if !flavor.supports_jsx() {
3060            return (Vec::new(), Vec::new());
3061        }
3062
3063        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3064        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3065
3066        // Quick check - if no braces, no JSX expressions or MDX comments
3067        if !content.contains('{') {
3068            return (jsx_expression_ranges, mdx_comment_ranges);
3069        }
3070
3071        let bytes = content.as_bytes();
3072        let mut i = 0;
3073
3074        while i < bytes.len() {
3075            if bytes[i] == b'{' {
3076                // Check if we're in a code block
3077                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3078                    i += 1;
3079                    continue;
3080                }
3081
3082                let start = i;
3083
3084                // Check if it's an MDX comment: {/* ... */}
3085                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3086                    // Find the closing */}
3087                    let mut j = i + 3;
3088                    while j + 2 < bytes.len() {
3089                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3090                            let end = j + 3;
3091                            mdx_comment_ranges.push((start, end));
3092
3093                            // Mark lines as in MDX comment
3094                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3095                                line.in_mdx_comment = true;
3096                            });
3097
3098                            i = end;
3099                            break;
3100                        }
3101                        j += 1;
3102                    }
3103                    if j + 2 >= bytes.len() {
3104                        // Unclosed MDX comment - mark rest as comment
3105                        mdx_comment_ranges.push((start, bytes.len()));
3106                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3107                            line.in_mdx_comment = true;
3108                        });
3109                        break;
3110                    }
3111                } else {
3112                    // Regular JSX expression: { ... }
3113                    // Need to handle nested braces
3114                    let mut brace_depth = 1;
3115                    let mut j = i + 1;
3116                    let mut in_string = false;
3117                    let mut string_char = b'"';
3118
3119                    while j < bytes.len() && brace_depth > 0 {
3120                        let c = bytes[j];
3121
3122                        // Handle strings to avoid counting braces inside them
3123                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3124                            in_string = true;
3125                            string_char = c;
3126                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3127                            in_string = false;
3128                        } else if !in_string {
3129                            if c == b'{' {
3130                                brace_depth += 1;
3131                            } else if c == b'}' {
3132                                brace_depth -= 1;
3133                            }
3134                        }
3135                        j += 1;
3136                    }
3137
3138                    if brace_depth == 0 {
3139                        let end = j;
3140                        jsx_expression_ranges.push((start, end));
3141
3142                        // Mark lines as in JSX expression
3143                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3144                            line.in_jsx_expression = true;
3145                        });
3146
3147                        i = end;
3148                    } else {
3149                        i += 1;
3150                    }
3151                }
3152            } else {
3153                i += 1;
3154            }
3155        }
3156
3157        (jsx_expression_ranges, mdx_comment_ranges)
3158    }
3159
3160    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3161    /// and populate the corresponding fields in LineInfo
3162    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3163        if flavor != MarkdownFlavor::MkDocs {
3164            return;
3165        }
3166
3167        use crate::utils::mkdocs_admonitions;
3168        use crate::utils::mkdocs_definition_lists;
3169        use crate::utils::mkdocs_tabs;
3170
3171        let content_lines: Vec<&str> = content.lines().collect();
3172
3173        // Track admonition context
3174        let mut in_admonition = false;
3175        let mut admonition_indent = 0;
3176
3177        // Track tab context
3178        let mut in_tab = false;
3179        let mut tab_indent = 0;
3180
3181        // Track definition list context
3182        let mut in_definition = false;
3183
3184        for (i, line) in content_lines.iter().enumerate() {
3185            if i >= lines.len() {
3186                break;
3187            }
3188
3189            // Skip lines in code blocks
3190            if lines[i].in_code_block {
3191                continue;
3192            }
3193
3194            // Check for admonition markers
3195            if mkdocs_admonitions::is_admonition_start(line) {
3196                in_admonition = true;
3197                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3198                lines[i].in_admonition = true;
3199            } else if in_admonition {
3200                // Check if still in admonition content
3201                if line.trim().is_empty() {
3202                    // Blank lines are part of admonitions
3203                    lines[i].in_admonition = true;
3204                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3205                    lines[i].in_admonition = true;
3206                } else {
3207                    // End of admonition
3208                    in_admonition = false;
3209                    // Check if this line starts a new admonition
3210                    if mkdocs_admonitions::is_admonition_start(line) {
3211                        in_admonition = true;
3212                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3213                        lines[i].in_admonition = true;
3214                    }
3215                }
3216            }
3217
3218            // Check for tab markers
3219            if mkdocs_tabs::is_tab_marker(line) {
3220                in_tab = true;
3221                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3222                lines[i].in_content_tab = true;
3223            } else if in_tab {
3224                // Check if still in tab content
3225                if line.trim().is_empty() {
3226                    // Blank lines are part of tabs
3227                    lines[i].in_content_tab = true;
3228                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3229                    lines[i].in_content_tab = true;
3230                } else {
3231                    // End of tab content
3232                    in_tab = false;
3233                    // Check if this line starts a new tab
3234                    if mkdocs_tabs::is_tab_marker(line) {
3235                        in_tab = true;
3236                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3237                        lines[i].in_content_tab = true;
3238                    }
3239                }
3240            }
3241
3242            // Check for definition list items
3243            if mkdocs_definition_lists::is_definition_line(line) {
3244                in_definition = true;
3245                lines[i].in_definition_list = true;
3246            } else if in_definition {
3247                // Check if continuation
3248                if mkdocs_definition_lists::is_definition_continuation(line) {
3249                    lines[i].in_definition_list = true;
3250                } else if line.trim().is_empty() {
3251                    // Blank line might continue definition
3252                    lines[i].in_definition_list = true;
3253                } else if mkdocs_definition_lists::could_be_term_line(line) {
3254                    // This could be a new term - check if followed by definition
3255                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3256                    {
3257                        lines[i].in_definition_list = true;
3258                    } else {
3259                        in_definition = false;
3260                    }
3261                } else {
3262                    in_definition = false;
3263                }
3264            } else if mkdocs_definition_lists::could_be_term_line(line) {
3265                // Check if this is a term followed by a definition
3266                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3267                    lines[i].in_definition_list = true;
3268                    in_definition = true;
3269                }
3270            }
3271        }
3272    }
3273
3274    /// Helper to mark lines within a byte range
3275    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3276    where
3277        F: FnMut(&mut LineInfo),
3278    {
3279        // Find lines that overlap with the range
3280        for line in lines.iter_mut() {
3281            let line_start = line.byte_offset;
3282            let line_end = line.byte_offset + line.byte_len;
3283
3284            // Check if this line overlaps with the range
3285            if line_start < end && line_end > start {
3286                f(line);
3287            }
3288        }
3289
3290        // Silence unused warning for content (needed for signature consistency)
3291        let _ = content;
3292    }
3293
3294    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3295    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3296        let mut code_spans = Vec::new();
3297
3298        // Quick check - if no backticks, no code spans
3299        if !content.contains('`') {
3300            return code_spans;
3301        }
3302
3303        // Use pulldown-cmark's streaming parser with byte offsets
3304        let parser = Parser::new(content).into_offset_iter();
3305
3306        for (event, range) in parser {
3307            if let Event::Code(_) = event {
3308                let start_pos = range.start;
3309                let end_pos = range.end;
3310
3311                // The range includes the backticks, extract the actual content
3312                let full_span = &content[start_pos..end_pos];
3313                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3314
3315                // Extract content between backticks, preserving spaces
3316                let content_start = start_pos + backtick_count;
3317                let content_end = end_pos - backtick_count;
3318                let span_content = if content_start < content_end {
3319                    content[content_start..content_end].to_string()
3320                } else {
3321                    String::new()
3322                };
3323
3324                // Use binary search to find line number - O(log n) instead of O(n)
3325                // Find the rightmost line whose byte_offset <= start_pos
3326                let line_idx = lines
3327                    .partition_point(|line| line.byte_offset <= start_pos)
3328                    .saturating_sub(1);
3329                let line_num = line_idx + 1;
3330                let byte_col_start = start_pos - lines[line_idx].byte_offset;
3331
3332                // Find end column using binary search
3333                let end_line_idx = lines
3334                    .partition_point(|line| line.byte_offset <= end_pos)
3335                    .saturating_sub(1);
3336                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3337
3338                // Convert byte offsets to character positions for correct Unicode handling
3339                // This ensures consistency with warning.column which uses character positions
3340                let line_content = lines[line_idx].content(content);
3341                let col_start = if byte_col_start <= line_content.len() {
3342                    line_content[..byte_col_start].chars().count()
3343                } else {
3344                    line_content.chars().count()
3345                };
3346
3347                let end_line_content = lines[end_line_idx].content(content);
3348                let col_end = if byte_col_end <= end_line_content.len() {
3349                    end_line_content[..byte_col_end].chars().count()
3350                } else {
3351                    end_line_content.chars().count()
3352                };
3353
3354                code_spans.push(CodeSpan {
3355                    line: line_num,
3356                    end_line: end_line_idx + 1,
3357                    start_col: col_start,
3358                    end_col: col_end,
3359                    byte_offset: start_pos,
3360                    byte_end: end_pos,
3361                    backtick_count,
3362                    content: span_content,
3363                });
3364            }
3365        }
3366
3367        // Sort by position to ensure consistent ordering
3368        code_spans.sort_by_key(|span| span.byte_offset);
3369
3370        code_spans
3371    }
3372
3373    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3374    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3375        let mut math_spans = Vec::new();
3376
3377        // Quick check - if no $ signs, no math spans
3378        if !content.contains('$') {
3379            return math_spans;
3380        }
3381
3382        // Use pulldown-cmark with ENABLE_MATH option
3383        let mut options = Options::empty();
3384        options.insert(Options::ENABLE_MATH);
3385        let parser = Parser::new_ext(content, options).into_offset_iter();
3386
3387        for (event, range) in parser {
3388            let (is_display, math_content) = match &event {
3389                Event::InlineMath(text) => (false, text.as_ref()),
3390                Event::DisplayMath(text) => (true, text.as_ref()),
3391                _ => continue,
3392            };
3393
3394            let start_pos = range.start;
3395            let end_pos = range.end;
3396
3397            // Use binary search to find line number - O(log n) instead of O(n)
3398            let line_idx = lines
3399                .partition_point(|line| line.byte_offset <= start_pos)
3400                .saturating_sub(1);
3401            let line_num = line_idx + 1;
3402            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3403
3404            // Find end column using binary search
3405            let end_line_idx = lines
3406                .partition_point(|line| line.byte_offset <= end_pos)
3407                .saturating_sub(1);
3408            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3409
3410            // Convert byte offsets to character positions for correct Unicode handling
3411            let line_content = lines[line_idx].content(content);
3412            let col_start = if byte_col_start <= line_content.len() {
3413                line_content[..byte_col_start].chars().count()
3414            } else {
3415                line_content.chars().count()
3416            };
3417
3418            let end_line_content = lines[end_line_idx].content(content);
3419            let col_end = if byte_col_end <= end_line_content.len() {
3420                end_line_content[..byte_col_end].chars().count()
3421            } else {
3422                end_line_content.chars().count()
3423            };
3424
3425            math_spans.push(MathSpan {
3426                line: line_num,
3427                end_line: end_line_idx + 1,
3428                start_col: col_start,
3429                end_col: col_end,
3430                byte_offset: start_pos,
3431                byte_end: end_pos,
3432                is_display,
3433                content: math_content.to_string(),
3434            });
3435        }
3436
3437        // Sort by position to ensure consistent ordering
3438        math_spans.sort_by_key(|span| span.byte_offset);
3439
3440        math_spans
3441    }
3442
3443    /// Parse all list blocks in the content (legacy line-by-line approach)
3444    ///
3445    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3446    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3447    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3448    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3449    ///   treated as list continuation (based on the list marker width)
3450    ///
3451    /// When a new list item is encountered, we check if list-breaking content was seen
3452    /// since the last item. If so, we start a new list block.
3453    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3454        // Minimum indentation for unordered list continuation per CommonMark spec
3455        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3456
3457        /// Initialize or reset the forward-scanning tracking state.
3458        /// This helper eliminates code duplication across three initialization sites.
3459        #[inline]
3460        fn reset_tracking_state(
3461            list_item: &ListItemInfo,
3462            has_list_breaking_content: &mut bool,
3463            min_continuation: &mut usize,
3464        ) {
3465            *has_list_breaking_content = false;
3466            let marker_width = if list_item.is_ordered {
3467                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3468            } else {
3469                list_item.marker.len()
3470            };
3471            *min_continuation = if list_item.is_ordered {
3472                marker_width
3473            } else {
3474                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3475            };
3476        }
3477
3478        // Pre-size based on lines that could be list items
3479        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3480        let mut current_block: Option<ListBlock> = None;
3481        let mut last_list_item_line = 0;
3482        let mut current_indent_level = 0;
3483        let mut last_marker_width = 0;
3484
3485        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3486        let mut has_list_breaking_content_since_last_item = false;
3487        let mut min_continuation_for_tracking = 0;
3488
3489        for (line_idx, line_info) in lines.iter().enumerate() {
3490            let line_num = line_idx + 1;
3491
3492            // Enhanced code block handling using Design #3's context analysis
3493            if line_info.in_code_block {
3494                if let Some(ref mut block) = current_block {
3495                    // Calculate minimum indentation for list continuation
3496                    let min_continuation_indent =
3497                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3498
3499                    // Analyze code block context using the three-tier classification
3500                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3501
3502                    match context {
3503                        CodeBlockContext::Indented => {
3504                            // Code block is properly indented - continues the list
3505                            block.end_line = line_num;
3506                            continue;
3507                        }
3508                        CodeBlockContext::Standalone => {
3509                            // Code block separates lists - end current block
3510                            let completed_block = current_block.take().unwrap();
3511                            list_blocks.push(completed_block);
3512                            continue;
3513                        }
3514                        CodeBlockContext::Adjacent => {
3515                            // Edge case - use conservative behavior (continue list)
3516                            block.end_line = line_num;
3517                            continue;
3518                        }
3519                    }
3520                } else {
3521                    // No current list block - skip code block lines
3522                    continue;
3523                }
3524            }
3525
3526            // Extract blockquote prefix if any
3527            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3528                caps.get(0).unwrap().as_str().to_string()
3529            } else {
3530                String::new()
3531            };
3532
3533            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3534            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3535            if let Some(ref block) = current_block
3536                && line_info.list_item.is_none()
3537                && !line_info.is_blank
3538                && !line_info.in_code_span_continuation
3539            {
3540                let line_content = line_info.content(content).trim();
3541
3542                // Check for structural separators that break lists
3543                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3544                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3545                // as they indicate improper indentation rather than lazy continuation.
3546                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3547
3548                // Check if blockquote context changes (different prefix than current block)
3549                // Lines within the SAME blockquote context don't break lists
3550                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3551
3552                let breaks_list = line_info.heading.is_some()
3553                    || line_content.starts_with("---")
3554                    || line_content.starts_with("***")
3555                    || line_content.starts_with("___")
3556                    || crate::utils::skip_context::is_table_line(line_content)
3557                    || blockquote_prefix_changes
3558                    || (line_info.indent > 0
3559                        && line_info.indent < min_continuation_for_tracking
3560                        && !is_lazy_continuation);
3561
3562                if breaks_list {
3563                    has_list_breaking_content_since_last_item = true;
3564                }
3565            }
3566
3567            // If this line is a code span continuation within an active list block,
3568            // extend the block's end_line to include this line (maintains list continuity)
3569            if line_info.in_code_span_continuation
3570                && line_info.list_item.is_none()
3571                && let Some(ref mut block) = current_block
3572            {
3573                block.end_line = line_num;
3574            }
3575
3576            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3577            // properly indented lines within the list). This ensures the workaround at line 2448
3578            // works correctly when there are multiple continuation lines before a nested list item.
3579            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3580            // For blockquote lines, compute effective indent after stripping the prefix
3581            let effective_continuation_indent = if let Some(ref block) = current_block {
3582                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3583                let line_content = line_info.content(content);
3584                let line_bq_level = line_content
3585                    .chars()
3586                    .take_while(|c| *c == '>' || c.is_whitespace())
3587                    .filter(|&c| c == '>')
3588                    .count();
3589                if line_bq_level > 0 && line_bq_level == block_bq_level {
3590                    // Compute indent after blockquote markers
3591                    let mut pos = 0;
3592                    let mut found_markers = 0;
3593                    for c in line_content.chars() {
3594                        pos += c.len_utf8();
3595                        if c == '>' {
3596                            found_markers += 1;
3597                            if found_markers == line_bq_level {
3598                                if line_content.get(pos..pos + 1) == Some(" ") {
3599                                    pos += 1;
3600                                }
3601                                break;
3602                            }
3603                        }
3604                    }
3605                    let after_bq = &line_content[pos..];
3606                    after_bq.len() - after_bq.trim_start().len()
3607                } else {
3608                    line_info.indent
3609                }
3610            } else {
3611                line_info.indent
3612            };
3613            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3614                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3615                if block_bq_level > 0 {
3616                    if block.is_ordered { last_marker_width } else { 2 }
3617                } else {
3618                    min_continuation_for_tracking
3619                }
3620            } else {
3621                min_continuation_for_tracking
3622            };
3623            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3624                || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
3625
3626            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3627                eprintln!(
3628                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3629                    line_num,
3630                    effective_continuation_indent,
3631                    adjusted_min_continuation_for_tracking,
3632                    is_valid_continuation,
3633                    line_info.in_code_span_continuation,
3634                    line_info.in_code_block,
3635                    current_block.is_some()
3636                );
3637            }
3638
3639            if !line_info.in_code_span_continuation
3640                && line_info.list_item.is_none()
3641                && !line_info.is_blank
3642                && !line_info.in_code_block
3643                && is_valid_continuation
3644                && let Some(ref mut block) = current_block
3645            {
3646                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3647                    eprintln!(
3648                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3649                        line_num, block.end_line, line_num
3650                    );
3651                }
3652                block.end_line = line_num;
3653            }
3654
3655            // Check if this line is a list item
3656            if let Some(list_item) = &line_info.list_item {
3657                // Calculate nesting level based on indentation
3658                let item_indent = list_item.marker_column;
3659                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3660
3661                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3662                    eprintln!(
3663                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3664                        line_num, list_item.marker, item_indent
3665                    );
3666                }
3667
3668                if let Some(ref mut block) = current_block {
3669                    // Check if this continues the current block
3670                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3671                    // or a continuation at the same or lower level
3672                    let is_nested = nesting > block.nesting_level;
3673                    let same_type =
3674                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3675                    let same_context = block.blockquote_prefix == blockquote_prefix;
3676                    // Allow one blank line after last item, or lines immediately after block content
3677                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3678
3679                    // For unordered lists, also check marker consistency
3680                    let marker_compatible =
3681                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3682
3683                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3684                    // This eliminates the quadratic bottleneck from issue #148
3685                    let has_non_list_content = has_list_breaking_content_since_last_item;
3686
3687                    // A list continues if:
3688                    // 1. It's a nested item (indented more than the parent), OR
3689                    // 2. It's the same type at the same level with reasonable distance
3690                    let mut continues_list = if is_nested {
3691                        // Nested items always continue the list if they're in the same context
3692                        same_context && reasonable_distance && !has_non_list_content
3693                    } else {
3694                        // Same-level items need to match type and markers
3695                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3696                    };
3697
3698                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3699                        eprintln!(
3700                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3701                            line_num,
3702                            continues_list,
3703                            is_nested,
3704                            same_type,
3705                            same_context,
3706                            reasonable_distance,
3707                            marker_compatible,
3708                            has_non_list_content,
3709                            last_list_item_line,
3710                            block.end_line
3711                        );
3712                    }
3713
3714                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3715                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3716                    // Apply for: nested items (different types OK), OR same-level same-type items
3717                    if !continues_list
3718                        && (is_nested || same_type)
3719                        && reasonable_distance
3720                        && line_num > 0
3721                        && block.end_line == line_num - 1
3722                    {
3723                        // Check if the previous line was a list item or a continuation of a list item
3724                        // (including lazy continuation lines)
3725                        if block.item_lines.contains(&(line_num - 1)) {
3726                            // They're consecutive list items - force them to be in the same list
3727                            continues_list = true;
3728                        } else {
3729                            // Previous line is a continuation line within this block
3730                            // (e.g., lazy continuation with indent=0)
3731                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3732                            continues_list = true;
3733                        }
3734                    }
3735
3736                    if continues_list {
3737                        // Extend current block
3738                        block.end_line = line_num;
3739                        block.item_lines.push(line_num);
3740
3741                        // Update max marker width
3742                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3743                            list_item.marker.len() + 1
3744                        } else {
3745                            list_item.marker.len()
3746                        });
3747
3748                        // Update marker consistency for unordered lists
3749                        if !block.is_ordered
3750                            && block.marker.is_some()
3751                            && block.marker.as_ref() != Some(&list_item.marker)
3752                        {
3753                            // Mixed markers, clear the marker field
3754                            block.marker = None;
3755                        }
3756
3757                        // Reset tracked state for issue #148 optimization
3758                        reset_tracking_state(
3759                            list_item,
3760                            &mut has_list_breaking_content_since_last_item,
3761                            &mut min_continuation_for_tracking,
3762                        );
3763                    } else {
3764                        // End current block and start a new one
3765                        // When a different list type starts AT THE SAME LEVEL (not nested),
3766                        // trim back lazy continuation lines (they become part of the gap, not the list)
3767                        // For nested items, different types are fine - they're sub-lists
3768                        if !same_type
3769                            && !is_nested
3770                            && let Some(&last_item) = block.item_lines.last()
3771                        {
3772                            block.end_line = last_item;
3773                        }
3774
3775                        list_blocks.push(block.clone());
3776
3777                        *block = ListBlock {
3778                            start_line: line_num,
3779                            end_line: line_num,
3780                            is_ordered: list_item.is_ordered,
3781                            marker: if list_item.is_ordered {
3782                                None
3783                            } else {
3784                                Some(list_item.marker.clone())
3785                            },
3786                            blockquote_prefix: blockquote_prefix.clone(),
3787                            item_lines: vec![line_num],
3788                            nesting_level: nesting,
3789                            max_marker_width: if list_item.is_ordered {
3790                                list_item.marker.len() + 1
3791                            } else {
3792                                list_item.marker.len()
3793                            },
3794                        };
3795
3796                        // Initialize tracked state for new block (issue #148 optimization)
3797                        reset_tracking_state(
3798                            list_item,
3799                            &mut has_list_breaking_content_since_last_item,
3800                            &mut min_continuation_for_tracking,
3801                        );
3802                    }
3803                } else {
3804                    // Start a new block
3805                    current_block = Some(ListBlock {
3806                        start_line: line_num,
3807                        end_line: line_num,
3808                        is_ordered: list_item.is_ordered,
3809                        marker: if list_item.is_ordered {
3810                            None
3811                        } else {
3812                            Some(list_item.marker.clone())
3813                        },
3814                        blockquote_prefix,
3815                        item_lines: vec![line_num],
3816                        nesting_level: nesting,
3817                        max_marker_width: list_item.marker.len(),
3818                    });
3819
3820                    // Initialize tracked state for new block (issue #148 optimization)
3821                    reset_tracking_state(
3822                        list_item,
3823                        &mut has_list_breaking_content_since_last_item,
3824                        &mut min_continuation_for_tracking,
3825                    );
3826                }
3827
3828                last_list_item_line = line_num;
3829                current_indent_level = item_indent;
3830                last_marker_width = if list_item.is_ordered {
3831                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3832                } else {
3833                    list_item.marker.len()
3834                };
3835            } else if let Some(ref mut block) = current_block {
3836                // Not a list item - check if it continues the current block
3837                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3838                    eprintln!(
3839                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3840                        line_num, line_info.is_blank
3841                    );
3842                }
3843
3844                // For MD032 compatibility, we use a simple approach:
3845                // - Indented lines continue the list
3846                // - Blank lines followed by indented content continue the list
3847                // - Everything else ends the list
3848
3849                // Check if the last line in the list block ended with a backslash (hard line break)
3850                // This handles cases where list items use backslash for hard line breaks
3851                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3852                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3853                } else {
3854                    false
3855                };
3856
3857                // Calculate minimum indentation for list continuation
3858                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3859                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3860                let min_continuation_indent = if block.is_ordered {
3861                    current_indent_level + last_marker_width
3862                } else {
3863                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3864                };
3865
3866                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3867                    // Indented line or backslash continuation continues the list
3868                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3869                        eprintln!(
3870                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3871                            line_num, line_info.indent, min_continuation_indent
3872                        );
3873                    }
3874                    block.end_line = line_num;
3875                } else if line_info.is_blank {
3876                    // Blank line - check if it's internal to the list or ending it
3877                    // We only include blank lines that are followed by more list content
3878                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3879                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3880                    }
3881                    let mut check_idx = line_idx + 1;
3882                    let mut found_continuation = false;
3883
3884                    // Skip additional blank lines
3885                    while check_idx < lines.len() && lines[check_idx].is_blank {
3886                        check_idx += 1;
3887                    }
3888
3889                    if check_idx < lines.len() {
3890                        let next_line = &lines[check_idx];
3891                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3892                        let next_content = next_line.content(content);
3893                        // Use blockquote level (count of >) to compare, not the full prefix
3894                        // This avoids issues where the regex captures extra whitespace
3895                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3896                        let next_bq_level_for_indent = next_content
3897                            .chars()
3898                            .take_while(|c| *c == '>' || c.is_whitespace())
3899                            .filter(|&c| c == '>')
3900                            .count();
3901                        let effective_indent =
3902                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3903                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3904                                // Find position after ">" and one space
3905                                let mut pos = 0;
3906                                let mut found_markers = 0;
3907                                for c in next_content.chars() {
3908                                    pos += c.len_utf8();
3909                                    if c == '>' {
3910                                        found_markers += 1;
3911                                        if found_markers == next_bq_level_for_indent {
3912                                            // Skip optional space after last >
3913                                            if next_content.get(pos..pos + 1) == Some(" ") {
3914                                                pos += 1;
3915                                            }
3916                                            break;
3917                                        }
3918                                    }
3919                                }
3920                                let after_blockquote_marker = &next_content[pos..];
3921                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3922                            } else {
3923                                next_line.indent
3924                            };
3925                        // Also adjust min_continuation_indent for blockquote lists
3926                        // The marker_column includes blockquote prefix, so subtract it
3927                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3928                            // For blockquote lists, the continuation is relative to blockquote content
3929                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3930                            if block.is_ordered { last_marker_width } else { 2 }
3931                        } else {
3932                            min_continuation_indent
3933                        };
3934                        // Check if followed by indented content (list continuation)
3935                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3936                            eprintln!(
3937                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3938                                line_num,
3939                                check_idx + 1,
3940                                effective_indent,
3941                                adjusted_min_continuation,
3942                                next_line.list_item.is_some(),
3943                                next_line.in_code_block
3944                            );
3945                        }
3946                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3947                            found_continuation = true;
3948                        }
3949                        // Check if followed by another list item at the same level
3950                        else if !next_line.in_code_block
3951                            && next_line.list_item.is_some()
3952                            && let Some(item) = &next_line.list_item
3953                        {
3954                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3955                                .find(next_line.content(content))
3956                                .map_or(String::new(), |m| m.as_str().to_string());
3957                            if item.marker_column == current_indent_level
3958                                && item.is_ordered == block.is_ordered
3959                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3960                            {
3961                                // Check if there was meaningful content between the list items (unused now)
3962                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3963                                // Pre-compute block's blockquote level for use in closures
3964                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3965                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3966                                    if let Some(between_line) = lines.get(idx) {
3967                                        let between_content = between_line.content(content);
3968                                        let trimmed = between_content.trim();
3969                                        // Skip empty lines
3970                                        if trimmed.is_empty() {
3971                                            return false;
3972                                        }
3973                                        // Check for meaningful content
3974                                        let line_indent = between_content.len() - between_content.trim_start().len();
3975
3976                                        // Check if blockquote level changed (not just if line starts with ">")
3977                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3978                                            .find(between_content)
3979                                            .map_or(String::new(), |m| m.as_str().to_string());
3980                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3981                                        let blockquote_level_changed =
3982                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
3983
3984                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
3985                                        if trimmed.starts_with("```")
3986                                            || trimmed.starts_with("~~~")
3987                                            || trimmed.starts_with("---")
3988                                            || trimmed.starts_with("***")
3989                                            || trimmed.starts_with("___")
3990                                            || blockquote_level_changed
3991                                            || crate::utils::skip_context::is_table_line(trimmed)
3992                                            || between_line.heading.is_some()
3993                                        {
3994                                            return true; // These are structural separators - meaningful content that breaks lists
3995                                        }
3996
3997                                        // Only properly indented content continues the list
3998                                        line_indent >= min_continuation_indent
3999                                    } else {
4000                                        false
4001                                    }
4002                                });
4003
4004                                if block.is_ordered {
4005                                    // For ordered lists: don't continue if there are structural separators
4006                                    // Check if there are structural separators between the list items
4007                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4008                                        if let Some(between_line) = lines.get(idx) {
4009                                            let between_content = between_line.content(content);
4010                                            let trimmed = between_content.trim();
4011                                            if trimmed.is_empty() {
4012                                                return false;
4013                                            }
4014                                            // Check if blockquote level changed (not just if line starts with ">")
4015                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4016                                                .find(between_content)
4017                                                .map_or(String::new(), |m| m.as_str().to_string());
4018                                            let between_bq_level =
4019                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4020                                            let blockquote_level_changed =
4021                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4022                                            // Check for structural separators that break lists
4023                                            trimmed.starts_with("```")
4024                                                || trimmed.starts_with("~~~")
4025                                                || trimmed.starts_with("---")
4026                                                || trimmed.starts_with("***")
4027                                                || trimmed.starts_with("___")
4028                                                || blockquote_level_changed
4029                                                || crate::utils::skip_context::is_table_line(trimmed)
4030                                                || between_line.heading.is_some()
4031                                        } else {
4032                                            false
4033                                        }
4034                                    });
4035                                    found_continuation = !has_structural_separators;
4036                                } else {
4037                                    // For unordered lists: also check for structural separators
4038                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4039                                        if let Some(between_line) = lines.get(idx) {
4040                                            let between_content = between_line.content(content);
4041                                            let trimmed = between_content.trim();
4042                                            if trimmed.is_empty() {
4043                                                return false;
4044                                            }
4045                                            // Check if blockquote level changed (not just if line starts with ">")
4046                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4047                                                .find(between_content)
4048                                                .map_or(String::new(), |m| m.as_str().to_string());
4049                                            let between_bq_level =
4050                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4051                                            let blockquote_level_changed =
4052                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4053                                            // Check for structural separators that break lists
4054                                            trimmed.starts_with("```")
4055                                                || trimmed.starts_with("~~~")
4056                                                || trimmed.starts_with("---")
4057                                                || trimmed.starts_with("***")
4058                                                || trimmed.starts_with("___")
4059                                                || blockquote_level_changed
4060                                                || crate::utils::skip_context::is_table_line(trimmed)
4061                                                || between_line.heading.is_some()
4062                                        } else {
4063                                            false
4064                                        }
4065                                    });
4066                                    found_continuation = !has_structural_separators;
4067                                }
4068                            }
4069                        }
4070                    }
4071
4072                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4073                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4074                    }
4075                    if found_continuation {
4076                        // Include the blank line in the block
4077                        block.end_line = line_num;
4078                    } else {
4079                        // Blank line ends the list - don't include it
4080                        list_blocks.push(block.clone());
4081                        current_block = None;
4082                    }
4083                } else {
4084                    // Check for lazy continuation - non-indented line immediately after a list item
4085                    // But only if the line has sufficient indentation for the list type
4086                    let min_required_indent = if block.is_ordered {
4087                        current_indent_level + last_marker_width
4088                    } else {
4089                        current_indent_level + 2
4090                    };
4091
4092                    // For lazy continuation to apply, the line must either:
4093                    // 1. Have no indentation (true lazy continuation)
4094                    // 2. Have sufficient indentation for the list type
4095                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4096                    let line_content = line_info.content(content).trim();
4097
4098                    // Check for table-like patterns
4099                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4100
4101                    // Check if blockquote level changed (not just if line starts with ">")
4102                    // Lines within the same blockquote level are NOT structural separators
4103                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4104                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4105                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4106
4107                    let is_structural_separator = line_info.heading.is_some()
4108                        || line_content.starts_with("```")
4109                        || line_content.starts_with("~~~")
4110                        || line_content.starts_with("---")
4111                        || line_content.starts_with("***")
4112                        || line_content.starts_with("___")
4113                        || blockquote_level_changed
4114                        || looks_like_table;
4115
4116                    // Allow lazy continuation if we're still within the same list block
4117                    // (not just immediately after a list item)
4118                    // Also treat code span continuations as valid continuations regardless of indent
4119                    let is_lazy_continuation = !is_structural_separator
4120                        && !line_info.is_blank
4121                        && (line_info.indent == 0
4122                            || line_info.indent >= min_required_indent
4123                            || line_info.in_code_span_continuation);
4124
4125                    if is_lazy_continuation {
4126                        // Per CommonMark, lazy continuation continues until a blank line
4127                        // or structural element, regardless of uppercase at line start
4128                        block.end_line = line_num;
4129                    } else {
4130                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4131                        list_blocks.push(block.clone());
4132                        current_block = None;
4133                    }
4134                }
4135            }
4136        }
4137
4138        // Don't forget the last block
4139        if let Some(block) = current_block {
4140            list_blocks.push(block);
4141        }
4142
4143        // Merge adjacent blocks that should be one
4144        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4145
4146        list_blocks
4147    }
4148
4149    /// Compute character frequency for fast content analysis
4150    fn compute_char_frequency(content: &str) -> CharFrequency {
4151        let mut frequency = CharFrequency::default();
4152
4153        for ch in content.chars() {
4154            match ch {
4155                '#' => frequency.hash_count += 1,
4156                '*' => frequency.asterisk_count += 1,
4157                '_' => frequency.underscore_count += 1,
4158                '-' => frequency.hyphen_count += 1,
4159                '+' => frequency.plus_count += 1,
4160                '>' => frequency.gt_count += 1,
4161                '|' => frequency.pipe_count += 1,
4162                '[' => frequency.bracket_count += 1,
4163                '`' => frequency.backtick_count += 1,
4164                '<' => frequency.lt_count += 1,
4165                '!' => frequency.exclamation_count += 1,
4166                '\n' => frequency.newline_count += 1,
4167                _ => {}
4168            }
4169        }
4170
4171        frequency
4172    }
4173
4174    /// Parse HTML tags in the content
4175    fn parse_html_tags(
4176        content: &str,
4177        lines: &[LineInfo],
4178        code_blocks: &[(usize, usize)],
4179        flavor: MarkdownFlavor,
4180    ) -> Vec<HtmlTag> {
4181        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4182            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4183
4184        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4185
4186        for cap in HTML_TAG_REGEX.captures_iter(content) {
4187            let full_match = cap.get(0).unwrap();
4188            let match_start = full_match.start();
4189            let match_end = full_match.end();
4190
4191            // Skip if in code block
4192            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4193                continue;
4194            }
4195
4196            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4197            let tag_name_original = cap.get(2).unwrap().as_str();
4198            let tag_name = tag_name_original.to_lowercase();
4199            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4200
4201            // Skip JSX components in MDX files (tags starting with uppercase letter)
4202            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4203            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4204                continue;
4205            }
4206
4207            // Find which line this tag is on
4208            let mut line_num = 1;
4209            let mut col_start = match_start;
4210            let mut col_end = match_end;
4211            for (idx, line_info) in lines.iter().enumerate() {
4212                if match_start >= line_info.byte_offset {
4213                    line_num = idx + 1;
4214                    col_start = match_start - line_info.byte_offset;
4215                    col_end = match_end - line_info.byte_offset;
4216                } else {
4217                    break;
4218                }
4219            }
4220
4221            html_tags.push(HtmlTag {
4222                line: line_num,
4223                start_col: col_start,
4224                end_col: col_end,
4225                byte_offset: match_start,
4226                byte_end: match_end,
4227                tag_name,
4228                is_closing,
4229                is_self_closing,
4230                raw_content: full_match.as_str().to_string(),
4231            });
4232        }
4233
4234        html_tags
4235    }
4236
4237    /// Parse table rows in the content
4238    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4239        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4240
4241        for (line_idx, line_info) in lines.iter().enumerate() {
4242            // Skip lines in code blocks or blank lines
4243            if line_info.in_code_block || line_info.is_blank {
4244                continue;
4245            }
4246
4247            let line = line_info.content(content);
4248            let line_num = line_idx + 1;
4249
4250            // Check if this line contains pipes (potential table row)
4251            if !line.contains('|') {
4252                continue;
4253            }
4254
4255            // Count columns by splitting on pipes
4256            let parts: Vec<&str> = line.split('|').collect();
4257            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4258
4259            // Check if this is a separator row
4260            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4261            let mut column_alignments = Vec::new();
4262
4263            if is_separator {
4264                for part in &parts[1..parts.len() - 1] {
4265                    // Skip first and last empty parts
4266                    let trimmed = part.trim();
4267                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4268                        "center".to_string()
4269                    } else if trimmed.ends_with(':') {
4270                        "right".to_string()
4271                    } else if trimmed.starts_with(':') {
4272                        "left".to_string()
4273                    } else {
4274                        "none".to_string()
4275                    };
4276                    column_alignments.push(alignment);
4277                }
4278            }
4279
4280            table_rows.push(TableRow {
4281                line: line_num,
4282                is_separator,
4283                column_count,
4284                column_alignments,
4285            });
4286        }
4287
4288        table_rows
4289    }
4290
4291    /// Parse bare URLs and emails in the content
4292    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4293        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4294
4295        // Check for bare URLs (not in angle brackets or markdown links)
4296        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4297            let full_match = cap.get(0).unwrap();
4298            let match_start = full_match.start();
4299            let match_end = full_match.end();
4300
4301            // Skip if in code block
4302            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4303                continue;
4304            }
4305
4306            // Skip if already in angle brackets or markdown links
4307            let preceding_char = if match_start > 0 {
4308                content.chars().nth(match_start - 1)
4309            } else {
4310                None
4311            };
4312            let following_char = content.chars().nth(match_end);
4313
4314            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4315                continue;
4316            }
4317            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4318                continue;
4319            }
4320
4321            let url = full_match.as_str();
4322            let url_type = if url.starts_with("https://") {
4323                "https"
4324            } else if url.starts_with("http://") {
4325                "http"
4326            } else if url.starts_with("ftp://") {
4327                "ftp"
4328            } else {
4329                "other"
4330            };
4331
4332            // Find which line this URL is on
4333            let mut line_num = 1;
4334            let mut col_start = match_start;
4335            let mut col_end = match_end;
4336            for (idx, line_info) in lines.iter().enumerate() {
4337                if match_start >= line_info.byte_offset {
4338                    line_num = idx + 1;
4339                    col_start = match_start - line_info.byte_offset;
4340                    col_end = match_end - line_info.byte_offset;
4341                } else {
4342                    break;
4343                }
4344            }
4345
4346            bare_urls.push(BareUrl {
4347                line: line_num,
4348                start_col: col_start,
4349                end_col: col_end,
4350                byte_offset: match_start,
4351                byte_end: match_end,
4352                url: url.to_string(),
4353                url_type: url_type.to_string(),
4354            });
4355        }
4356
4357        // Check for bare email addresses
4358        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4359            let full_match = cap.get(0).unwrap();
4360            let match_start = full_match.start();
4361            let match_end = full_match.end();
4362
4363            // Skip if in code block
4364            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4365                continue;
4366            }
4367
4368            // Skip if already in angle brackets or markdown links
4369            let preceding_char = if match_start > 0 {
4370                content.chars().nth(match_start - 1)
4371            } else {
4372                None
4373            };
4374            let following_char = content.chars().nth(match_end);
4375
4376            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4377                continue;
4378            }
4379            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4380                continue;
4381            }
4382
4383            let email = full_match.as_str();
4384
4385            // Find which line this email is on
4386            let mut line_num = 1;
4387            let mut col_start = match_start;
4388            let mut col_end = match_end;
4389            for (idx, line_info) in lines.iter().enumerate() {
4390                if match_start >= line_info.byte_offset {
4391                    line_num = idx + 1;
4392                    col_start = match_start - line_info.byte_offset;
4393                    col_end = match_end - line_info.byte_offset;
4394                } else {
4395                    break;
4396                }
4397            }
4398
4399            bare_urls.push(BareUrl {
4400                line: line_num,
4401                start_col: col_start,
4402                end_col: col_end,
4403                byte_offset: match_start,
4404                byte_end: match_end,
4405                url: email.to_string(),
4406                url_type: "email".to_string(),
4407            });
4408        }
4409
4410        bare_urls
4411    }
4412
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is constructed from this context's `lines`; the filtering itself is
    /// implemented by `ValidHeadingsIter` (defined elsewhere in this file).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4436
4437    /// Check if the document contains any valid CommonMark headings
4438    ///
4439    /// Returns `true` if there is at least one heading with proper space after `#`.
4440    #[must_use]
4441    pub fn has_valid_headings(&self) -> bool {
4442        self.lines
4443            .iter()
4444            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4445    }
4446}
4447
4448/// Merge adjacent list blocks that should be treated as one
4449fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4450    if list_blocks.len() < 2 {
4451        return;
4452    }
4453
4454    let mut merger = ListBlockMerger::new(content, lines);
4455    *list_blocks = merger.merge(list_blocks);
4456}
4457
/// Helper struct to manage the complex logic of merging list blocks
///
/// Borrows the document text and its per-line metadata for the duration of a merge pass.
struct ListBlockMerger<'a> {
    // Full document text; passed to `LineInfo::content` to obtain a line's text.
    content: &'a str,
    // Per-line metadata; looked up with a 1-based line number minus one.
    lines: &'a [LineInfo],
}
4463
4464impl<'a> ListBlockMerger<'a> {
4465    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4466        Self { content, lines }
4467    }
4468
4469    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4470        let mut merged = Vec::with_capacity(list_blocks.len());
4471        let mut current = list_blocks[0].clone();
4472
4473        for next in list_blocks.iter().skip(1) {
4474            if self.should_merge_blocks(&current, next) {
4475                current = self.merge_two_blocks(current, next);
4476            } else {
4477                merged.push(current);
4478                current = next.clone();
4479            }
4480        }
4481
4482        merged.push(current);
4483        merged
4484    }
4485
4486    /// Determine if two adjacent list blocks should be merged
4487    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4488        // Basic compatibility checks
4489        if !self.blocks_are_compatible(current, next) {
4490            return false;
4491        }
4492
4493        // Check spacing and content between blocks
4494        let spacing = self.analyze_spacing_between(current, next);
4495        match spacing {
4496            BlockSpacing::Consecutive => true,
4497            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4498            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4499                self.can_merge_with_content_between(current, next)
4500            }
4501        }
4502    }
4503
4504    /// Check if blocks have compatible structure for merging
4505    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4506        current.is_ordered == next.is_ordered
4507            && current.blockquote_prefix == next.blockquote_prefix
4508            && current.nesting_level == next.nesting_level
4509    }
4510
4511    /// Analyze the spacing between two list blocks
4512    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4513        let gap = next.start_line - current.end_line;
4514
4515        match gap {
4516            1 => BlockSpacing::Consecutive,
4517            2 => BlockSpacing::SingleBlank,
4518            _ if gap > 2 => {
4519                if self.has_only_blank_lines_between(current, next) {
4520                    BlockSpacing::MultipleBlanks
4521                } else {
4522                    BlockSpacing::ContentBetween
4523                }
4524            }
4525            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4526        }
4527    }
4528
4529    /// Check if unordered lists can be merged with a single blank line between
4530    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4531        // Check if there are structural separators between the blocks
4532        // If has_meaningful_content_between returns true, it means there are structural separators
4533        if has_meaningful_content_between(self.content, current, next, self.lines) {
4534            return false; // Structural separators prevent merging
4535        }
4536
4537        // Only merge unordered lists with same marker across single blank
4538        !current.is_ordered && current.marker == next.marker
4539    }
4540
4541    /// Check if ordered lists can be merged when there's content between them
4542    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4543        // Do not merge lists if there are structural separators between them
4544        if has_meaningful_content_between(self.content, current, next, self.lines) {
4545            return false; // Structural separators prevent merging
4546        }
4547
4548        // Only consider merging ordered lists if there's no structural content between
4549        current.is_ordered && next.is_ordered
4550    }
4551
4552    /// Check if there are only blank lines between blocks
4553    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4554        for line_num in (current.end_line + 1)..next.start_line {
4555            if let Some(line_info) = self.lines.get(line_num - 1)
4556                && !line_info.content(self.content).trim().is_empty()
4557            {
4558                return false;
4559            }
4560        }
4561        true
4562    }
4563
4564    /// Merge two compatible list blocks into one
4565    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4566        current.end_line = next.end_line;
4567        current.item_lines.extend_from_slice(&next.item_lines);
4568
4569        // Update max marker width
4570        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4571
4572        // Handle marker consistency for unordered lists
4573        if !current.is_ordered && self.markers_differ(&current, next) {
4574            current.marker = None; // Mixed markers
4575        }
4576
4577        current
4578    }
4579
4580    /// Check if two blocks have different markers
4581    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4582        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4583    }
4584}
4585
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the difference between
/// the next block's start line and the current block's end line.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks: the next block starts on the line right after the
    /// current one ends (a line difference of 0 is also mapped here).
    Consecutive,
    /// Exactly one line lies between the blocks (line difference of 2). The line's
    /// content is not inspected when this variant is chosen.
    SingleBlank,
    /// More than one line lies between the blocks and all of them are blank.
    MultipleBlanks,
    /// More than one line lies between the blocks and at least one is not blank.
    ContentBetween,
}
4594
4595/// Check if there's meaningful content (not just blank lines) between two list blocks
4596fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4597    // Check lines between current.end_line and next.start_line
4598    for line_num in (current.end_line + 1)..next.start_line {
4599        if let Some(line_info) = lines.get(line_num - 1) {
4600            // Convert to 0-indexed
4601            let trimmed = line_info.content(content).trim();
4602
4603            // Skip empty lines
4604            if trimmed.is_empty() {
4605                continue;
4606            }
4607
4608            // Check for structural separators that should separate lists (CommonMark compliant)
4609
4610            // Headings separate lists
4611            if line_info.heading.is_some() {
4612                return true; // Has meaningful content - headings separate lists
4613            }
4614
4615            // Horizontal rules separate lists (---, ***, ___)
4616            if is_horizontal_rule(trimmed) {
4617                return true; // Has meaningful content - horizontal rules separate lists
4618            }
4619
4620            // Tables separate lists
4621            if crate::utils::skip_context::is_table_line(trimmed) {
4622                return true; // Has meaningful content - tables separate lists
4623            }
4624
4625            // Blockquotes separate lists
4626            if trimmed.starts_with('>') {
4627                return true; // Has meaningful content - blockquotes separate lists
4628            }
4629
4630            // Code block fences separate lists (unless properly indented as list content)
4631            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4632                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4633
4634                // Check if this code block is properly indented as list continuation
4635                let min_continuation_indent = if current.is_ordered {
4636                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4637                } else {
4638                    current.nesting_level + 2
4639                };
4640
4641                if line_indent < min_continuation_indent {
4642                    // This is a standalone code block that separates lists
4643                    return true; // Has meaningful content - standalone code blocks separate lists
4644                }
4645            }
4646
4647            // Check if this line has proper indentation for list continuation
4648            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4649
4650            // Calculate minimum indentation needed to be list continuation
4651            let min_indent = if current.is_ordered {
4652                current.nesting_level + current.max_marker_width
4653            } else {
4654                current.nesting_level + 2
4655            };
4656
4657            // If the line is not indented enough to be list continuation, it's meaningful content
4658            if line_indent < min_indent {
4659                return true; // Has meaningful content - content not indented as list continuation
4660            }
4661
4662            // If we reach here, the line is properly indented as list continuation
4663            // Continue checking other lines
4664        }
4665    }
4666
4667    // Only blank lines or properly indented list continuation content between blocks
4668    false
4669}
4670
4671/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4672/// CommonMark rules for thematic breaks (horizontal rules):
4673/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4674/// - Must have 3+ of the same character (-, *, or _)
4675/// - May have spaces between characters
4676/// - No other characters allowed
4677pub fn is_horizontal_rule_line(line: &str) -> bool {
4678    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4679    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4680    if leading_spaces > 3 || line.starts_with('\t') {
4681        return false;
4682    }
4683
4684    is_horizontal_rule_content(line.trim())
4685}
4686
/// Test whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must begin with `-`, `*` or `_`, contain that same character at least three
/// times, and otherwise consist only of spaces and tabs. Leading indentation is not
/// examined here; use `is_horizontal_rule_line` for full CommonMark compliance including
/// the indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    let mut rest = trimmed.chars();

    // The first character selects the rule marker; anything else cannot be a rule.
    let marker = match rest.next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 1usize;
    for c in rest {
        if c == marker {
            marker_count += 1;
        } else if !matches!(c, ' ' | '\t') {
            // Any character other than the marker or inline whitespace disqualifies the line.
            return false;
        }
    }

    marker_count >= 3
}
4711
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged, so no indentation check is performed here; use
/// `is_horizontal_rule_line` when the leading indentation of the raw line matters.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4716
/// Unit tests for `LintContext` and the helper functions in this module.
4718#[cfg(test)]
4719mod tests {
4720    use super::*;
4721
4722    #[test]
4723    fn test_empty_content() {
4724        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4725        assert_eq!(ctx.content, "");
4726        assert_eq!(ctx.line_offsets, vec![0]);
4727        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4728        assert_eq!(ctx.lines.len(), 0);
4729    }
4730
4731    #[test]
4732    fn test_single_line() {
4733        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4734        assert_eq!(ctx.content, "# Hello");
4735        assert_eq!(ctx.line_offsets, vec![0]);
4736        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4737        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4738    }
4739
4740    #[test]
4741    fn test_multi_line() {
4742        let content = "# Title\n\nSecond line\nThird line";
4743        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4744        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4745        // Test offset to line/col
4746        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4747        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4748        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4749        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4750        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4751    }
4752
4753    #[test]
4754    fn test_line_info() {
4755        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4756        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4757
4758        // Test line info
4759        assert_eq!(ctx.lines.len(), 7);
4760
4761        // Line 1: "# Title"
4762        let line1 = &ctx.lines[0];
4763        assert_eq!(line1.content(ctx.content), "# Title");
4764        assert_eq!(line1.byte_offset, 0);
4765        assert_eq!(line1.indent, 0);
4766        assert!(!line1.is_blank);
4767        assert!(!line1.in_code_block);
4768        assert!(line1.list_item.is_none());
4769
4770        // Line 2: "    indented"
4771        let line2 = &ctx.lines[1];
4772        assert_eq!(line2.content(ctx.content), "    indented");
4773        assert_eq!(line2.byte_offset, 8);
4774        assert_eq!(line2.indent, 4);
4775        assert!(!line2.is_blank);
4776
4777        // Line 3: "" (blank)
4778        let line3 = &ctx.lines[2];
4779        assert_eq!(line3.content(ctx.content), "");
4780        assert!(line3.is_blank);
4781
4782        // Test helper methods
4783        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4784        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4785        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4786        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4787    }
4788
4789    #[test]
4790    fn test_list_item_detection() {
4791        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4792        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4793
4794        // Line 1: "- Unordered item"
4795        let line1 = &ctx.lines[0];
4796        assert!(line1.list_item.is_some());
4797        let list1 = line1.list_item.as_ref().unwrap();
4798        assert_eq!(list1.marker, "-");
4799        assert!(!list1.is_ordered);
4800        assert_eq!(list1.marker_column, 0);
4801        assert_eq!(list1.content_column, 2);
4802
4803        // Line 2: "  * Nested item"
4804        let line2 = &ctx.lines[1];
4805        assert!(line2.list_item.is_some());
4806        let list2 = line2.list_item.as_ref().unwrap();
4807        assert_eq!(list2.marker, "*");
4808        assert_eq!(list2.marker_column, 2);
4809
4810        // Line 3: "1. Ordered item"
4811        let line3 = &ctx.lines[2];
4812        assert!(line3.list_item.is_some());
4813        let list3 = line3.list_item.as_ref().unwrap();
4814        assert_eq!(list3.marker, "1.");
4815        assert!(list3.is_ordered);
4816        assert_eq!(list3.number, Some(1));
4817
4818        // Line 6: "Not a list"
4819        let line6 = &ctx.lines[5];
4820        assert!(line6.list_item.is_none());
4821    }
4822
4823    #[test]
4824    fn test_offset_to_line_col_edge_cases() {
4825        let content = "a\nb\nc";
4826        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4827        // line_offsets: [0, 2, 4]
4828        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4829        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4830        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4831        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4832        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4833        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4834    }
4835
4836    #[test]
4837    fn test_mdx_esm_blocks() {
4838        let content = r##"import {Chart} from './snowfall.js'
4839export const year = 2023
4840
4841# Last year's snowfall
4842
4843In {year}, the snowfall was above average.
4844It was followed by a warm spring which caused
4845flood conditions in many of the nearby rivers.
4846
4847<Chart color="#fcb32c" year={year} />
4848"##;
4849
4850        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4851
4852        // Check that lines 1 and 2 are marked as ESM blocks
4853        assert_eq!(ctx.lines.len(), 10);
4854        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4855        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4856        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4857        assert!(
4858            !ctx.lines[3].in_esm_block,
4859            "Line 4 (heading) should NOT be in_esm_block"
4860        );
4861        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4862        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4863    }
4864
4865    #[test]
4866    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4867        let content = r#"import {Chart} from './snowfall.js'
4868export const year = 2023
4869
4870# Last year's snowfall
4871"#;
4872
4873        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4874
4875        // ESM blocks should NOT be detected in Standard flavor
4876        assert!(
4877            !ctx.lines[0].in_esm_block,
4878            "Line 1 should NOT be in_esm_block in Standard flavor"
4879        );
4880        assert!(
4881            !ctx.lines[1].in_esm_block,
4882            "Line 2 should NOT be in_esm_block in Standard flavor"
4883        );
4884    }
4885
4886    #[test]
4887    fn test_blockquote_with_indented_content() {
4888        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4889        // The content inside the blockquote may also be detected as a code block (which is correct),
4890        // but for MD046 purposes, we need to know the line is inside a blockquote.
4891        let content = r#"# Heading
4892
4893>      -S socket-path
4894>                    More text
4895"#;
4896        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4897
4898        // Line 3 (index 2) should be detected as blockquote
4899        assert!(
4900            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4901            "Line 3 should be a blockquote"
4902        );
4903        // Line 4 (index 3) should also be blockquote
4904        assert!(
4905            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4906            "Line 4 should be a blockquote"
4907        );
4908
4909        // Verify blockquote content is correctly parsed
4910        // Note: spaces_after includes the spaces between `>` and content
4911        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4912        assert_eq!(bq3.content, "-S socket-path");
4913        assert_eq!(bq3.nesting_level, 1);
4914        // 6 spaces after the `>` marker
4915        assert!(bq3.has_multiple_spaces_after_marker);
4916
4917        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4918        assert_eq!(bq4.content, "More text");
4919        assert_eq!(bq4.nesting_level, 1);
4920    }
4921
4922    #[test]
4923    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4924        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4925        let content = r#"# Title
4926
4927A footnote[^1].
4928
4929[^1]: This is the footnote content.
4930
4931[^note]: Another footnote with [link](https://example.com).
4932
4933[regular]: ./path.md "A real reference definition"
4934"#;
4935        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4936
4937        // Should only have one reference definition (the regular one)
4938        assert_eq!(
4939            ctx.reference_defs.len(),
4940            1,
4941            "Footnotes should not be parsed as reference definitions"
4942        );
4943
4944        // The only reference def should be the regular one
4945        assert_eq!(ctx.reference_defs[0].id, "regular");
4946        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4947        assert_eq!(
4948            ctx.reference_defs[0].title,
4949            Some("A real reference definition".to_string())
4950        );
4951    }
4952
4953    #[test]
4954    fn test_footnote_with_inline_link_not_misidentified() {
4955        // Regression test for issue #286: footnote containing an inline link
4956        // was incorrectly parsed as a reference definition with URL "[link](url)"
4957        let content = r#"# Title
4958
4959A footnote[^1].
4960
4961[^1]: [link](https://www.google.com).
4962"#;
4963        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4964
4965        // Should have no reference definitions
4966        assert!(
4967            ctx.reference_defs.is_empty(),
4968            "Footnote with inline link should not create a reference definition"
4969        );
4970    }
4971
4972    #[test]
4973    fn test_various_footnote_formats_excluded() {
4974        // Test various footnote ID formats are all excluded
4975        let content = r#"[^1]: Numeric footnote
4976[^note]: Named footnote
4977[^a]: Single char footnote
4978[^long-footnote-name]: Long named footnote
4979[^123abc]: Mixed alphanumeric
4980
4981[ref1]: ./file1.md
4982[ref2]: ./file2.md
4983"#;
4984        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4985
4986        // Should only have the two regular reference definitions
4987        assert_eq!(
4988            ctx.reference_defs.len(),
4989            2,
4990            "Only regular reference definitions should be parsed"
4991        );
4992
4993        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
4994        assert!(ids.contains(&"ref1"));
4995        assert!(ids.contains(&"ref2"));
4996        assert!(!ids.iter().any(|id| id.starts_with('^')));
4997    }
4998
4999    // =========================================================================
5000    // Tests for has_char and char_count methods
5001    // =========================================================================
5002
5003    #[test]
5004    fn test_has_char_tracked_characters() {
5005        // Test all 12 tracked characters
5006        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5007        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5008
5009        // All tracked characters should be detected
5010        assert!(ctx.has_char('#'), "Should detect hash");
5011        assert!(ctx.has_char('*'), "Should detect asterisk");
5012        assert!(ctx.has_char('_'), "Should detect underscore");
5013        assert!(ctx.has_char('-'), "Should detect hyphen");
5014        assert!(ctx.has_char('+'), "Should detect plus");
5015        assert!(ctx.has_char('>'), "Should detect gt");
5016        assert!(ctx.has_char('|'), "Should detect pipe");
5017        assert!(ctx.has_char('['), "Should detect bracket");
5018        assert!(ctx.has_char('`'), "Should detect backtick");
5019        assert!(ctx.has_char('<'), "Should detect lt");
5020        assert!(ctx.has_char('!'), "Should detect exclamation");
5021        assert!(ctx.has_char('\n'), "Should detect newline");
5022    }
5023
5024    #[test]
5025    fn test_has_char_absent_characters() {
5026        let content = "Simple text without special chars";
5027        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5028
5029        // None of the tracked characters should be present
5030        assert!(!ctx.has_char('#'), "Should not detect hash");
5031        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5032        assert!(!ctx.has_char('_'), "Should not detect underscore");
5033        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5034        assert!(!ctx.has_char('+'), "Should not detect plus");
5035        assert!(!ctx.has_char('>'), "Should not detect gt");
5036        assert!(!ctx.has_char('|'), "Should not detect pipe");
5037        assert!(!ctx.has_char('['), "Should not detect bracket");
5038        assert!(!ctx.has_char('`'), "Should not detect backtick");
5039        assert!(!ctx.has_char('<'), "Should not detect lt");
5040        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5041        // Note: single line content has no newlines
5042        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5043    }
5044
5045    #[test]
5046    fn test_has_char_fallback_for_untracked() {
5047        let content = "Text with @mention and $dollar and %percent";
5048        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5049
5050        // Untracked characters should fall back to content.contains()
5051        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5052        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5053        assert!(ctx.has_char('%'), "Should detect % via fallback");
5054        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5055    }
5056
5057    #[test]
5058    fn test_char_count_tracked_characters() {
5059        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5060        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5061
5062        // Count each tracked character
5063        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5064        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5065        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5066        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5067        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5068        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5069        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5070        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5071        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5072        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5073        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5074        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5075    }
5076
5077    #[test]
5078    fn test_char_count_zero_for_absent() {
5079        let content = "Plain text";
5080        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5081
5082        assert_eq!(ctx.char_count('#'), 0);
5083        assert_eq!(ctx.char_count('*'), 0);
5084        assert_eq!(ctx.char_count('_'), 0);
5085        assert_eq!(ctx.char_count('\n'), 0);
5086    }
5087
5088    #[test]
5089    fn test_char_count_fallback_for_untracked() {
5090        let content = "@@@ $$ %%%";
5091        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5092
5093        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5094        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5095        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5096        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5097    }
5098
5099    #[test]
5100    fn test_char_count_empty_content() {
5101        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5102
5103        assert_eq!(ctx.char_count('#'), 0);
5104        assert_eq!(ctx.char_count('*'), 0);
5105        assert_eq!(ctx.char_count('@'), 0);
5106        assert!(!ctx.has_char('#'));
5107        assert!(!ctx.has_char('@'));
5108    }
5109
5110    // =========================================================================
5111    // Tests for is_in_html_tag method
5112    // =========================================================================
5113
5114    #[test]
5115    fn test_is_in_html_tag_simple() {
5116        let content = "<div>content</div>";
5117        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5118
5119        // Inside opening tag
5120        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5121        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5122        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5123
5124        // Outside tag (in content)
5125        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5126        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5127
5128        // Inside closing tag
5129        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5130        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5131    }
5132
5133    #[test]
5134    fn test_is_in_html_tag_self_closing() {
5135        let content = "Text <br/> more text";
5136        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5137
5138        // Before tag
5139        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5140        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5141
5142        // Inside self-closing tag
5143        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5144        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5145        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5146
5147        // After tag
5148        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5149    }
5150
5151    #[test]
5152    fn test_is_in_html_tag_with_attributes() {
5153        let content = r#"<a href="url" class="link">text</a>"#;
5154        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5155
5156        // All positions inside opening tag with attributes
5157        assert!(ctx.is_in_html_tag(0), "Start of tag");
5158        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5159        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5160        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5161
5162        // Content between tags
5163        assert!(!ctx.is_in_html_tag(27), "Start of content");
5164        assert!(!ctx.is_in_html_tag(30), "End of content");
5165
5166        // Closing tag
5167        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5168    }
5169
5170    #[test]
5171    fn test_is_in_html_tag_multiline() {
5172        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5173        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5174
5175        // Opening tag spans multiple lines
5176        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5177        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5178        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5179
5180        // After closing > of opening tag
5181        let closing_bracket_pos = content.find(">\n").unwrap();
5182        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5183    }
5184
5185    #[test]
5186    fn test_is_in_html_tag_no_tags() {
5187        let content = "Plain text without any HTML";
5188        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5189
5190        // No position should be in an HTML tag
5191        for i in 0..content.len() {
5192            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5193        }
5194    }
5195
5196    // =========================================================================
5197    // Tests for is_in_jinja_range method
5198    // =========================================================================
5199
5200    #[test]
5201    fn test_is_in_jinja_range_expression() {
5202        let content = "Hello {{ name }}!";
5203        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5204
5205        // Before Jinja
5206        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5207        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5208
5209        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5210        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5211        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5212        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5213        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5214        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5215
5216        // After Jinja
5217        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5218    }
5219
5220    #[test]
5221    fn test_is_in_jinja_range_statement() {
5222        let content = "{% if condition %}content{% endif %}";
5223        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5224
5225        // Inside opening statement
5226        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5227        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5228        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5229
5230        // Content between
5231        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5232
5233        // Inside closing statement
5234        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5235        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5236    }
5237
5238    #[test]
5239    fn test_is_in_jinja_range_multiple() {
5240        let content = "{{ a }} and {{ b }}";
5241        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5242
5243        // First Jinja expression
5244        assert!(ctx.is_in_jinja_range(0));
5245        assert!(ctx.is_in_jinja_range(3));
5246        assert!(ctx.is_in_jinja_range(6));
5247
5248        // Between expressions
5249        assert!(!ctx.is_in_jinja_range(8));
5250        assert!(!ctx.is_in_jinja_range(11));
5251
5252        // Second Jinja expression
5253        assert!(ctx.is_in_jinja_range(12));
5254        assert!(ctx.is_in_jinja_range(15));
5255        assert!(ctx.is_in_jinja_range(18));
5256    }
5257
5258    #[test]
5259    fn test_is_in_jinja_range_no_jinja() {
5260        let content = "Plain text with single braces but not Jinja";
5261        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5262
5263        // No position should be in Jinja
5264        for i in 0..content.len() {
5265            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5266        }
5267    }
5268
5269    // =========================================================================
5270    // Tests for is_in_link_title method
5271    // =========================================================================
5272
5273    #[test]
5274    fn test_is_in_link_title_with_title() {
5275        let content = r#"[ref]: https://example.com "Title text"
5276
5277Some content."#;
5278        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5279
5280        // Verify we have a reference def with title
5281        assert_eq!(ctx.reference_defs.len(), 1);
5282        let def = &ctx.reference_defs[0];
5283        assert!(def.title_byte_start.is_some());
5284        assert!(def.title_byte_end.is_some());
5285
5286        let title_start = def.title_byte_start.unwrap();
5287        let title_end = def.title_byte_end.unwrap();
5288
5289        // Before title (in URL)
5290        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5291
5292        // Inside title
5293        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5294        assert!(
5295            ctx.is_in_link_title(title_start + 5),
5296            "Middle of title should be in title"
5297        );
5298        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5299
5300        // After title
5301        assert!(
5302            !ctx.is_in_link_title(title_end),
5303            "After title end should not be in title"
5304        );
5305    }
5306
5307    #[test]
5308    fn test_is_in_link_title_without_title() {
5309        let content = "[ref]: https://example.com\n\nSome content.";
5310        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5311
5312        // Reference def without title
5313        assert_eq!(ctx.reference_defs.len(), 1);
5314        let def = &ctx.reference_defs[0];
5315        assert!(def.title_byte_start.is_none());
5316        assert!(def.title_byte_end.is_none());
5317
5318        // No position should be in a title
5319        for i in 0..content.len() {
5320            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5321        }
5322    }
5323
5324    #[test]
5325    fn test_is_in_link_title_multiple_refs() {
5326        let content = r#"[ref1]: /url1 "Title One"
5327[ref2]: /url2
5328[ref3]: /url3 "Title Three"
5329"#;
5330        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5331
5332        // Should have 3 reference defs
5333        assert_eq!(ctx.reference_defs.len(), 3);
5334
5335        // ref1 has title
5336        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5337        assert!(ref1.title_byte_start.is_some());
5338
5339        // ref2 has no title
5340        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5341        assert!(ref2.title_byte_start.is_none());
5342
5343        // ref3 has title
5344        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5345        assert!(ref3.title_byte_start.is_some());
5346
5347        // Check positions in ref1's title
5348        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5349            assert!(ctx.is_in_link_title(start + 1));
5350            assert!(!ctx.is_in_link_title(end + 5));
5351        }
5352
5353        // Check positions in ref3's title
5354        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5355            assert!(ctx.is_in_link_title(start + 1));
5356        }
5357    }
5358
5359    #[test]
5360    fn test_is_in_link_title_single_quotes() {
5361        let content = "[ref]: /url 'Single quoted title'\n";
5362        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5363
5364        assert_eq!(ctx.reference_defs.len(), 1);
5365        let def = &ctx.reference_defs[0];
5366
5367        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5368            assert!(ctx.is_in_link_title(start));
5369            assert!(ctx.is_in_link_title(start + 5));
5370            assert!(!ctx.is_in_link_title(end));
5371        }
5372    }
5373
5374    #[test]
5375    fn test_is_in_link_title_parentheses() {
5376        // Note: The reference def parser may not support parenthesized titles
5377        // This test verifies the is_in_link_title method works when titles exist
5378        let content = "[ref]: /url (Parenthesized title)\n";
5379        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5380
5381        // Parser behavior: may or may not parse parenthesized titles
5382        // We test that is_in_link_title correctly reflects whatever was parsed
5383        if ctx.reference_defs.is_empty() {
5384            // Parser didn't recognize this as a reference def
5385            for i in 0..content.len() {
5386                assert!(!ctx.is_in_link_title(i));
5387            }
5388        } else {
5389            let def = &ctx.reference_defs[0];
5390            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5391                assert!(ctx.is_in_link_title(start));
5392                assert!(ctx.is_in_link_title(start + 5));
5393                assert!(!ctx.is_in_link_title(end));
5394            } else {
5395                // Title wasn't parsed, so no position should be in title
5396                for i in 0..content.len() {
5397                    assert!(!ctx.is_in_link_title(i));
5398                }
5399            }
5400        }
5401    }
5402
5403    #[test]
5404    fn test_is_in_link_title_no_refs() {
5405        let content = "Just plain text without any reference definitions.";
5406        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5407
5408        assert!(ctx.reference_defs.is_empty());
5409
5410        for i in 0..content.len() {
5411            assert!(!ctx.is_in_link_title(i));
5412        }
5413    }
5414
5415    // =========================================================================
5416    // Math span tests (Issue #289)
5417    // =========================================================================
5418
5419    #[test]
5420    fn test_math_spans_inline() {
5421        let content = "Text with inline math $[f](x)$ in it.";
5422        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5423
5424        let math_spans = ctx.math_spans();
5425        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5426
5427        let span = &math_spans[0];
5428        assert!(!span.is_display, "Should be inline math, not display");
5429        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5430    }
5431
5432    #[test]
5433    fn test_math_spans_display_single_line() {
5434        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5435        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5436
5437        let math_spans = ctx.math_spans();
5438        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5439
5440        let span = &math_spans[0];
5441        assert!(span.is_display, "Should be display math");
5442        assert!(
5443            span.content.contains("[x](\\zeta)"),
5444            "Content should contain the link-like pattern"
5445        );
5446    }
5447
5448    #[test]
5449    fn test_math_spans_display_multiline() {
5450        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5451        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5452
5453        let math_spans = ctx.math_spans();
5454        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5455
5456        let span = &math_spans[0];
5457        assert!(span.is_display, "Should be display math");
5458    }
5459
5460    #[test]
5461    fn test_is_in_math_span() {
5462        let content = "Text $[f](x)$ more text";
5463        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5464
5465        // Position inside the math span
5466        let math_start = content.find('$').unwrap();
5467        let math_end = content.rfind('$').unwrap() + 1;
5468
5469        assert!(
5470            ctx.is_in_math_span(math_start + 1),
5471            "Position inside math span should return true"
5472        );
5473        assert!(
5474            ctx.is_in_math_span(math_start + 3),
5475            "Position inside math span should return true"
5476        );
5477
5478        // Position outside the math span
5479        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5480        assert!(
5481            !ctx.is_in_math_span(math_end + 1),
5482            "Position after math span should return false"
5483        );
5484    }
5485
5486    #[test]
5487    fn test_math_spans_mixed_with_code() {
5488        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5489        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5490
5491        let math_spans = ctx.math_spans();
5492        let code_spans = ctx.code_spans();
5493
5494        assert_eq!(math_spans.len(), 1, "Should have one math span");
5495        assert_eq!(code_spans.len(), 1, "Should have one code span");
5496
5497        // Verify math span content
5498        assert_eq!(math_spans[0].content, "[f](x)");
5499        // Verify code span content
5500        assert_eq!(code_spans[0].content, "[g](y)");
5501    }
5502
5503    #[test]
5504    fn test_math_spans_no_math() {
5505        let content = "Regular text without any math at all.";
5506        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5507
5508        let math_spans = ctx.math_spans();
5509        assert!(math_spans.is_empty(), "Should have no math spans");
5510    }
5511
5512    #[test]
5513    fn test_math_spans_multiple() {
5514        let content = "First $a$ and second $b$ and display $$c$$";
5515        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5516
5517        let math_spans = ctx.math_spans();
5518        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5519
5520        // Two inline, one display
5521        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5522        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5523
5524        assert_eq!(inline_count, 2, "Should have two inline math spans");
5525        assert_eq!(display_count, 1, "Should have one display math span");
5526    }
5527
5528    #[test]
5529    fn test_is_in_math_span_boundary_positions() {
5530        // Test exact boundary positions: $[f](x)$
5531        // Byte positions:                0123456789
5532        let content = "$[f](x)$";
5533        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5534
5535        let math_spans = ctx.math_spans();
5536        assert_eq!(math_spans.len(), 1, "Should have one math span");
5537
5538        let span = &math_spans[0];
5539
5540        // Position at opening $ should be in span (byte 0)
5541        assert!(
5542            ctx.is_in_math_span(span.byte_offset),
5543            "Start position should be in span"
5544        );
5545
5546        // Position just inside should be in span
5547        assert!(
5548            ctx.is_in_math_span(span.byte_offset + 1),
5549            "Position after start should be in span"
5550        );
5551
5552        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5553        assert!(
5554            ctx.is_in_math_span(span.byte_end - 1),
5555            "Position at end-1 should be in span"
5556        );
5557
5558        // Position at byte_end should NOT be in span (exclusive end)
5559        assert!(
5560            !ctx.is_in_math_span(span.byte_end),
5561            "Position at byte_end should NOT be in span (exclusive)"
5562        );
5563    }
5564
5565    #[test]
5566    fn test_math_spans_at_document_start() {
5567        let content = "$x$ text";
5568        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5569
5570        let math_spans = ctx.math_spans();
5571        assert_eq!(math_spans.len(), 1);
5572        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5573    }
5574
5575    #[test]
5576    fn test_math_spans_at_document_end() {
5577        let content = "text $x$";
5578        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5579
5580        let math_spans = ctx.math_spans();
5581        assert_eq!(math_spans.len(), 1);
5582        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5583    }
5584
5585    #[test]
5586    fn test_math_spans_consecutive() {
5587        let content = "$a$$b$";
5588        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5589
5590        let math_spans = ctx.math_spans();
5591        // pulldown-cmark should parse these as separate spans
5592        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5593
5594        // All positions should be in some math span
5595        for i in 0..content.len() {
5596            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5597        }
5598    }
5599
5600    #[test]
5601    fn test_math_spans_currency_not_math() {
5602        // Unbalanced $ should not create math spans
5603        let content = "Price is $100";
5604        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5605
5606        let math_spans = ctx.math_spans();
5607        // pulldown-cmark requires balanced delimiters for math
5608        // $100 alone is not math
5609        assert!(
5610            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5611            "Unbalanced $ should not create math span containing 100"
5612        );
5613    }
5614
5615    // =========================================================================
5616    // Tests for O(1) reference definition lookups via HashMap
5617    // =========================================================================
5618
5619    #[test]
5620    fn test_reference_lookup_o1_basic() {
5621        let content = r#"[ref1]: /url1
5622[REF2]: /url2 "Title"
5623[Ref3]: /url3
5624
5625Use [link][ref1] and [link][REF2]."#;
5626        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5627
5628        // Verify we have 3 reference defs
5629        assert_eq!(ctx.reference_defs.len(), 3);
5630
5631        // Test get_reference_url with various cases
5632        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5633        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5634        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5635        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5636        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5637        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5638        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5639    }
5640
5641    #[test]
5642    fn test_reference_lookup_o1_get_reference_def() {
5643        let content = r#"[myref]: https://example.com "My Title"
5644"#;
5645        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5646
5647        // Test get_reference_def
5648        let def = ctx.get_reference_def("myref").expect("Should find myref");
5649        assert_eq!(def.url, "https://example.com");
5650        assert_eq!(def.title.as_deref(), Some("My Title"));
5651
5652        // Case insensitive
5653        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5654        assert_eq!(def2.url, "https://example.com");
5655
5656        // Non-existent
5657        assert!(ctx.get_reference_def("nonexistent").is_none());
5658    }
5659
5660    #[test]
5661    fn test_reference_lookup_o1_has_reference_def() {
5662        let content = r#"[foo]: /foo
5663[BAR]: /bar
5664"#;
5665        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5666
5667        // Test has_reference_def
5668        assert!(ctx.has_reference_def("foo"));
5669        assert!(ctx.has_reference_def("FOO")); // case insensitive
5670        assert!(ctx.has_reference_def("bar"));
5671        assert!(ctx.has_reference_def("Bar")); // case insensitive
5672        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5673    }
5674
5675    #[test]
5676    fn test_reference_lookup_o1_empty_content() {
5677        let content = "No references here.";
5678        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5679
5680        assert!(ctx.reference_defs.is_empty());
5681        assert_eq!(ctx.get_reference_url("anything"), None);
5682        assert!(ctx.get_reference_def("anything").is_none());
5683        assert!(!ctx.has_reference_def("anything"));
5684    }
5685
5686    #[test]
5687    fn test_reference_lookup_o1_special_characters_in_id() {
5688        let content = r#"[ref-with-dash]: /url1
5689[ref_with_underscore]: /url2
5690[ref.with.dots]: /url3
5691"#;
5692        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5693
5694        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5695        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5696        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5697    }
5698
5699    #[test]
5700    fn test_reference_lookup_o1_unicode_id() {
5701        let content = r#"[日本語]: /japanese
5702[émoji]: /emoji
5703"#;
5704        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5705
5706        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5707        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5708        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5709    }
5710}