rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::collections::HashMap;
10use std::path::PathBuf;
11use std::sync::LazyLock;
12
/// Times a section of code and optionally reports how long it took (non-WASM builds only).
///
/// The expansion evaluates `$code` first, then checks `$profile`; when it is
/// true, `[PROFILE] <name>: <elapsed>` is written to stderr. The value of
/// `$code` is the value of the whole macro invocation.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}
25
/// WASM variant of `profile_section!`: expands to `$code` alone, with no timing and no output.
/// `$name` and `$profile` are accepted so call sites stay identical, but they do not
/// appear in the expansion.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
30
// Comprehensive link pattern that captures both inline and reference links.
// Flags: (?s) makes `.` match newlines; (?x) enables verbose mode, so the
// whitespace and `# ...` comments inside the pattern are ignored by the regex engine.
// Capture groups: 1 = link text, 2 = angle-bracket URL, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
44
// Image pattern: same shape as the link pattern, but requires a leading `!`.
// Flags: (?s) makes `.` match newlines; (?x) enables verbose mode, so the
// whitespace and `# ...` comments inside the pattern are ignored by the regex engine.
// Capture groups: 1 = alt text, 2 = angle-bracket URL, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
58
// Reference definition pattern: `[id]: url "title"` with up to three leading spaces.
// (?m) makes `^`/`$` match at line boundaries, so each definition must occupy a whole line.
// Capture groups: 1 = reference ID, 2 = URL (non-whitespace run),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
62
// Bare URLs: no pattern is defined here; the centralized URL pattern from regex_cache is used instead.
64
// Pattern for email addresses: local part, `@`, domain, and a final
// dot-separated label of at least two ASCII letters. The pattern is unanchored,
// so it can match anywhere inside a larger string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
68
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the text; group 1 captures the whole prefix:
// optional leading whitespace, one or more `>`, then any trailing whitespace.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
71
/// Pre-computed information about a single line of the document.
///
/// Records where the line sits in the source (byte offset and length), its
/// indentation, and a set of flags / optional parsed structures describing
/// which constructs the line belongs to.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion).
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this (not `indent`) for numeric comparisons like checking for
    /// indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item, otherwise `None`
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading, otherwise `None`
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote, otherwise `None`
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (the line on which the span starts is not flagged)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.).
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
129
130impl LineInfo {
131    /// Get the line content as a string slice from the source document
132    pub fn content<'a>(&self, source: &'a str) -> &'a str {
133        &source[self.byte_offset..self.byte_offset + self.byte_len]
134    }
135}
136
/// Information about a list item, attached to the line on which the item starts.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists (`None` when there is no number)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
151
/// Heading style type: which Markdown syntax was used to write a heading.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
162
/// Parsed link information.
///
/// Text fields are `Cow<'a, str>`, so they may either borrow from the source
/// document or own their data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links (`None` otherwise)
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
187
/// Information about a broken link reported by pulldown-cmark,
/// i.e. a reference that could not be resolved to a definition.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
196
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`) together with its location.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
209
/// Parsed image information.
///
/// Mirrors `ParsedLink`, with `alt_text` in place of the link text. Text
/// fields are `Cow<'a, str>`, so they may either borrow from the source
/// document or own their data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images (`None` otherwise)
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
234
/// Reference definition of the form `[ref]: url "title"`, with byte positions
/// for the definition as a whole and for its optional title.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title (`None` when the definition has no title)
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
255
/// Parsed inline code span information.
///
/// A span may cross line boundaries, which is why both `line` and `end_line`
/// are recorded.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
276
/// Parsed math span information (inline `$...$` or display `$$...$$`).
///
/// A span may cross line boundaries, which is why both `line` and `end_line`
/// are recorded.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
297
/// Information about a heading, attached to the line that carries it.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #).
    /// False for malformed headings like `#NoSpace` that MD018 should flag.
    /// `ValidHeadingsIter` yields only headings for which this is true.
    pub is_valid: bool,
}
325
/// A valid heading from a filtered iteration (the item type of `ValidHeadingsIter`).
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
339
/// Iterator over valid CommonMark headings in a document.
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    // All lines of the document, in order; index + 1 is the 1-indexed line number.
    lines: &'a [LineInfo],
    // Index of the next line to examine.
    current_index: usize,
}
348
349impl<'a> ValidHeadingsIter<'a> {
350    fn new(lines: &'a [LineInfo]) -> Self {
351        Self {
352            lines,
353            current_index: 0,
354        }
355    }
356}
357
358impl<'a> Iterator for ValidHeadingsIter<'a> {
359    type Item = ValidHeading<'a>;
360
361    fn next(&mut self) -> Option<Self::Item> {
362        while self.current_index < self.lines.len() {
363            let idx = self.current_index;
364            self.current_index += 1;
365
366            let line_info = &self.lines[idx];
367            if let Some(heading) = &line_info.heading
368                && heading.is_valid
369            {
370                return Some(ValidHeading {
371                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
372                    heading,
373                    line_info,
374                });
375            }
376        }
377        None
378    }
379}
380
/// Information about a blockquote line: its prefix, nesting, content, and
/// pre-computed spacing diagnostics.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
401
/// Information about a list block: its line range, kind, and the item lines it contains.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like start_line/end_line — confirm against the parser.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
422
423use std::sync::{Arc, OnceLock};
424
/// Lookup table keyed by a line's byte offset. Each value is the tuple
/// `(is_ordered, marker, marker_column, content_column, number)` for the
/// list item on that line.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// List of `(usize, usize)` byte-range pairs, used in JSX expression and MDX comment detection.
type ByteRanges = Vec<(usize, usize)>;
430
/// Character frequency data for fast content analysis.
///
/// Each field counts occurrences of one character; the parenthesised notes name
/// the Markdown constructs that character is associated with. `Default` yields
/// all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
459
/// Pre-parsed HTML tag information: location, name, and open/close kind.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
482
/// Pre-parsed emphasis span information: location, marker, and enclosed content.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
503
/// Pre-parsed table row information.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
516
/// Pre-parsed bare URL information (URLs that are not part of link syntax).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
535
/// Pre-parsed view of one Markdown document.
///
/// `LintContext::new` fills every non-cache field up front. The `OnceLock`
/// cache fields are either pre-populated by `new` (code spans, emphasis spans)
/// or left empty there to be filled later.
pub struct LintContext<'a> {
    pub content: &'a str,                 // The document text being linted
    pub line_offsets: Vec<usize>,         // Byte offset at which each line starts (first entry is 0)
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (pre-populated by `new`)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Emphasis spans (pre-populated by `new`)
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
}
567
/// Detailed blockquote parse result with all components.
///
/// Every field is a sub-slice of the parsed line; taken in declaration order
/// they cover the line from start to end.
struct BlockquoteComponents<'a> {
    // Leading spaces/tabs before the first `>`.
    indent: &'a str,
    // The run of consecutive `>` characters.
    markers: &'a str,
    // Spaces/tabs immediately following the markers.
    spaces_after: &'a str,
    // Everything after that whitespace, up to the end of the line.
    content: &'a str,
}
575
576/// Parse blockquote prefix with detailed components using manual parsing
577#[inline]
578fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
579    let bytes = line.as_bytes();
580    let mut pos = 0;
581
582    // Parse leading whitespace (indent)
583    while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
584        pos += 1;
585    }
586    let indent_end = pos;
587
588    // Must have at least one '>' marker
589    if pos >= bytes.len() || bytes[pos] != b'>' {
590        return None;
591    }
592
593    // Parse '>' markers
594    while pos < bytes.len() && bytes[pos] == b'>' {
595        pos += 1;
596    }
597    let markers_end = pos;
598
599    // Parse spaces after markers
600    while pos < bytes.len() && (bytes[pos] == b' ' || bytes[pos] == b'\t') {
601        pos += 1;
602    }
603    let spaces_end = pos;
604
605    Some(BlockquoteComponents {
606        indent: &line[0..indent_end],
607        markers: &line[indent_end..markers_end],
608        spaces_after: &line[markers_end..spaces_end],
609        content: &line[spaces_end..],
610    })
611}
612
613impl<'a> LintContext<'a> {
614    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
615        #[cfg(not(target_arch = "wasm32"))]
616        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
617        #[cfg(target_arch = "wasm32")]
618        let profile = false;
619
620        let line_offsets = profile_section!("Line offsets", profile, {
621            let mut offsets = vec![0];
622            for (i, c) in content.char_indices() {
623                if c == '\n' {
624                    offsets.push(i + 1);
625                }
626            }
627            offsets
628        });
629
630        // Detect code blocks once and cache them
631        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
632
633        // Pre-compute HTML comment ranges ONCE for all operations
634        let html_comment_ranges = profile_section!(
635            "HTML comment ranges",
636            profile,
637            crate::utils::skip_context::compute_html_comment_ranges(content)
638        );
639
640        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
641        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
642            if flavor == MarkdownFlavor::MkDocs {
643                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
644            } else {
645                Vec::new()
646            }
647        });
648
649        // Pre-compute Quarto div block ranges for Quarto flavor
650        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
651            if flavor == MarkdownFlavor::Quarto {
652                crate::utils::quarto_divs::detect_div_block_ranges(content)
653            } else {
654                Vec::new()
655            }
656        });
657
658        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
659        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
660        let (mut lines, emphasis_spans) = profile_section!(
661            "Basic line info",
662            profile,
663            Self::compute_basic_line_info(
664                content,
665                &line_offsets,
666                &code_blocks,
667                flavor,
668                &html_comment_ranges,
669                &autodoc_ranges,
670                &quarto_div_ranges,
671            )
672        );
673
674        // Detect HTML blocks BEFORE heading detection
675        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
676
677        // Detect ESM import/export blocks in MDX files BEFORE heading detection
678        profile_section!(
679            "ESM blocks",
680            profile,
681            Self::detect_esm_blocks(content, &mut lines, flavor)
682        );
683
684        // Detect JSX expressions and MDX comments in MDX files
685        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
686            "JSX/MDX detection",
687            profile,
688            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
689        );
690
691        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
692        profile_section!(
693            "MkDocs constructs",
694            profile,
695            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
696        );
697
698        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
699        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
700
701        // Now detect headings and blockquotes
702        profile_section!(
703            "Headings & blockquotes",
704            profile,
705            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
706        );
707
708        // Parse code spans early so we can exclude them from link/image parsing
709        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
710
711        // Mark lines that are continuations of multi-line code spans
712        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
713        for span in &code_spans {
714            if span.end_line > span.line {
715                // Mark lines after the first line as continuations
716                for line_num in (span.line + 1)..=span.end_line {
717                    if let Some(line_info) = lines.get_mut(line_num - 1) {
718                        line_info.in_code_span_continuation = true;
719                    }
720                }
721            }
722        }
723
724        // Parse links, images, references, and list blocks
725        let (links, broken_links, footnote_refs) = profile_section!(
726            "Links",
727            profile,
728            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
729        );
730
731        let images = profile_section!(
732            "Images",
733            profile,
734            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
735        );
736
737        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
738
739        // Build O(1) lookup map for reference definitions by lowercase ID
740        let reference_defs_map: HashMap<String, usize> = reference_defs
741            .iter()
742            .enumerate()
743            .map(|(idx, def)| (def.id.to_lowercase(), idx))
744            .collect();
745
746        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
747
748        // Compute character frequency for fast content analysis
749        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
750
751        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
752        let table_blocks = profile_section!(
753            "Table blocks",
754            profile,
755            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
756                content,
757                &code_blocks,
758                &code_spans,
759                &html_comment_ranges,
760            )
761        );
762
763        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
764        let line_index = profile_section!(
765            "Line index",
766            profile,
767            crate::utils::range_utils::LineIndex::new(content)
768        );
769
770        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
771        let jinja_ranges = profile_section!(
772            "Jinja ranges",
773            profile,
774            crate::utils::jinja_utils::find_jinja_ranges(content)
775        );
776
777        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
778        let citation_ranges = profile_section!("Citation ranges", profile, {
779            if flavor == MarkdownFlavor::Quarto {
780                crate::utils::quarto_divs::find_citation_ranges(content)
781            } else {
782                Vec::new()
783            }
784        });
785
786        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
787        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
788            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
789            let mut ranges = Vec::new();
790            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
791                ranges.push((mat.start(), mat.end()));
792            }
793            ranges
794        });
795
796        Self {
797            content,
798            line_offsets,
799            code_blocks,
800            lines,
801            links,
802            images,
803            broken_links,
804            footnote_refs,
805            reference_defs,
806            reference_defs_map,
807            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
808            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
809            list_blocks,
810            char_frequency,
811            html_tags_cache: OnceLock::new(),
812            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
813            table_rows_cache: OnceLock::new(),
814            bare_urls_cache: OnceLock::new(),
815            has_mixed_list_nesting_cache: OnceLock::new(),
816            html_comment_ranges,
817            table_blocks,
818            line_index,
819            jinja_ranges,
820            flavor,
821            source_file,
822            jsx_expression_ranges,
823            mdx_comment_ranges,
824            citation_ranges,
825            shortcode_ranges,
826        }
827    }
828
829    /// Get code spans - computed lazily on first access
830    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
831        Arc::clone(
832            self.code_spans_cache
833                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
834        )
835    }
836
837    /// Get math spans - computed lazily on first access
838    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
839        Arc::clone(
840            self.math_spans_cache
841                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
842        )
843    }
844
845    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
846    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
847        let math_spans = self.math_spans();
848        math_spans
849            .iter()
850            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
851    }
852
853    /// Get HTML comment ranges - pre-computed during LintContext construction
854    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
855        &self.html_comment_ranges
856    }
857
858    /// Get HTML tags - computed lazily on first access
859    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
860        Arc::clone(self.html_tags_cache.get_or_init(|| {
861            Arc::new(Self::parse_html_tags(
862                self.content,
863                &self.lines,
864                &self.code_blocks,
865                self.flavor,
866            ))
867        }))
868    }
869
870    /// Get emphasis spans - pre-computed during construction
871    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
872        Arc::clone(
873            self.emphasis_spans_cache
874                .get()
875                .expect("emphasis_spans_cache initialized during construction"),
876        )
877    }
878
879    /// Get table rows - computed lazily on first access
880    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
881        Arc::clone(
882            self.table_rows_cache
883                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
884        )
885    }
886
887    /// Get bare URLs - computed lazily on first access
888    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
889        Arc::clone(
890            self.bare_urls_cache
891                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
892        )
893    }
894
895    /// Check if document has mixed ordered/unordered list nesting.
896    /// Result is cached after first computation (document-level invariant).
897    /// This is used by MD007 for smart style auto-detection.
898    pub fn has_mixed_list_nesting(&self) -> bool {
899        *self
900            .has_mixed_list_nesting_cache
901            .get_or_init(|| self.compute_mixed_list_nesting())
902    }
903
904    /// Internal computation for mixed list nesting (only called once per LintContext).
905    fn compute_mixed_list_nesting(&self) -> bool {
906        // Track parent list items by their marker position and type
907        // Using marker_column instead of indent because it works correctly
908        // for blockquoted content where indent doesn't account for the prefix
909        // Stack stores: (marker_column, is_ordered)
910        let mut stack: Vec<(usize, bool)> = Vec::new();
911        let mut last_was_blank = false;
912
913        for line_info in &self.lines {
914            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
915            if line_info.in_code_block
916                || line_info.in_front_matter
917                || line_info.in_mkdocstrings
918                || line_info.in_html_comment
919                || line_info.in_esm_block
920            {
921                continue;
922            }
923
924            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
925            if line_info.is_blank {
926                last_was_blank = true;
927                continue;
928            }
929
930            if let Some(list_item) = &line_info.list_item {
931                // Normalize column 1 to column 0 (consistent with MD007 check function)
932                let current_pos = if list_item.marker_column == 1 {
933                    0
934                } else {
935                    list_item.marker_column
936                };
937
938                // If there was a blank line and this item is at root level, reset stack
939                if last_was_blank && current_pos == 0 {
940                    stack.clear();
941                }
942                last_was_blank = false;
943
944                // Pop items at same or greater position (they're siblings or deeper, not parents)
945                while let Some(&(pos, _)) = stack.last() {
946                    if pos >= current_pos {
947                        stack.pop();
948                    } else {
949                        break;
950                    }
951                }
952
953                // Check if immediate parent has different type - this is mixed nesting
954                if let Some(&(_, parent_is_ordered)) = stack.last()
955                    && parent_is_ordered != list_item.is_ordered
956                {
957                    return true; // Found mixed nesting - early exit
958                }
959
960                stack.push((current_pos, list_item.is_ordered));
961            } else {
962                // Non-list line (but not blank) - could be paragraph or other content
963                last_was_blank = false;
964            }
965        }
966
967        false
968    }
969
970    /// Map a byte offset to (line, column)
971    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
972        match self.line_offsets.binary_search(&offset) {
973            Ok(line) => (line + 1, 1),
974            Err(line) => {
975                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
976                (line, offset - line_start + 1)
977            }
978        }
979    }
980
981    /// Check if a position is within a code block or code span
982    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
983        // Check code blocks first
984        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
985            return true;
986        }
987
988        // Check inline code spans (lazy load if needed)
989        self.code_spans()
990            .iter()
991            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
992    }
993
994    /// Get line information by line number (1-indexed)
995    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
996        if line_num > 0 {
997            self.lines.get(line_num - 1)
998        } else {
999            None
1000        }
1001    }
1002
1003    /// Get byte offset for a line number (1-indexed)
1004    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1005        self.line_info(line_num).map(|info| info.byte_offset)
1006    }
1007
1008    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1009    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1010        let normalized_id = ref_id.to_lowercase();
1011        self.reference_defs_map
1012            .get(&normalized_id)
1013            .map(|&idx| self.reference_defs[idx].url.as_str())
1014    }
1015
1016    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1017    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1018        let normalized_id = ref_id.to_lowercase();
1019        self.reference_defs_map
1020            .get(&normalized_id)
1021            .map(|&idx| &self.reference_defs[idx])
1022    }
1023
1024    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1025    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1026        let normalized_id = ref_id.to_lowercase();
1027        self.reference_defs_map.contains_key(&normalized_id)
1028    }
1029
1030    /// Check if a line is part of a list block
1031    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1032        self.list_blocks
1033            .iter()
1034            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1035    }
1036
1037    /// Get the list block containing a specific line
1038    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1039        self.list_blocks
1040            .iter()
1041            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1042    }
1043
1044    // Compatibility methods for DocumentStructure migration
1045
1046    /// Check if a line is within a code block
1047    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1048        if line_num == 0 || line_num > self.lines.len() {
1049            return false;
1050        }
1051        self.lines[line_num - 1].in_code_block
1052    }
1053
1054    /// Check if a line is within front matter
1055    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1056        if line_num == 0 || line_num > self.lines.len() {
1057            return false;
1058        }
1059        self.lines[line_num - 1].in_front_matter
1060    }
1061
1062    /// Check if a line is within an HTML block
1063    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1064        if line_num == 0 || line_num > self.lines.len() {
1065            return false;
1066        }
1067        self.lines[line_num - 1].in_html_block
1068    }
1069
1070    /// Check if a line and column is within a code span
1071    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1072        if line_num == 0 || line_num > self.lines.len() {
1073            return false;
1074        }
1075
1076        // Use the code spans cache to check
1077        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1078        // Convert col to 0-indexed for comparison
1079        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1080        let code_spans = self.code_spans();
1081        code_spans.iter().any(|span| {
1082            // Check if line is within the span's line range
1083            if line_num < span.line || line_num > span.end_line {
1084                return false;
1085            }
1086
1087            if span.line == span.end_line {
1088                // Single-line span: check column bounds
1089                col_0indexed >= span.start_col && col_0indexed < span.end_col
1090            } else if line_num == span.line {
1091                // First line of multi-line span: anything after start_col is in span
1092                col_0indexed >= span.start_col
1093            } else if line_num == span.end_line {
1094                // Last line of multi-line span: anything before end_col is in span
1095                col_0indexed < span.end_col
1096            } else {
1097                // Middle line of multi-line span: entire line is in span
1098                true
1099            }
1100        })
1101    }
1102
1103    /// Check if a byte offset is within a code span
1104    #[inline]
1105    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1106        let code_spans = self.code_spans();
1107        code_spans
1108            .iter()
1109            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1110    }
1111
1112    /// Check if a byte position is within a reference definition
1113    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1114    #[inline]
1115    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1116        self.reference_defs
1117            .iter()
1118            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1119    }
1120
1121    /// Check if a byte position is within an HTML comment
1122    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1123    /// where k is the number of HTML comments (typically very small)
1124    #[inline]
1125    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1126        self.html_comment_ranges
1127            .iter()
1128            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1129    }
1130
1131    /// Check if a byte position is within an HTML tag (including multiline tags)
1132    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1133    #[inline]
1134    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1135        self.html_tags()
1136            .iter()
1137            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1138    }
1139
1140    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1141    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1142        self.jinja_ranges
1143            .iter()
1144            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1145    }
1146
1147    /// Check if a byte position is within a JSX expression (MDX: {expression})
1148    #[inline]
1149    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1150        self.jsx_expression_ranges
1151            .iter()
1152            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1153    }
1154
1155    /// Check if a byte position is within an MDX comment ({/* ... */})
1156    #[inline]
1157    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1158        self.mdx_comment_ranges
1159            .iter()
1160            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1161    }
1162
1163    /// Get all JSX expression byte ranges
1164    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1165        &self.jsx_expression_ranges
1166    }
1167
1168    /// Get all MDX comment byte ranges
1169    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1170        &self.mdx_comment_ranges
1171    }
1172
1173    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1174    /// Only active in Quarto flavor
1175    #[inline]
1176    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1177        self.citation_ranges
1178            .iter()
1179            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1180    }
1181
1182    /// Get all citation byte ranges (Quarto flavor only)
1183    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1184        &self.citation_ranges
1185    }
1186
1187    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1188    #[inline]
1189    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1190        self.shortcode_ranges
1191            .iter()
1192            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1193    }
1194
1195    /// Get all shortcode byte ranges
1196    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1197        &self.shortcode_ranges
1198    }
1199
1200    /// Check if a byte position is within a link reference definition title
1201    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1202        self.reference_defs.iter().any(|def| {
1203            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1204                byte_pos >= start && byte_pos < end
1205            } else {
1206                false
1207            }
1208        })
1209    }
1210
1211    /// Check if content has any instances of a specific character (fast)
1212    pub fn has_char(&self, ch: char) -> bool {
1213        match ch {
1214            '#' => self.char_frequency.hash_count > 0,
1215            '*' => self.char_frequency.asterisk_count > 0,
1216            '_' => self.char_frequency.underscore_count > 0,
1217            '-' => self.char_frequency.hyphen_count > 0,
1218            '+' => self.char_frequency.plus_count > 0,
1219            '>' => self.char_frequency.gt_count > 0,
1220            '|' => self.char_frequency.pipe_count > 0,
1221            '[' => self.char_frequency.bracket_count > 0,
1222            '`' => self.char_frequency.backtick_count > 0,
1223            '<' => self.char_frequency.lt_count > 0,
1224            '!' => self.char_frequency.exclamation_count > 0,
1225            '\n' => self.char_frequency.newline_count > 0,
1226            _ => self.content.contains(ch), // Fallback for other characters
1227        }
1228    }
1229
1230    /// Get count of a specific character (fast)
1231    pub fn char_count(&self, ch: char) -> usize {
1232        match ch {
1233            '#' => self.char_frequency.hash_count,
1234            '*' => self.char_frequency.asterisk_count,
1235            '_' => self.char_frequency.underscore_count,
1236            '-' => self.char_frequency.hyphen_count,
1237            '+' => self.char_frequency.plus_count,
1238            '>' => self.char_frequency.gt_count,
1239            '|' => self.char_frequency.pipe_count,
1240            '[' => self.char_frequency.bracket_count,
1241            '`' => self.char_frequency.backtick_count,
1242            '<' => self.char_frequency.lt_count,
1243            '!' => self.char_frequency.exclamation_count,
1244            '\n' => self.char_frequency.newline_count,
1245            _ => self.content.matches(ch).count(), // Fallback for other characters
1246        }
1247    }
1248
1249    /// Check if content likely contains headings (fast)
1250    pub fn likely_has_headings(&self) -> bool {
1251        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1252    }
1253
1254    /// Check if content likely contains lists (fast)
1255    pub fn likely_has_lists(&self) -> bool {
1256        self.char_frequency.asterisk_count > 0
1257            || self.char_frequency.hyphen_count > 0
1258            || self.char_frequency.plus_count > 0
1259    }
1260
1261    /// Check if content likely contains emphasis (fast)
1262    pub fn likely_has_emphasis(&self) -> bool {
1263        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1264    }
1265
1266    /// Check if content likely contains tables (fast)
1267    pub fn likely_has_tables(&self) -> bool {
1268        self.char_frequency.pipe_count > 2
1269    }
1270
1271    /// Check if content likely contains blockquotes (fast)
1272    pub fn likely_has_blockquotes(&self) -> bool {
1273        self.char_frequency.gt_count > 0
1274    }
1275
1276    /// Check if content likely contains code (fast)
1277    pub fn likely_has_code(&self) -> bool {
1278        self.char_frequency.backtick_count > 0
1279    }
1280
1281    /// Check if content likely contains links or images (fast)
1282    pub fn likely_has_links_or_images(&self) -> bool {
1283        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1284    }
1285
1286    /// Check if content likely contains HTML (fast)
1287    pub fn likely_has_html(&self) -> bool {
1288        self.char_frequency.lt_count > 0
1289    }
1290
1291    /// Get the blockquote prefix for inserting a blank line at the given line index.
1292    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1293    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1294    /// Returns an empty string if the line is not inside a blockquote.
1295    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1296        if let Some(line_info) = self.lines.get(line_idx)
1297            && let Some(ref bq) = line_info.blockquote
1298        {
1299            bq.prefix.trim_end().to_string()
1300        } else {
1301            String::new()
1302        }
1303    }
1304
1305    /// Get HTML tags on a specific line
1306    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1307        self.html_tags()
1308            .iter()
1309            .filter(|tag| tag.line == line_num)
1310            .cloned()
1311            .collect()
1312    }
1313
1314    /// Get emphasis spans on a specific line
1315    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1316        self.emphasis_spans()
1317            .iter()
1318            .filter(|span| span.line == line_num)
1319            .cloned()
1320            .collect()
1321    }
1322
1323    /// Get table rows on a specific line
1324    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1325        self.table_rows()
1326            .iter()
1327            .filter(|row| row.line == line_num)
1328            .cloned()
1329            .collect()
1330    }
1331
1332    /// Get bare URLs on a specific line
1333    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1334        self.bare_urls()
1335            .iter()
1336            .filter(|url| url.line == line_num)
1337            .cloned()
1338            .collect()
1339    }
1340
1341    /// Find the line index for a given byte offset using binary search.
1342    /// Returns (line_index, line_number, column) where:
1343    /// - line_index is the 0-based index in the lines array
1344    /// - line_number is the 1-based line number
1345    /// - column is the byte offset within that line
1346    #[inline]
1347    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1348        // Binary search to find the line containing this byte offset
1349        let idx = match lines.binary_search_by(|line| {
1350            if byte_offset < line.byte_offset {
1351                std::cmp::Ordering::Greater
1352            } else if byte_offset > line.byte_offset + line.byte_len {
1353                std::cmp::Ordering::Less
1354            } else {
1355                std::cmp::Ordering::Equal
1356            }
1357        }) {
1358            Ok(idx) => idx,
1359            Err(idx) => idx.saturating_sub(1),
1360        };
1361
1362        let line = &lines[idx];
1363        let line_num = idx + 1;
1364        let col = byte_offset.saturating_sub(line.byte_offset);
1365
1366        (idx, line_num, col)
1367    }
1368
1369    /// Check if a byte offset is within a code span using binary search
1370    #[inline]
1371    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1372        // Since spans are sorted by byte_offset, use partition_point for binary search
1373        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1374
1375        // Check the span that starts at or before our offset
1376        if idx > 0 {
1377            let span = &code_spans[idx - 1];
1378            if offset >= span.byte_offset && offset < span.byte_end {
1379                return true;
1380            }
1381        }
1382
1383        false
1384    }
1385
1386    /// Collect byte ranges of all links using pulldown-cmark
1387    /// This is used to skip heading detection for lines that fall within link syntax
1388    /// (e.g., multiline links like `[text](url\n#fragment)`)
1389    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1390        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1391
1392        let mut link_ranges = Vec::new();
1393        let mut options = Options::empty();
1394        options.insert(Options::ENABLE_WIKILINKS);
1395        options.insert(Options::ENABLE_FOOTNOTES);
1396
1397        let parser = Parser::new_ext(content, options).into_offset_iter();
1398        let mut link_stack: Vec<usize> = Vec::new();
1399
1400        for (event, range) in parser {
1401            match event {
1402                Event::Start(Tag::Link { .. }) => {
1403                    link_stack.push(range.start);
1404                }
1405                Event::End(TagEnd::Link) => {
1406                    if let Some(start_pos) = link_stack.pop() {
1407                        link_ranges.push((start_pos, range.end));
1408                    }
1409                }
1410                _ => {}
1411            }
1412        }
1413
1414        link_ranges
1415    }
1416
1417    /// Parse all links in the content
1418    fn parse_links(
1419        content: &'a str,
1420        lines: &[LineInfo],
1421        code_blocks: &[(usize, usize)],
1422        code_spans: &[CodeSpan],
1423        flavor: MarkdownFlavor,
1424        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1425    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1426        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1427        use std::collections::HashSet;
1428
1429        let mut links = Vec::with_capacity(content.len() / 500);
1430        let mut broken_links = Vec::new();
1431        let mut footnote_refs = Vec::new();
1432
1433        // Track byte positions of links found by pulldown-cmark
1434        let mut found_positions = HashSet::new();
1435
1436        // Use pulldown-cmark's streaming parser with BrokenLink callback
1437        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1438        // This automatically handles:
1439        // - Escaped links (won't generate events)
1440        // - Links in code blocks/spans (won't generate Link events)
1441        // - Images (generates Tag::Image instead)
1442        // - Reference resolution (dest_url is already resolved!)
1443        // - Broken references (callback is invoked)
1444        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1445        let mut options = Options::empty();
1446        options.insert(Options::ENABLE_WIKILINKS);
1447        options.insert(Options::ENABLE_FOOTNOTES);
1448
1449        let parser = Parser::new_with_broken_link_callback(
1450            content,
1451            options,
1452            Some(|link: BrokenLink<'_>| {
1453                broken_links.push(BrokenLinkInfo {
1454                    reference: link.reference.to_string(),
1455                    span: link.span.clone(),
1456                });
1457                None
1458            }),
1459        )
1460        .into_offset_iter();
1461
1462        let mut link_stack: Vec<(
1463            usize,
1464            usize,
1465            pulldown_cmark::CowStr<'a>,
1466            LinkType,
1467            pulldown_cmark::CowStr<'a>,
1468        )> = Vec::new();
1469        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1470
1471        for (event, range) in parser {
1472            match event {
1473                Event::Start(Tag::Link {
1474                    link_type,
1475                    dest_url,
1476                    id,
1477                    ..
1478                }) => {
1479                    // Link start - record position, URL, and reference ID
1480                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1481                    text_chunks.clear();
1482                }
1483                Event::Text(text) if !link_stack.is_empty() => {
1484                    // Track text content with its byte range
1485                    text_chunks.push((text.to_string(), range.start, range.end));
1486                }
1487                Event::Code(code) if !link_stack.is_empty() => {
1488                    // Include inline code in link text (with backticks)
1489                    let code_text = format!("`{code}`");
1490                    text_chunks.push((code_text, range.start, range.end));
1491                }
1492                Event::End(TagEnd::Link) => {
1493                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1494                        // Skip if in HTML comment
1495                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1496                            text_chunks.clear();
1497                            continue;
1498                        }
1499
1500                        // Find line and column information
1501                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1502
1503                        // Skip if this link is on a MkDocs snippet line
1504                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1505                            text_chunks.clear();
1506                            continue;
1507                        }
1508
1509                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1510
1511                        let is_reference = matches!(
1512                            link_type,
1513                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1514                        );
1515
1516                        // Extract link text directly from source bytes to preserve escaping
1517                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1518                        let link_text = if start_pos < content.len() {
1519                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1520
1521                            // Find MATCHING ] by tracking bracket depth for nested brackets
1522                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1523                            // Brackets inside code spans (between backticks) should be ignored
1524                            let mut close_pos = None;
1525                            let mut depth = 0;
1526                            let mut in_code_span = false;
1527
1528                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1529                                // Count preceding backslashes
1530                                let mut backslash_count = 0;
1531                                let mut j = i;
1532                                while j > 0 && link_bytes[j - 1] == b'\\' {
1533                                    backslash_count += 1;
1534                                    j -= 1;
1535                                }
1536                                let is_escaped = backslash_count % 2 != 0;
1537
1538                                // Track code spans - backticks toggle in/out of code
1539                                if byte == b'`' && !is_escaped {
1540                                    in_code_span = !in_code_span;
1541                                }
1542
1543                                // Only count brackets when NOT in a code span
1544                                if !is_escaped && !in_code_span {
1545                                    if byte == b'[' {
1546                                        depth += 1;
1547                                    } else if byte == b']' {
1548                                        if depth == 0 {
1549                                            // Found the matching closing bracket
1550                                            close_pos = Some(i);
1551                                            break;
1552                                        } else {
1553                                            depth -= 1;
1554                                        }
1555                                    }
1556                                }
1557                            }
1558
1559                            if let Some(pos) = close_pos {
1560                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1561                            } else {
1562                                Cow::Borrowed("")
1563                            }
1564                        } else {
1565                            Cow::Borrowed("")
1566                        };
1567
1568                        // For reference links, use the actual reference ID from pulldown-cmark
1569                        let reference_id = if is_reference && !ref_id.is_empty() {
1570                            Some(Cow::Owned(ref_id.to_lowercase()))
1571                        } else if is_reference {
1572                            // For collapsed/shortcut references without explicit ID, use the link text
1573                            Some(Cow::Owned(link_text.to_lowercase()))
1574                        } else {
1575                            None
1576                        };
1577
1578                        // Track this position as found
1579                        found_positions.insert(start_pos);
1580
1581                        links.push(ParsedLink {
1582                            line: line_num,
1583                            start_col: col_start,
1584                            end_col: col_end,
1585                            byte_offset: start_pos,
1586                            byte_end: range.end,
1587                            text: link_text,
1588                            url: Cow::Owned(url.to_string()),
1589                            is_reference,
1590                            reference_id,
1591                            link_type,
1592                        });
1593
1594                        text_chunks.clear();
1595                    }
1596                }
1597                Event::FootnoteReference(footnote_id) => {
1598                    // Capture footnote references like [^1], [^note]
1599                    // Skip if in HTML comment
1600                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1601                        continue;
1602                    }
1603
1604                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1605                    footnote_refs.push(FootnoteRef {
1606                        id: footnote_id.to_string(),
1607                        line: line_num,
1608                        byte_offset: range.start,
1609                        byte_end: range.end,
1610                    });
1611                }
1612                _ => {}
1613            }
1614        }
1615
1616        // Also find undefined references using regex
1617        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1618        // because the reference is undefined
1619        for cap in LINK_PATTERN.captures_iter(content) {
1620            let full_match = cap.get(0).unwrap();
1621            let match_start = full_match.start();
1622            let match_end = full_match.end();
1623
1624            // Skip if this was already found by pulldown-cmark (it's a valid link)
1625            if found_positions.contains(&match_start) {
1626                continue;
1627            }
1628
1629            // Skip if escaped
1630            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1631                continue;
1632            }
1633
1634            // Skip if it's an image
1635            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1636                continue;
1637            }
1638
1639            // Skip if in code block
1640            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1641                continue;
1642            }
1643
1644            // Skip if in code span
1645            if Self::is_offset_in_code_span(code_spans, match_start) {
1646                continue;
1647            }
1648
1649            // Skip if in HTML comment
1650            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1651                continue;
1652            }
1653
1654            // Find line and column information
1655            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1656
1657            // Skip if this link is on a MkDocs snippet line
1658            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1659                continue;
1660            }
1661
1662            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1663
1664            let text = cap.get(1).map_or("", |m| m.as_str());
1665
1666            // Only process reference links (group 6)
1667            if let Some(ref_id) = cap.get(6) {
1668                let ref_id_str = ref_id.as_str();
1669                let normalized_ref = if ref_id_str.is_empty() {
1670                    Cow::Owned(text.to_lowercase()) // Implicit reference
1671                } else {
1672                    Cow::Owned(ref_id_str.to_lowercase())
1673                };
1674
1675                // This is an undefined reference (pulldown-cmark didn't parse it)
1676                links.push(ParsedLink {
1677                    line: line_num,
1678                    start_col: col_start,
1679                    end_col: col_end,
1680                    byte_offset: match_start,
1681                    byte_end: match_end,
1682                    text: Cow::Borrowed(text),
1683                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1684                    is_reference: true,
1685                    reference_id: Some(normalized_ref),
1686                    link_type: LinkType::Reference, // Undefined references are reference-style
1687                });
1688            }
1689        }
1690
1691        (links, broken_links, footnote_refs)
1692    }
1693
1694    /// Parse all images in the content
1695    fn parse_images(
1696        content: &'a str,
1697        lines: &[LineInfo],
1698        code_blocks: &[(usize, usize)],
1699        code_spans: &[CodeSpan],
1700        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1701    ) -> Vec<ParsedImage<'a>> {
1702        use crate::utils::skip_context::is_in_html_comment_ranges;
1703        use std::collections::HashSet;
1704
1705        // Pre-size based on a heuristic: images are less common than links
1706        let mut images = Vec::with_capacity(content.len() / 1000);
1707        let mut found_positions = HashSet::new();
1708
1709        // Use pulldown-cmark for parsing - more accurate and faster
1710        let parser = Parser::new(content).into_offset_iter();
1711        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1712            Vec::new();
1713        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1714
1715        for (event, range) in parser {
1716            match event {
1717                Event::Start(Tag::Image {
1718                    link_type,
1719                    dest_url,
1720                    id,
1721                    ..
1722                }) => {
1723                    image_stack.push((range.start, dest_url, link_type, id));
1724                    text_chunks.clear();
1725                }
1726                Event::Text(text) if !image_stack.is_empty() => {
1727                    text_chunks.push((text.to_string(), range.start, range.end));
1728                }
1729                Event::Code(code) if !image_stack.is_empty() => {
1730                    let code_text = format!("`{code}`");
1731                    text_chunks.push((code_text, range.start, range.end));
1732                }
1733                Event::End(TagEnd::Image) => {
1734                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1735                        // Skip if in code block
1736                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1737                            continue;
1738                        }
1739
1740                        // Skip if in code span
1741                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1742                            continue;
1743                        }
1744
1745                        // Skip if in HTML comment
1746                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1747                            continue;
1748                        }
1749
1750                        // Find line and column using binary search
1751                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1752                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1753
1754                        let is_reference = matches!(
1755                            link_type,
1756                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1757                        );
1758
1759                        // Extract alt text directly from source bytes to preserve escaping
1760                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1761                        let alt_text = if start_pos < content.len() {
1762                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1763
1764                            // Find MATCHING ] by tracking bracket depth for nested brackets
1765                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1766                            let mut close_pos = None;
1767                            let mut depth = 0;
1768
1769                            if image_bytes.len() > 2 {
1770                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1771                                    // Count preceding backslashes
1772                                    let mut backslash_count = 0;
1773                                    let mut j = i;
1774                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1775                                        backslash_count += 1;
1776                                        j -= 1;
1777                                    }
1778                                    let is_escaped = backslash_count % 2 != 0;
1779
1780                                    if !is_escaped {
1781                                        if byte == b'[' {
1782                                            depth += 1;
1783                                        } else if byte == b']' {
1784                                            if depth == 0 {
1785                                                // Found the matching closing bracket
1786                                                close_pos = Some(i);
1787                                                break;
1788                                            } else {
1789                                                depth -= 1;
1790                                            }
1791                                        }
1792                                    }
1793                                }
1794                            }
1795
1796                            if let Some(pos) = close_pos {
1797                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1798                            } else {
1799                                Cow::Borrowed("")
1800                            }
1801                        } else {
1802                            Cow::Borrowed("")
1803                        };
1804
1805                        let reference_id = if is_reference && !ref_id.is_empty() {
1806                            Some(Cow::Owned(ref_id.to_lowercase()))
1807                        } else if is_reference {
1808                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1809                        } else {
1810                            None
1811                        };
1812
1813                        found_positions.insert(start_pos);
1814                        images.push(ParsedImage {
1815                            line: line_num,
1816                            start_col: col_start,
1817                            end_col: col_end,
1818                            byte_offset: start_pos,
1819                            byte_end: range.end,
1820                            alt_text,
1821                            url: Cow::Owned(url.to_string()),
1822                            is_reference,
1823                            reference_id,
1824                            link_type,
1825                        });
1826                    }
1827                }
1828                _ => {}
1829            }
1830        }
1831
1832        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1833        for cap in IMAGE_PATTERN.captures_iter(content) {
1834            let full_match = cap.get(0).unwrap();
1835            let match_start = full_match.start();
1836            let match_end = full_match.end();
1837
1838            // Skip if already found by pulldown-cmark
1839            if found_positions.contains(&match_start) {
1840                continue;
1841            }
1842
1843            // Skip if the ! is escaped
1844            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1845                continue;
1846            }
1847
1848            // Skip if in code block, code span, or HTML comment
1849            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1850                || Self::is_offset_in_code_span(code_spans, match_start)
1851                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1852            {
1853                continue;
1854            }
1855
1856            // Only process reference images (undefined references not found by pulldown-cmark)
1857            if let Some(ref_id) = cap.get(6) {
1858                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1859                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1860                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1861                let ref_id_str = ref_id.as_str();
1862                let normalized_ref = if ref_id_str.is_empty() {
1863                    Cow::Owned(alt_text.to_lowercase())
1864                } else {
1865                    Cow::Owned(ref_id_str.to_lowercase())
1866                };
1867
1868                images.push(ParsedImage {
1869                    line: line_num,
1870                    start_col: col_start,
1871                    end_col: col_end,
1872                    byte_offset: match_start,
1873                    byte_end: match_end,
1874                    alt_text: Cow::Borrowed(alt_text),
1875                    url: Cow::Borrowed(""),
1876                    is_reference: true,
1877                    reference_id: Some(normalized_ref),
1878                    link_type: LinkType::Reference, // Undefined references are reference-style
1879                });
1880            }
1881        }
1882
1883        images
1884    }
1885
1886    /// Parse reference definitions
1887    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1888        // Pre-size based on lines count as reference definitions are line-based
1889        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1890
1891        for (line_idx, line_info) in lines.iter().enumerate() {
1892            // Skip lines in code blocks
1893            if line_info.in_code_block {
1894                continue;
1895            }
1896
1897            let line = line_info.content(content);
1898            let line_num = line_idx + 1;
1899
1900            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1901                let id_raw = cap.get(1).unwrap().as_str();
1902
1903                // Skip footnote definitions - they use [^id]: syntax and are semantically
1904                // different from reference link definitions
1905                if id_raw.starts_with('^') {
1906                    continue;
1907                }
1908
1909                let id = id_raw.to_lowercase();
1910                let url = cap.get(2).unwrap().as_str().to_string();
1911                let title_match = cap.get(3).or_else(|| cap.get(4));
1912                let title = title_match.map(|m| m.as_str().to_string());
1913
1914                // Calculate byte positions
1915                // The match starts at the beginning of the line (0) and extends to the end
1916                let match_obj = cap.get(0).unwrap();
1917                let byte_offset = line_info.byte_offset + match_obj.start();
1918                let byte_end = line_info.byte_offset + match_obj.end();
1919
1920                // Calculate title byte positions (includes the quote character before content)
1921                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1922                    // The match is the content inside quotes, so we include the quote before
1923                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1924                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1925                    (Some(start), Some(end))
1926                } else {
1927                    (None, None)
1928                };
1929
1930                refs.push(ReferenceDef {
1931                    line: line_num,
1932                    id,
1933                    url,
1934                    title,
1935                    byte_offset,
1936                    byte_end,
1937                    title_byte_start,
1938                    title_byte_end,
1939                });
1940            }
1941        }
1942
1943        refs
1944    }
1945
1946    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1947    /// Handles nested blockquotes like `> > > content`
1948    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1949    #[inline]
1950    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1951        let trimmed_start = line.trim_start();
1952        if !trimmed_start.starts_with('>') {
1953            return None;
1954        }
1955
1956        // Track total prefix length to handle nested blockquotes
1957        let mut remaining = line;
1958        let mut total_prefix_len = 0;
1959
1960        loop {
1961            let trimmed = remaining.trim_start();
1962            if !trimmed.starts_with('>') {
1963                break;
1964            }
1965
1966            // Add leading whitespace + '>' to prefix
1967            let leading_ws_len = remaining.len() - trimmed.len();
1968            total_prefix_len += leading_ws_len + 1;
1969
1970            let after_gt = &trimmed[1..];
1971
1972            // Handle optional whitespace after '>' (space or tab)
1973            if let Some(stripped) = after_gt.strip_prefix(' ') {
1974                total_prefix_len += 1;
1975                remaining = stripped;
1976            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1977                total_prefix_len += 1;
1978                remaining = stripped;
1979            } else {
1980                remaining = after_gt;
1981            }
1982        }
1983
1984        Some((&line[..total_prefix_len], remaining))
1985    }
1986
1987    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
1988    ///
1989    /// Returns a HashMap keyed by line byte offset, containing:
1990    /// `(is_ordered, marker, marker_column, content_column, number)`
1991    ///
1992    /// ## Why pulldown-cmark?
1993    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
1994    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
1995    /// This fixes issue #253 where continuation lines were falsely detected.
1996    ///
1997    /// ## Tab indentation quirk
1998    /// Pulldown-cmark reports nested list items at the newline character position
1999    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2000    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2001    /// We detect this and advance to the correct line.
2002    ///
2003    /// ## HashMap key strategy
2004    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2005    /// that resolve to the same line (after newline adjustment). The first event
2006    /// for each line is authoritative.
2007    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2008    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2009    /// This avoids a separate parse for emphasis detection.
2010    fn detect_list_items_and_emphasis_with_pulldown(
2011        content: &str,
2012        line_offsets: &[usize],
2013        flavor: MarkdownFlavor,
2014        front_matter_end: usize,
2015        code_blocks: &[(usize, usize)],
2016    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2017        use std::collections::HashMap;
2018
2019        let mut list_items = HashMap::new();
2020        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2021
2022        let mut options = Options::empty();
2023        options.insert(Options::ENABLE_TABLES);
2024        options.insert(Options::ENABLE_FOOTNOTES);
2025        options.insert(Options::ENABLE_STRIKETHROUGH);
2026        options.insert(Options::ENABLE_TASKLISTS);
2027        // Always enable GFM features for consistency with existing behavior
2028        options.insert(Options::ENABLE_GFM);
2029
2030        // Suppress unused variable warning
2031        let _ = flavor;
2032
2033        let parser = Parser::new_ext(content, options).into_offset_iter();
2034        let mut list_depth: usize = 0;
2035        let mut list_stack: Vec<bool> = Vec::new();
2036
2037        for (event, range) in parser {
2038            match event {
2039                // Capture emphasis spans (for MD030's emphasis detection)
2040                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2041                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2042                        2
2043                    } else {
2044                        1
2045                    };
2046                    let match_start = range.start;
2047                    let match_end = range.end;
2048
2049                    // Skip if in code block
2050                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2051                        // Determine marker character by looking at the content at the start
2052                        let marker = content[match_start..].chars().next().unwrap_or('*');
2053                        if marker == '*' || marker == '_' {
2054                            // Extract content between markers
2055                            let content_start = match_start + marker_count;
2056                            let content_end = if match_end >= marker_count {
2057                                match_end - marker_count
2058                            } else {
2059                                match_end
2060                            };
2061                            let content_part = if content_start < content_end && content_end <= content.len() {
2062                                &content[content_start..content_end]
2063                            } else {
2064                                ""
2065                            };
2066
2067                            // Find which line this emphasis is on using line_offsets
2068                            let line_idx = match line_offsets.binary_search(&match_start) {
2069                                Ok(idx) => idx,
2070                                Err(idx) => idx.saturating_sub(1),
2071                            };
2072                            let line_num = line_idx + 1;
2073                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2074                            let col_start = match_start - line_start;
2075                            let col_end = match_end - line_start;
2076
2077                            emphasis_spans.push(EmphasisSpan {
2078                                line: line_num,
2079                                start_col: col_start,
2080                                end_col: col_end,
2081                                byte_offset: match_start,
2082                                byte_end: match_end,
2083                                marker,
2084                                marker_count,
2085                                content: content_part.to_string(),
2086                            });
2087                        }
2088                    }
2089                }
2090                Event::Start(Tag::List(start_number)) => {
2091                    list_depth += 1;
2092                    list_stack.push(start_number.is_some());
2093                }
2094                Event::End(TagEnd::List(_)) => {
2095                    list_depth = list_depth.saturating_sub(1);
2096                    list_stack.pop();
2097                }
2098                Event::Start(Tag::Item) if list_depth > 0 => {
2099                    // Get the ordered state for the CURRENT (innermost) list
2100                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2101                    // Find which line this byte offset corresponds to
2102                    let item_start = range.start;
2103
2104                    // Binary search to find the line number
2105                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2106                        Ok(idx) => idx,
2107                        Err(idx) => idx.saturating_sub(1),
2108                    };
2109
2110                    // Pulldown-cmark reports nested list items at the newline before the item
2111                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2112                    // Advance to the actual content line in this case.
2113                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2114                        line_idx += 1;
2115                    }
2116
2117                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2118                    if front_matter_end > 0 && line_idx < front_matter_end {
2119                        continue;
2120                    }
2121
2122                    if line_idx < line_offsets.len() {
2123                        let line_start_byte = line_offsets[line_idx];
2124                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2125                        let line = &content[line_start_byte..line_end.min(content.len())];
2126
2127                        // Strip trailing newline
2128                        let line = line
2129                            .strip_suffix('\n')
2130                            .or_else(|| line.strip_suffix("\r\n"))
2131                            .unwrap_or(line);
2132
2133                        // Strip blockquote prefix if present
2134                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2135                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2136                            (prefix.len(), content)
2137                        } else {
2138                            (0, line)
2139                        };
2140
2141                        // Parse the list marker from the actual line
2142                        if current_list_is_ordered {
2143                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2144                                Self::parse_ordered_list(line_to_parse)
2145                            {
2146                                let marker = format!("{number_str}{delimiter}");
2147                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2148                                let content_column = marker_column + marker.len() + spacing.len();
2149                                let number = number_str.parse().ok();
2150
2151                                list_items.entry(line_start_byte).or_insert((
2152                                    true,
2153                                    marker,
2154                                    marker_column,
2155                                    content_column,
2156                                    number,
2157                                ));
2158                            }
2159                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2160                            Self::parse_unordered_list(line_to_parse)
2161                        {
2162                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2163                            let content_column = marker_column + 1 + spacing.len();
2164
2165                            list_items.entry(line_start_byte).or_insert((
2166                                false,
2167                                marker.to_string(),
2168                                marker_column,
2169                                content_column,
2170                                None,
2171                            ));
2172                        }
2173                    }
2174                }
2175                _ => {}
2176            }
2177        }
2178
2179        (list_items, emphasis_spans)
2180    }
2181
2182    /// Fast unordered list parser - replaces regex for 5-10x speedup
2183    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2184    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2185    #[inline]
2186    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2187        let bytes = line.as_bytes();
2188        let mut i = 0;
2189
2190        // Skip leading whitespace
2191        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2192            i += 1;
2193        }
2194
2195        // Check for marker
2196        if i >= bytes.len() {
2197            return None;
2198        }
2199        let marker = bytes[i] as char;
2200        if marker != '-' && marker != '*' && marker != '+' {
2201            return None;
2202        }
2203        let marker_pos = i;
2204        i += 1;
2205
2206        // Collect spacing after marker (space or tab only)
2207        let spacing_start = i;
2208        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2209            i += 1;
2210        }
2211
2212        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2213    }
2214
2215    /// Fast ordered list parser - replaces regex for 5-10x speedup
2216    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2217    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2218    #[inline]
2219    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2220        let bytes = line.as_bytes();
2221        let mut i = 0;
2222
2223        // Skip leading whitespace
2224        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2225            i += 1;
2226        }
2227
2228        // Collect digits
2229        let number_start = i;
2230        while i < bytes.len() && bytes[i].is_ascii_digit() {
2231            i += 1;
2232        }
2233        if i == number_start {
2234            return None; // No digits found
2235        }
2236
2237        // Check for delimiter
2238        if i >= bytes.len() {
2239            return None;
2240        }
2241        let delimiter = bytes[i] as char;
2242        if delimiter != '.' && delimiter != ')' {
2243            return None;
2244        }
2245        let delimiter_pos = i;
2246        i += 1;
2247
2248        // Collect spacing after delimiter (space or tab only)
2249        let spacing_start = i;
2250        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2251            i += 1;
2252        }
2253
2254        Some((
2255            &line[..number_start],
2256            &line[number_start..delimiter_pos],
2257            delimiter,
2258            &line[spacing_start..i],
2259            &line[i..],
2260        ))
2261    }
2262
2263    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2264    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2265    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2266        let num_lines = line_offsets.len();
2267        let mut in_code_block = vec![false; num_lines];
2268
2269        // For each code block, mark all lines within it
2270        for &(start, end) in code_blocks {
2271            // Ensure we're at valid UTF-8 boundaries
2272            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2273                let mut boundary = start;
2274                while boundary > 0 && !content.is_char_boundary(boundary) {
2275                    boundary -= 1;
2276                }
2277                boundary
2278            } else {
2279                start
2280            };
2281
2282            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2283                let mut boundary = end;
2284                while boundary < content.len() && !content.is_char_boundary(boundary) {
2285                    boundary += 1;
2286                }
2287                boundary
2288            } else {
2289                end.min(content.len())
2290            };
2291
2292            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2293            // That function now has proper list context awareness (see code_block_utils.rs)
2294            // and correctly distinguishes between:
2295            // - Fenced code blocks (``` or ~~~)
2296            // - Indented code blocks at document level (4 spaces + blank line before)
2297            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2298            //
2299            // We no longer need to re-validate here. The original validation logic
2300            // was causing false positives by marking list continuation paragraphs as
2301            // code blocks when they have 4 spaces of indentation.
2302
2303            // Use binary search to find the first and last line indices
2304            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2305            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2306            //
2307            // Find the line that CONTAINS safe_start: the line with the largest
2308            // start offset that is <= safe_start. partition_point gives us the
2309            // first line that starts AFTER safe_start, so we subtract 1.
2310            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2311            let first_line = first_line_after.saturating_sub(1);
2312            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2313
2314            // Mark all lines in the range at once
2315            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2316                *flag = true;
2317            }
2318        }
2319
2320        in_code_block
2321    }
2322
2323    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2324    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2325    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2326        let content_lines: Vec<&str> = content.lines().collect();
2327        let num_lines = content_lines.len();
2328        let mut in_math_block = vec![false; num_lines];
2329
2330        let mut inside_math = false;
2331
2332        for (i, line) in content_lines.iter().enumerate() {
2333            // Skip lines that are in code blocks - math delimiters inside code are literal
2334            if code_block_map.get(i).copied().unwrap_or(false) {
2335                continue;
2336            }
2337
2338            let trimmed = line.trim();
2339
2340            // Check for math block delimiter ($$)
2341            // A line with just $$ toggles the math block state
2342            if trimmed == "$$" {
2343                if inside_math {
2344                    // Closing delimiter - this line is still part of the math block
2345                    in_math_block[i] = true;
2346                    inside_math = false;
2347                } else {
2348                    // Opening delimiter - this line starts the math block
2349                    in_math_block[i] = true;
2350                    inside_math = true;
2351                }
2352            } else if inside_math {
2353                // Content inside math block
2354                in_math_block[i] = true;
2355            }
2356        }
2357
2358        in_math_block
2359    }
2360
2361    /// Pre-compute basic line information (without headings/blockquotes)
2362    /// Also returns emphasis spans detected during the pulldown-cmark parse
2363    fn compute_basic_line_info(
2364        content: &str,
2365        line_offsets: &[usize],
2366        code_blocks: &[(usize, usize)],
2367        flavor: MarkdownFlavor,
2368        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2369        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2370        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2371    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2372        let content_lines: Vec<&str> = content.lines().collect();
2373        let mut lines = Vec::with_capacity(content_lines.len());
2374
2375        // Pre-compute which lines are in code blocks
2376        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2377
2378        // Pre-compute which lines are in math blocks ($$ ... $$)
2379        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2380
2381        // Detect front matter boundaries FIRST, before any other parsing
2382        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2383        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2384
2385        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2386        // (context-aware, eliminates false positives)
2387        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2388            content,
2389            line_offsets,
2390            flavor,
2391            front_matter_end,
2392            code_blocks,
2393        );
2394
2395        for (i, line) in content_lines.iter().enumerate() {
2396            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2397            let indent = line.len() - line.trim_start().len();
2398            // Compute visual indent with proper CommonMark tab expansion
2399            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2400
2401            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2402            let blockquote_parse = Self::parse_blockquote_prefix(line);
2403
2404            // For blank detection, consider blockquote context
2405            let is_blank = if let Some((_, content)) = blockquote_parse {
2406                // In blockquote context, check if content after prefix is blank
2407                content.trim().is_empty()
2408            } else {
2409                line.trim().is_empty()
2410            };
2411
2412            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2413            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2414
2415            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2416            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2417                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2418            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2419            // This ensures content after `-->` on the same line is not incorrectly skipped
2420            let line_end_offset = byte_offset + line.len();
2421            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2422                html_comment_ranges,
2423                byte_offset,
2424                line_end_offset,
2425            );
2426            // Use pulldown-cmark's list detection for context-aware parsing
2427            // This eliminates false positives on continuation lines (issue #253)
2428            let list_item =
2429                list_item_map
2430                    .get(&byte_offset)
2431                    .map(
2432                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2433                            marker: marker.clone(),
2434                            is_ordered: *is_ordered,
2435                            number: *number,
2436                            marker_column: *marker_column,
2437                            content_column: *content_column,
2438                        },
2439                    );
2440
2441            // Detect horizontal rules (only outside code blocks and frontmatter)
2442            // Uses CommonMark-compliant check including leading indentation validation
2443            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2444            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2445
2446            // Get math block status for this line
2447            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2448
2449            // Check if line is inside a Quarto div block
2450            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2451                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2452
2453            lines.push(LineInfo {
2454                byte_offset,
2455                byte_len: line.len(),
2456                indent,
2457                visual_indent,
2458                is_blank,
2459                in_code_block,
2460                in_front_matter,
2461                in_html_block: false, // Will be populated after line creation
2462                in_html_comment,
2463                list_item,
2464                heading: None,    // Will be populated in second pass for Setext headings
2465                blockquote: None, // Will be populated after line creation
2466                in_mkdocstrings,
2467                in_esm_block: false, // Will be populated after line creation for MDX files
2468                in_code_span_continuation: false, // Will be populated after code spans are parsed
2469                is_horizontal_rule: is_hr,
2470                in_math_block,
2471                in_quarto_div,
2472                in_jsx_expression: false,  // Will be populated for MDX files
2473                in_mdx_comment: false,     // Will be populated for MDX files
2474                in_jsx_component: false,   // Will be populated for MDX files
2475                in_jsx_fragment: false,    // Will be populated for MDX files
2476                in_admonition: false,      // Will be populated for MkDocs files
2477                in_content_tab: false,     // Will be populated for MkDocs files
2478                in_definition_list: false, // Will be populated for MkDocs files
2479            });
2480        }
2481
2482        (lines, emphasis_spans)
2483    }
2484
2485    /// Detect headings and blockquotes (called after HTML block detection)
2486    fn detect_headings_and_blockquotes(
2487        content: &str,
2488        lines: &mut [LineInfo],
2489        flavor: MarkdownFlavor,
2490        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2491        link_byte_ranges: &[(usize, usize)],
2492    ) {
2493        // Regex for heading detection
2494        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2495            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2496        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2497            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2498
2499        let content_lines: Vec<&str> = content.lines().collect();
2500
2501        // Detect front matter boundaries to skip those lines
2502        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2503
2504        // Detect headings (including Setext which needs look-ahead) and blockquotes
2505        for i in 0..lines.len() {
2506            let line = content_lines[i];
2507
2508            // Detect blockquotes FIRST, before any skip conditions.
2509            // A line can be both a blockquote AND contain a code block inside it.
2510            // We need to know about the blockquote marker regardless of code block status.
2511            // Skip only frontmatter lines - those are never blockquotes.
2512            if !(front_matter_end > 0 && i < front_matter_end)
2513                && let Some(bq) = parse_blockquote_detailed(line)
2514            {
2515                let nesting_level = bq.markers.len();
2516                let marker_column = bq.indent.len();
2517                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2518                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2519                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2520                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2521
2522                lines[i].blockquote = Some(BlockquoteInfo {
2523                    nesting_level,
2524                    indent: bq.indent.to_string(),
2525                    marker_column,
2526                    prefix,
2527                    content: bq.content.to_string(),
2528                    has_no_space_after_marker: has_no_space,
2529                    has_multiple_spaces_after_marker: has_multiple_spaces,
2530                    needs_md028_fix,
2531                });
2532
2533                // Update is_horizontal_rule for blockquote content
2534                // The original detection doesn't strip blockquote prefix, so we need to check here
2535                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2536                    lines[i].is_horizontal_rule = true;
2537                }
2538            }
2539
2540            // Now apply skip conditions for heading detection
2541            if lines[i].in_code_block {
2542                continue;
2543            }
2544
2545            // Skip lines in front matter
2546            if front_matter_end > 0 && i < front_matter_end {
2547                continue;
2548            }
2549
2550            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2551            if lines[i].in_html_block {
2552                continue;
2553            }
2554
2555            // Skip heading detection for blank lines
2556            if lines[i].is_blank {
2557                continue;
2558            }
2559
2560            // Check for ATX headings (but skip MkDocs snippet lines)
2561            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2562            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2563                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2564                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2565            } else {
2566                false
2567            };
2568
2569            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2570                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2571                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2572                    continue;
2573                }
2574                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2575                // This prevents false positives where `#fragment` is detected as a heading
2576                let line_offset = lines[i].byte_offset;
2577                if link_byte_ranges
2578                    .iter()
2579                    .any(|&(start, end)| line_offset > start && line_offset < end)
2580                {
2581                    continue;
2582                }
2583                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2584                let hashes = caps.get(2).map_or("", |m| m.as_str());
2585                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2586                let rest = caps.get(4).map_or("", |m| m.as_str());
2587
2588                let level = hashes.len() as u8;
2589                let marker_column = leading_spaces.len();
2590
2591                // Check for closing sequence, but handle custom IDs that might come after
2592                let (text, has_closing, closing_seq) = {
2593                    // First check if there's a custom ID at the end
2594                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2595                        // Check if this looks like a valid custom ID (ends with })
2596                        if rest[id_start..].trim_end().ends_with('}') {
2597                            // Split off the custom ID
2598                            (&rest[..id_start], &rest[id_start..])
2599                        } else {
2600                            (rest, "")
2601                        }
2602                    } else {
2603                        (rest, "")
2604                    };
2605
2606                    // Now look for closing hashes in the part before the custom ID
2607                    let trimmed_rest = rest_without_id.trim_end();
2608                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2609                        // Find the start of the hash sequence by walking backwards
2610                        // Use char_indices to get byte positions at char boundaries
2611                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2612
2613                        // Find which char index corresponds to last_hash_byte_pos
2614                        let last_hash_char_idx = char_positions
2615                            .iter()
2616                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2617
2618                        if let Some(mut char_idx) = last_hash_char_idx {
2619                            // Walk backwards to find start of hash sequence
2620                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2621                                char_idx -= 1;
2622                            }
2623
2624                            // Get the byte position of the start of hashes
2625                            let start_of_hashes = char_positions[char_idx].0;
2626
2627                            // Check if there's at least one space before the closing hashes
2628                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2629
2630                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2631                            let potential_closing = &trimmed_rest[start_of_hashes..];
2632                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2633
2634                            if is_all_hashes && has_space_before {
2635                                // This is a closing sequence
2636                                let closing_hashes = potential_closing.to_string();
2637                                // The text is everything before the closing hashes
2638                                // Don't include the custom ID here - it will be extracted later
2639                                let text_part = if !custom_id_part.is_empty() {
2640                                    // If we have a custom ID, append it back to get the full rest
2641                                    // This allows the extract_header_id function to handle it properly
2642                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2643                                } else {
2644                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2645                                };
2646                                (text_part, true, closing_hashes)
2647                            } else {
2648                                // Not a valid closing sequence, return the full content
2649                                (rest.to_string(), false, String::new())
2650                            }
2651                        } else {
2652                            // Couldn't find char boundary, return the full content
2653                            (rest.to_string(), false, String::new())
2654                        }
2655                    } else {
2656                        // No hashes found, return the full content
2657                        (rest.to_string(), false, String::new())
2658                    }
2659                };
2660
2661                let content_column = marker_column + hashes.len() + spaces_after.len();
2662
2663                // Extract custom header ID if present
2664                let raw_text = text.trim().to_string();
2665                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2666
2667                // If no custom ID was found on the header line, check the next line for standalone attr-list
2668                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2669                    let next_line = content_lines[i + 1];
2670                    if !lines[i + 1].in_code_block
2671                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2672                        && let Some(next_line_id) =
2673                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2674                    {
2675                        custom_id = Some(next_line_id);
2676                    }
2677                }
2678
2679                // ATX heading is "valid" for processing by heading rules if:
2680                // 1. Has space after # (CommonMark compliant): `# Heading`
2681                // 2. Is empty (just hashes): `#`
2682                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2683                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2684                //
2685                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2686                // - `#tag` - single # with lowercase (social hashtag)
2687                // - `#123` - single # with number (GitHub issue ref)
2688                let is_valid = !spaces_after.is_empty()
2689                    || rest.is_empty()
2690                    || level > 1
2691                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2692
2693                lines[i].heading = Some(HeadingInfo {
2694                    level,
2695                    style: HeadingStyle::ATX,
2696                    marker: hashes.to_string(),
2697                    marker_column,
2698                    content_column,
2699                    text: clean_text,
2700                    custom_id,
2701                    raw_text,
2702                    has_closing_sequence: has_closing,
2703                    closing_sequence: closing_seq,
2704                    is_valid,
2705                });
2706            }
2707            // Check for Setext headings (need to look at next line)
2708            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2709                let next_line = content_lines[i + 1];
2710                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2711                    // Skip if next line is front matter delimiter
2712                    if front_matter_end > 0 && i < front_matter_end {
2713                        continue;
2714                    }
2715
2716                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2717                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2718                    {
2719                        continue;
2720                    }
2721
2722                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2723                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2724                    let content_line = line.trim();
2725
2726                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2727                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2728                        continue;
2729                    }
2730
2731                    // Skip underscore thematic breaks (___)
2732                    if content_line.starts_with('_') {
2733                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2734                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2735                            continue;
2736                        }
2737                    }
2738
2739                    // Skip numbered lists (1. Item, 2. Item, etc.)
2740                    if let Some(first_char) = content_line.chars().next()
2741                        && first_char.is_ascii_digit()
2742                    {
2743                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2744                        if num_end < content_line.len() {
2745                            let next = content_line.chars().nth(num_end);
2746                            if next == Some('.') || next == Some(')') {
2747                                continue;
2748                            }
2749                        }
2750                    }
2751
2752                    // Skip ATX headings
2753                    if ATX_HEADING_REGEX.is_match(line) {
2754                        continue;
2755                    }
2756
2757                    // Skip blockquotes
2758                    if content_line.starts_with('>') {
2759                        continue;
2760                    }
2761
2762                    // Skip code fences
2763                    let trimmed_start = line.trim_start();
2764                    if trimmed_start.len() >= 3 {
2765                        let first_three: String = trimmed_start.chars().take(3).collect();
2766                        if first_three == "```" || first_three == "~~~" {
2767                            continue;
2768                        }
2769                    }
2770
2771                    // Skip HTML blocks
2772                    if content_line.starts_with('<') {
2773                        continue;
2774                    }
2775
2776                    let underline = next_line.trim();
2777
2778                    let level = if underline.starts_with('=') { 1 } else { 2 };
2779                    let style = if level == 1 {
2780                        HeadingStyle::Setext1
2781                    } else {
2782                        HeadingStyle::Setext2
2783                    };
2784
2785                    // Extract custom header ID if present
2786                    let raw_text = line.trim().to_string();
2787                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2788
2789                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2790                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2791                        let attr_line = content_lines[i + 2];
2792                        if !lines[i + 2].in_code_block
2793                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2794                            && let Some(attr_line_id) =
2795                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2796                        {
2797                            custom_id = Some(attr_line_id);
2798                        }
2799                    }
2800
2801                    lines[i].heading = Some(HeadingInfo {
2802                        level,
2803                        style,
2804                        marker: underline.to_string(),
2805                        marker_column: next_line.len() - next_line.trim_start().len(),
2806                        content_column: lines[i].indent,
2807                        text: clean_text,
2808                        custom_id,
2809                        raw_text,
2810                        has_closing_sequence: false,
2811                        closing_sequence: String::new(),
2812                        is_valid: true, // Setext headings are always valid
2813                    });
2814                }
2815            }
2816        }
2817    }
2818
2819    /// Detect HTML blocks in the content
2820    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2821        // HTML block elements that trigger block context
2822        // Includes HTML5 media, embedded content, and interactive elements
2823        const BLOCK_ELEMENTS: &[&str] = &[
2824            "address",
2825            "article",
2826            "aside",
2827            "audio",
2828            "blockquote",
2829            "canvas",
2830            "details",
2831            "dialog",
2832            "dd",
2833            "div",
2834            "dl",
2835            "dt",
2836            "embed",
2837            "fieldset",
2838            "figcaption",
2839            "figure",
2840            "footer",
2841            "form",
2842            "h1",
2843            "h2",
2844            "h3",
2845            "h4",
2846            "h5",
2847            "h6",
2848            "header",
2849            "hr",
2850            "iframe",
2851            "li",
2852            "main",
2853            "menu",
2854            "nav",
2855            "noscript",
2856            "object",
2857            "ol",
2858            "p",
2859            "picture",
2860            "pre",
2861            "script",
2862            "search",
2863            "section",
2864            "source",
2865            "style",
2866            "summary",
2867            "svg",
2868            "table",
2869            "tbody",
2870            "td",
2871            "template",
2872            "textarea",
2873            "tfoot",
2874            "th",
2875            "thead",
2876            "tr",
2877            "track",
2878            "ul",
2879            "video",
2880        ];
2881
2882        let mut i = 0;
2883        while i < lines.len() {
2884            // Skip if already in code block or front matter
2885            if lines[i].in_code_block || lines[i].in_front_matter {
2886                i += 1;
2887                continue;
2888            }
2889
2890            let trimmed = lines[i].content(content).trim_start();
2891
2892            // Check if line starts with an HTML tag
2893            if trimmed.starts_with('<') && trimmed.len() > 1 {
2894                // Extract tag name safely
2895                let after_bracket = &trimmed[1..];
2896                let is_closing = after_bracket.starts_with('/');
2897                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2898
2899                // Extract tag name (stop at space, >, /, or end of string)
2900                let tag_name = tag_start
2901                    .chars()
2902                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2903                    .collect::<String>()
2904                    .to_lowercase();
2905
2906                // Check if it's a block element
2907                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2908                    // Mark this line as in HTML block
2909                    lines[i].in_html_block = true;
2910
2911                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2912                    // This avoids complex nesting logic that might cause infinite loops
2913                    if !is_closing {
2914                        let closing_tag = format!("</{tag_name}>");
2915                        // style and script tags can contain blank lines (CSS/JS formatting)
2916                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2917                        let mut j = i + 1;
2918                        let mut found_closing_tag = false;
2919                        while j < lines.len() && j < i + 100 {
2920                            // Limit search to 100 lines
2921                            // Stop at blank lines (except for style/script tags)
2922                            if !allow_blank_lines && lines[j].is_blank {
2923                                break;
2924                            }
2925
2926                            lines[j].in_html_block = true;
2927
2928                            // Check if this line contains the closing tag
2929                            if lines[j].content(content).contains(&closing_tag) {
2930                                found_closing_tag = true;
2931                            }
2932
2933                            // After finding closing tag, continue marking lines as
2934                            // in_html_block until blank line (per CommonMark spec)
2935                            if found_closing_tag {
2936                                j += 1;
2937                                // Continue marking subsequent lines until blank
2938                                while j < lines.len() && j < i + 100 {
2939                                    if lines[j].is_blank {
2940                                        break;
2941                                    }
2942                                    lines[j].in_html_block = true;
2943                                    j += 1;
2944                                }
2945                                break;
2946                            }
2947                            j += 1;
2948                        }
2949                    }
2950                }
2951            }
2952
2953            i += 1;
2954        }
2955    }
2956
2957    /// Detect ESM import/export blocks anywhere in MDX files
2958    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2959    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2960        // Only process MDX files
2961        if !flavor.supports_esm_blocks() {
2962            return;
2963        }
2964
2965        let mut in_multiline_import = false;
2966
2967        for line in lines.iter_mut() {
2968            // Skip code blocks, front matter, and HTML comments
2969            if line.in_code_block || line.in_front_matter || line.in_html_comment {
2970                in_multiline_import = false;
2971                continue;
2972            }
2973
2974            let line_content = line.content(content);
2975            let trimmed = line_content.trim();
2976
2977            // Handle continuation of multi-line import/export
2978            if in_multiline_import {
2979                line.in_esm_block = true;
2980                // Check if this line completes the statement
2981                // Multi-line import ends when we see the closing quote + optional semicolon
2982                if trimmed.ends_with('\'')
2983                    || trimmed.ends_with('"')
2984                    || trimmed.ends_with("';")
2985                    || trimmed.ends_with("\";")
2986                    || line_content.contains(';')
2987                {
2988                    in_multiline_import = false;
2989                }
2990                continue;
2991            }
2992
2993            // Skip blank lines
2994            if line.is_blank {
2995                continue;
2996            }
2997
2998            // Check if line starts with import or export
2999            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3000                line.in_esm_block = true;
3001
3002                // Determine if this is a complete single-line statement or starts a multi-line one
3003                // Multi-line imports look like:
3004                //   import {
3005                //     Foo,
3006                //     Bar
3007                //   } from 'module'
3008                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3009                let is_import = trimmed.starts_with("import ");
3010
3011                // Check for simple complete statements
3012                let is_complete =
3013                    // Ends with semicolon
3014                    trimmed.ends_with(';')
3015                    // import/export with from clause that ends with quote
3016                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3017                    // Simple export (export const/let/var/function/class without from)
3018                    || (!is_import && !trimmed.contains(" from ") && (
3019                        trimmed.starts_with("export const ")
3020                        || trimmed.starts_with("export let ")
3021                        || trimmed.starts_with("export var ")
3022                        || trimmed.starts_with("export function ")
3023                        || trimmed.starts_with("export class ")
3024                        || trimmed.starts_with("export default ")
3025                    ));
3026
3027                if !is_complete && is_import {
3028                    // Only imports can span multiple lines in the typical case
3029                    // Check if it looks like the start of a multi-line import
3030                    // e.g., "import {" or "import type {"
3031                    if trimmed.contains('{') && !trimmed.contains('}') {
3032                        in_multiline_import = true;
3033                    }
3034                }
3035            }
3036        }
3037    }
3038
3039    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3040    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3041    fn detect_jsx_and_mdx_comments(
3042        content: &str,
3043        lines: &mut [LineInfo],
3044        flavor: MarkdownFlavor,
3045        code_blocks: &[(usize, usize)],
3046    ) -> (ByteRanges, ByteRanges) {
3047        // Only process MDX files
3048        if !flavor.supports_jsx() {
3049            return (Vec::new(), Vec::new());
3050        }
3051
3052        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3053        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3054
3055        // Quick check - if no braces, no JSX expressions or MDX comments
3056        if !content.contains('{') {
3057            return (jsx_expression_ranges, mdx_comment_ranges);
3058        }
3059
3060        let bytes = content.as_bytes();
3061        let mut i = 0;
3062
3063        while i < bytes.len() {
3064            if bytes[i] == b'{' {
3065                // Check if we're in a code block
3066                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3067                    i += 1;
3068                    continue;
3069                }
3070
3071                let start = i;
3072
3073                // Check if it's an MDX comment: {/* ... */}
3074                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3075                    // Find the closing */}
3076                    let mut j = i + 3;
3077                    while j + 2 < bytes.len() {
3078                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3079                            let end = j + 3;
3080                            mdx_comment_ranges.push((start, end));
3081
3082                            // Mark lines as in MDX comment
3083                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3084                                line.in_mdx_comment = true;
3085                            });
3086
3087                            i = end;
3088                            break;
3089                        }
3090                        j += 1;
3091                    }
3092                    if j + 2 >= bytes.len() {
3093                        // Unclosed MDX comment - mark rest as comment
3094                        mdx_comment_ranges.push((start, bytes.len()));
3095                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3096                            line.in_mdx_comment = true;
3097                        });
3098                        break;
3099                    }
3100                } else {
3101                    // Regular JSX expression: { ... }
3102                    // Need to handle nested braces
3103                    let mut brace_depth = 1;
3104                    let mut j = i + 1;
3105                    let mut in_string = false;
3106                    let mut string_char = b'"';
3107
3108                    while j < bytes.len() && brace_depth > 0 {
3109                        let c = bytes[j];
3110
3111                        // Handle strings to avoid counting braces inside them
3112                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3113                            in_string = true;
3114                            string_char = c;
3115                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3116                            in_string = false;
3117                        } else if !in_string {
3118                            if c == b'{' {
3119                                brace_depth += 1;
3120                            } else if c == b'}' {
3121                                brace_depth -= 1;
3122                            }
3123                        }
3124                        j += 1;
3125                    }
3126
3127                    if brace_depth == 0 {
3128                        let end = j;
3129                        jsx_expression_ranges.push((start, end));
3130
3131                        // Mark lines as in JSX expression
3132                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3133                            line.in_jsx_expression = true;
3134                        });
3135
3136                        i = end;
3137                    } else {
3138                        i += 1;
3139                    }
3140                }
3141            } else {
3142                i += 1;
3143            }
3144        }
3145
3146        (jsx_expression_ranges, mdx_comment_ranges)
3147    }
3148
3149    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3150    /// and populate the corresponding fields in LineInfo
3151    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3152        if flavor != MarkdownFlavor::MkDocs {
3153            return;
3154        }
3155
3156        use crate::utils::mkdocs_admonitions;
3157        use crate::utils::mkdocs_definition_lists;
3158        use crate::utils::mkdocs_tabs;
3159
3160        let content_lines: Vec<&str> = content.lines().collect();
3161
3162        // Track admonition context
3163        let mut in_admonition = false;
3164        let mut admonition_indent = 0;
3165
3166        // Track tab context
3167        let mut in_tab = false;
3168        let mut tab_indent = 0;
3169
3170        // Track definition list context
3171        let mut in_definition = false;
3172
3173        for (i, line) in content_lines.iter().enumerate() {
3174            if i >= lines.len() {
3175                break;
3176            }
3177
3178            // Skip lines in code blocks
3179            if lines[i].in_code_block {
3180                continue;
3181            }
3182
3183            // Check for admonition markers
3184            if mkdocs_admonitions::is_admonition_start(line) {
3185                in_admonition = true;
3186                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3187                lines[i].in_admonition = true;
3188            } else if in_admonition {
3189                // Check if still in admonition content
3190                if line.trim().is_empty() {
3191                    // Blank lines are part of admonitions
3192                    lines[i].in_admonition = true;
3193                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3194                    lines[i].in_admonition = true;
3195                } else {
3196                    // End of admonition
3197                    in_admonition = false;
3198                    // Check if this line starts a new admonition
3199                    if mkdocs_admonitions::is_admonition_start(line) {
3200                        in_admonition = true;
3201                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3202                        lines[i].in_admonition = true;
3203                    }
3204                }
3205            }
3206
3207            // Check for tab markers
3208            if mkdocs_tabs::is_tab_marker(line) {
3209                in_tab = true;
3210                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3211                lines[i].in_content_tab = true;
3212            } else if in_tab {
3213                // Check if still in tab content
3214                if line.trim().is_empty() {
3215                    // Blank lines are part of tabs
3216                    lines[i].in_content_tab = true;
3217                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3218                    lines[i].in_content_tab = true;
3219                } else {
3220                    // End of tab content
3221                    in_tab = false;
3222                    // Check if this line starts a new tab
3223                    if mkdocs_tabs::is_tab_marker(line) {
3224                        in_tab = true;
3225                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3226                        lines[i].in_content_tab = true;
3227                    }
3228                }
3229            }
3230
3231            // Check for definition list items
3232            if mkdocs_definition_lists::is_definition_line(line) {
3233                in_definition = true;
3234                lines[i].in_definition_list = true;
3235            } else if in_definition {
3236                // Check if continuation
3237                if mkdocs_definition_lists::is_definition_continuation(line) {
3238                    lines[i].in_definition_list = true;
3239                } else if line.trim().is_empty() {
3240                    // Blank line might continue definition
3241                    lines[i].in_definition_list = true;
3242                } else if mkdocs_definition_lists::could_be_term_line(line) {
3243                    // This could be a new term - check if followed by definition
3244                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3245                    {
3246                        lines[i].in_definition_list = true;
3247                    } else {
3248                        in_definition = false;
3249                    }
3250                } else {
3251                    in_definition = false;
3252                }
3253            } else if mkdocs_definition_lists::could_be_term_line(line) {
3254                // Check if this is a term followed by a definition
3255                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3256                    lines[i].in_definition_list = true;
3257                    in_definition = true;
3258                }
3259            }
3260        }
3261    }
3262
3263    /// Helper to mark lines within a byte range
3264    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3265    where
3266        F: FnMut(&mut LineInfo),
3267    {
3268        // Find lines that overlap with the range
3269        for line in lines.iter_mut() {
3270            let line_start = line.byte_offset;
3271            let line_end = line.byte_offset + line.byte_len;
3272
3273            // Check if this line overlaps with the range
3274            if line_start < end && line_end > start {
3275                f(line);
3276            }
3277        }
3278
3279        // Silence unused warning for content (needed for signature consistency)
3280        let _ = content;
3281    }
3282
3283    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3284    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3285        let mut code_spans = Vec::new();
3286
3287        // Quick check - if no backticks, no code spans
3288        if !content.contains('`') {
3289            return code_spans;
3290        }
3291
3292        // Use pulldown-cmark's streaming parser with byte offsets
3293        let parser = Parser::new(content).into_offset_iter();
3294
3295        for (event, range) in parser {
3296            if let Event::Code(_) = event {
3297                let start_pos = range.start;
3298                let end_pos = range.end;
3299
3300                // The range includes the backticks, extract the actual content
3301                let full_span = &content[start_pos..end_pos];
3302                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3303
3304                // Extract content between backticks, preserving spaces
3305                let content_start = start_pos + backtick_count;
3306                let content_end = end_pos - backtick_count;
3307                let span_content = if content_start < content_end {
3308                    content[content_start..content_end].to_string()
3309                } else {
3310                    String::new()
3311                };
3312
3313                // Use binary search to find line number - O(log n) instead of O(n)
3314                // Find the rightmost line whose byte_offset <= start_pos
3315                let line_idx = lines
3316                    .partition_point(|line| line.byte_offset <= start_pos)
3317                    .saturating_sub(1);
3318                let line_num = line_idx + 1;
3319                let byte_col_start = start_pos - lines[line_idx].byte_offset;
3320
3321                // Find end column using binary search
3322                let end_line_idx = lines
3323                    .partition_point(|line| line.byte_offset <= end_pos)
3324                    .saturating_sub(1);
3325                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3326
3327                // Convert byte offsets to character positions for correct Unicode handling
3328                // This ensures consistency with warning.column which uses character positions
3329                let line_content = lines[line_idx].content(content);
3330                let col_start = if byte_col_start <= line_content.len() {
3331                    line_content[..byte_col_start].chars().count()
3332                } else {
3333                    line_content.chars().count()
3334                };
3335
3336                let end_line_content = lines[end_line_idx].content(content);
3337                let col_end = if byte_col_end <= end_line_content.len() {
3338                    end_line_content[..byte_col_end].chars().count()
3339                } else {
3340                    end_line_content.chars().count()
3341                };
3342
3343                code_spans.push(CodeSpan {
3344                    line: line_num,
3345                    end_line: end_line_idx + 1,
3346                    start_col: col_start,
3347                    end_col: col_end,
3348                    byte_offset: start_pos,
3349                    byte_end: end_pos,
3350                    backtick_count,
3351                    content: span_content,
3352                });
3353            }
3354        }
3355
3356        // Sort by position to ensure consistent ordering
3357        code_spans.sort_by_key(|span| span.byte_offset);
3358
3359        code_spans
3360    }
3361
3362    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3363    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3364        let mut math_spans = Vec::new();
3365
3366        // Quick check - if no $ signs, no math spans
3367        if !content.contains('$') {
3368            return math_spans;
3369        }
3370
3371        // Use pulldown-cmark with ENABLE_MATH option
3372        let mut options = Options::empty();
3373        options.insert(Options::ENABLE_MATH);
3374        let parser = Parser::new_ext(content, options).into_offset_iter();
3375
3376        for (event, range) in parser {
3377            let (is_display, math_content) = match &event {
3378                Event::InlineMath(text) => (false, text.as_ref()),
3379                Event::DisplayMath(text) => (true, text.as_ref()),
3380                _ => continue,
3381            };
3382
3383            let start_pos = range.start;
3384            let end_pos = range.end;
3385
3386            // Use binary search to find line number - O(log n) instead of O(n)
3387            let line_idx = lines
3388                .partition_point(|line| line.byte_offset <= start_pos)
3389                .saturating_sub(1);
3390            let line_num = line_idx + 1;
3391            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3392
3393            // Find end column using binary search
3394            let end_line_idx = lines
3395                .partition_point(|line| line.byte_offset <= end_pos)
3396                .saturating_sub(1);
3397            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3398
3399            // Convert byte offsets to character positions for correct Unicode handling
3400            let line_content = lines[line_idx].content(content);
3401            let col_start = if byte_col_start <= line_content.len() {
3402                line_content[..byte_col_start].chars().count()
3403            } else {
3404                line_content.chars().count()
3405            };
3406
3407            let end_line_content = lines[end_line_idx].content(content);
3408            let col_end = if byte_col_end <= end_line_content.len() {
3409                end_line_content[..byte_col_end].chars().count()
3410            } else {
3411                end_line_content.chars().count()
3412            };
3413
3414            math_spans.push(MathSpan {
3415                line: line_num,
3416                end_line: end_line_idx + 1,
3417                start_col: col_start,
3418                end_col: col_end,
3419                byte_offset: start_pos,
3420                byte_end: end_pos,
3421                is_display,
3422                content: math_content.to_string(),
3423            });
3424        }
3425
3426        // Sort by position to ensure consistent ordering
3427        math_spans.sort_by_key(|span| span.byte_offset);
3428
3429        math_spans
3430    }
3431
3432    /// Parse all list blocks in the content (legacy line-by-line approach)
3433    ///
3434    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3435    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3436    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3437    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3438    ///   treated as list continuation (based on the list marker width)
3439    ///
3440    /// When a new list item is encountered, we check if list-breaking content was seen
3441    /// since the last item. If so, we start a new list block.
3442    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3443        // Minimum indentation for unordered list continuation per CommonMark spec
3444        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3445
3446        /// Initialize or reset the forward-scanning tracking state.
3447        /// This helper eliminates code duplication across three initialization sites.
3448        #[inline]
3449        fn reset_tracking_state(
3450            list_item: &ListItemInfo,
3451            has_list_breaking_content: &mut bool,
3452            min_continuation: &mut usize,
3453        ) {
3454            *has_list_breaking_content = false;
3455            let marker_width = if list_item.is_ordered {
3456                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3457            } else {
3458                list_item.marker.len()
3459            };
3460            *min_continuation = if list_item.is_ordered {
3461                marker_width
3462            } else {
3463                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3464            };
3465        }
3466
3467        // Pre-size based on lines that could be list items
3468        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3469        let mut current_block: Option<ListBlock> = None;
3470        let mut last_list_item_line = 0;
3471        let mut current_indent_level = 0;
3472        let mut last_marker_width = 0;
3473
3474        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3475        let mut has_list_breaking_content_since_last_item = false;
3476        let mut min_continuation_for_tracking = 0;
3477
3478        for (line_idx, line_info) in lines.iter().enumerate() {
3479            let line_num = line_idx + 1;
3480
3481            // Enhanced code block handling using Design #3's context analysis
3482            if line_info.in_code_block {
3483                if let Some(ref mut block) = current_block {
3484                    // Calculate minimum indentation for list continuation
3485                    let min_continuation_indent =
3486                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3487
3488                    // Analyze code block context using the three-tier classification
3489                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3490
3491                    match context {
3492                        CodeBlockContext::Indented => {
3493                            // Code block is properly indented - continues the list
3494                            block.end_line = line_num;
3495                            continue;
3496                        }
3497                        CodeBlockContext::Standalone => {
3498                            // Code block separates lists - end current block
3499                            let completed_block = current_block.take().unwrap();
3500                            list_blocks.push(completed_block);
3501                            continue;
3502                        }
3503                        CodeBlockContext::Adjacent => {
3504                            // Edge case - use conservative behavior (continue list)
3505                            block.end_line = line_num;
3506                            continue;
3507                        }
3508                    }
3509                } else {
3510                    // No current list block - skip code block lines
3511                    continue;
3512                }
3513            }
3514
3515            // Extract blockquote prefix if any
3516            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3517                caps.get(0).unwrap().as_str().to_string()
3518            } else {
3519                String::new()
3520            };
3521
3522            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3523            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3524            if let Some(ref block) = current_block
3525                && line_info.list_item.is_none()
3526                && !line_info.is_blank
3527                && !line_info.in_code_span_continuation
3528            {
3529                let line_content = line_info.content(content).trim();
3530
3531                // Check for structural separators that break lists
3532                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3533                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3534                // as they indicate improper indentation rather than lazy continuation.
3535                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3536
3537                // Check if blockquote context changes (different prefix than current block)
3538                // Lines within the SAME blockquote context don't break lists
3539                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3540
3541                let breaks_list = line_info.heading.is_some()
3542                    || line_content.starts_with("---")
3543                    || line_content.starts_with("***")
3544                    || line_content.starts_with("___")
3545                    || crate::utils::skip_context::is_table_line(line_content)
3546                    || blockquote_prefix_changes
3547                    || (line_info.indent > 0
3548                        && line_info.indent < min_continuation_for_tracking
3549                        && !is_lazy_continuation);
3550
3551                if breaks_list {
3552                    has_list_breaking_content_since_last_item = true;
3553                }
3554            }
3555
3556            // If this line is a code span continuation within an active list block,
3557            // extend the block's end_line to include this line (maintains list continuity)
3558            if line_info.in_code_span_continuation
3559                && line_info.list_item.is_none()
3560                && let Some(ref mut block) = current_block
3561            {
3562                block.end_line = line_num;
3563            }
3564
3565            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3566            // properly indented lines within the list). This ensures the workaround at line 2448
3567            // works correctly when there are multiple continuation lines before a nested list item.
3568            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3569            // For blockquote lines, compute effective indent after stripping the prefix
3570            let effective_continuation_indent = if let Some(ref block) = current_block {
3571                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3572                let line_content = line_info.content(content);
3573                let line_bq_level = line_content
3574                    .chars()
3575                    .take_while(|c| *c == '>' || c.is_whitespace())
3576                    .filter(|&c| c == '>')
3577                    .count();
3578                if line_bq_level > 0 && line_bq_level == block_bq_level {
3579                    // Compute indent after blockquote markers
3580                    let mut pos = 0;
3581                    let mut found_markers = 0;
3582                    for c in line_content.chars() {
3583                        pos += c.len_utf8();
3584                        if c == '>' {
3585                            found_markers += 1;
3586                            if found_markers == line_bq_level {
3587                                if line_content.get(pos..pos + 1) == Some(" ") {
3588                                    pos += 1;
3589                                }
3590                                break;
3591                            }
3592                        }
3593                    }
3594                    let after_bq = &line_content[pos..];
3595                    after_bq.len() - after_bq.trim_start().len()
3596                } else {
3597                    line_info.indent
3598                }
3599            } else {
3600                line_info.indent
3601            };
3602            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3603                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3604                if block_bq_level > 0 {
3605                    if block.is_ordered { last_marker_width } else { 2 }
3606                } else {
3607                    min_continuation_for_tracking
3608                }
3609            } else {
3610                min_continuation_for_tracking
3611            };
3612            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3613                || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
3614
3615            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3616                eprintln!(
3617                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3618                    line_num,
3619                    effective_continuation_indent,
3620                    adjusted_min_continuation_for_tracking,
3621                    is_valid_continuation,
3622                    line_info.in_code_span_continuation,
3623                    line_info.in_code_block,
3624                    current_block.is_some()
3625                );
3626            }
3627
3628            if !line_info.in_code_span_continuation
3629                && line_info.list_item.is_none()
3630                && !line_info.is_blank
3631                && !line_info.in_code_block
3632                && is_valid_continuation
3633                && let Some(ref mut block) = current_block
3634            {
3635                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3636                    eprintln!(
3637                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3638                        line_num, block.end_line, line_num
3639                    );
3640                }
3641                block.end_line = line_num;
3642            }
3643
3644            // Check if this line is a list item
3645            if let Some(list_item) = &line_info.list_item {
3646                // Calculate nesting level based on indentation
3647                let item_indent = list_item.marker_column;
3648                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3649
3650                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3651                    eprintln!(
3652                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3653                        line_num, list_item.marker, item_indent
3654                    );
3655                }
3656
3657                if let Some(ref mut block) = current_block {
3658                    // Check if this continues the current block
3659                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3660                    // or a continuation at the same or lower level
3661                    let is_nested = nesting > block.nesting_level;
3662                    let same_type =
3663                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3664                    let same_context = block.blockquote_prefix == blockquote_prefix;
3665                    // Allow one blank line after last item, or lines immediately after block content
3666                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3667
3668                    // For unordered lists, also check marker consistency
3669                    let marker_compatible =
3670                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3671
3672                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3673                    // This eliminates the quadratic bottleneck from issue #148
3674                    let has_non_list_content = has_list_breaking_content_since_last_item;
3675
3676                    // A list continues if:
3677                    // 1. It's a nested item (indented more than the parent), OR
3678                    // 2. It's the same type at the same level with reasonable distance
3679                    let mut continues_list = if is_nested {
3680                        // Nested items always continue the list if they're in the same context
3681                        same_context && reasonable_distance && !has_non_list_content
3682                    } else {
3683                        // Same-level items need to match type and markers
3684                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3685                    };
3686
3687                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3688                        eprintln!(
3689                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3690                            line_num,
3691                            continues_list,
3692                            is_nested,
3693                            same_type,
3694                            same_context,
3695                            reasonable_distance,
3696                            marker_compatible,
3697                            has_non_list_content,
3698                            last_list_item_line,
3699                            block.end_line
3700                        );
3701                    }
3702
3703                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3704                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3705                    // Apply for: nested items (different types OK), OR same-level same-type items
3706                    if !continues_list
3707                        && (is_nested || same_type)
3708                        && reasonable_distance
3709                        && line_num > 0
3710                        && block.end_line == line_num - 1
3711                    {
3712                        // Check if the previous line was a list item or a continuation of a list item
3713                        // (including lazy continuation lines)
3714                        if block.item_lines.contains(&(line_num - 1)) {
3715                            // They're consecutive list items - force them to be in the same list
3716                            continues_list = true;
3717                        } else {
3718                            // Previous line is a continuation line within this block
3719                            // (e.g., lazy continuation with indent=0)
3720                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3721                            continues_list = true;
3722                        }
3723                    }
3724
3725                    if continues_list {
3726                        // Extend current block
3727                        block.end_line = line_num;
3728                        block.item_lines.push(line_num);
3729
3730                        // Update max marker width
3731                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3732                            list_item.marker.len() + 1
3733                        } else {
3734                            list_item.marker.len()
3735                        });
3736
3737                        // Update marker consistency for unordered lists
3738                        if !block.is_ordered
3739                            && block.marker.is_some()
3740                            && block.marker.as_ref() != Some(&list_item.marker)
3741                        {
3742                            // Mixed markers, clear the marker field
3743                            block.marker = None;
3744                        }
3745
3746                        // Reset tracked state for issue #148 optimization
3747                        reset_tracking_state(
3748                            list_item,
3749                            &mut has_list_breaking_content_since_last_item,
3750                            &mut min_continuation_for_tracking,
3751                        );
3752                    } else {
3753                        // End current block and start a new one
3754                        // When a different list type starts AT THE SAME LEVEL (not nested),
3755                        // trim back lazy continuation lines (they become part of the gap, not the list)
3756                        // For nested items, different types are fine - they're sub-lists
3757                        if !same_type
3758                            && !is_nested
3759                            && let Some(&last_item) = block.item_lines.last()
3760                        {
3761                            block.end_line = last_item;
3762                        }
3763
3764                        list_blocks.push(block.clone());
3765
3766                        *block = ListBlock {
3767                            start_line: line_num,
3768                            end_line: line_num,
3769                            is_ordered: list_item.is_ordered,
3770                            marker: if list_item.is_ordered {
3771                                None
3772                            } else {
3773                                Some(list_item.marker.clone())
3774                            },
3775                            blockquote_prefix: blockquote_prefix.clone(),
3776                            item_lines: vec![line_num],
3777                            nesting_level: nesting,
3778                            max_marker_width: if list_item.is_ordered {
3779                                list_item.marker.len() + 1
3780                            } else {
3781                                list_item.marker.len()
3782                            },
3783                        };
3784
3785                        // Initialize tracked state for new block (issue #148 optimization)
3786                        reset_tracking_state(
3787                            list_item,
3788                            &mut has_list_breaking_content_since_last_item,
3789                            &mut min_continuation_for_tracking,
3790                        );
3791                    }
3792                } else {
3793                    // Start a new block
3794                    current_block = Some(ListBlock {
3795                        start_line: line_num,
3796                        end_line: line_num,
3797                        is_ordered: list_item.is_ordered,
3798                        marker: if list_item.is_ordered {
3799                            None
3800                        } else {
3801                            Some(list_item.marker.clone())
3802                        },
3803                        blockquote_prefix,
3804                        item_lines: vec![line_num],
3805                        nesting_level: nesting,
3806                        max_marker_width: list_item.marker.len(),
3807                    });
3808
3809                    // Initialize tracked state for new block (issue #148 optimization)
3810                    reset_tracking_state(
3811                        list_item,
3812                        &mut has_list_breaking_content_since_last_item,
3813                        &mut min_continuation_for_tracking,
3814                    );
3815                }
3816
3817                last_list_item_line = line_num;
3818                current_indent_level = item_indent;
3819                last_marker_width = if list_item.is_ordered {
3820                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3821                } else {
3822                    list_item.marker.len()
3823                };
3824            } else if let Some(ref mut block) = current_block {
3825                // Not a list item - check if it continues the current block
3826                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3827                    eprintln!(
3828                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3829                        line_num, line_info.is_blank
3830                    );
3831                }
3832
3833                // For MD032 compatibility, we use a simple approach:
3834                // - Indented lines continue the list
3835                // - Blank lines followed by indented content continue the list
3836                // - Everything else ends the list
3837
3838                // Check if the last line in the list block ended with a backslash (hard line break)
3839                // This handles cases where list items use backslash for hard line breaks
3840                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3841                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3842                } else {
3843                    false
3844                };
3845
3846                // Calculate minimum indentation for list continuation
3847                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3848                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3849                let min_continuation_indent = if block.is_ordered {
3850                    current_indent_level + last_marker_width
3851                } else {
3852                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3853                };
3854
3855                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3856                    // Indented line or backslash continuation continues the list
3857                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3858                        eprintln!(
3859                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3860                            line_num, line_info.indent, min_continuation_indent
3861                        );
3862                    }
3863                    block.end_line = line_num;
3864                } else if line_info.is_blank {
3865                    // Blank line - check if it's internal to the list or ending it
3866                    // We only include blank lines that are followed by more list content
3867                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3868                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3869                    }
3870                    let mut check_idx = line_idx + 1;
3871                    let mut found_continuation = false;
3872
3873                    // Skip additional blank lines
3874                    while check_idx < lines.len() && lines[check_idx].is_blank {
3875                        check_idx += 1;
3876                    }
3877
3878                    if check_idx < lines.len() {
3879                        let next_line = &lines[check_idx];
3880                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3881                        let next_content = next_line.content(content);
3882                        // Use blockquote level (count of >) to compare, not the full prefix
3883                        // This avoids issues where the regex captures extra whitespace
3884                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3885                        let next_bq_level_for_indent = next_content
3886                            .chars()
3887                            .take_while(|c| *c == '>' || c.is_whitespace())
3888                            .filter(|&c| c == '>')
3889                            .count();
3890                        let effective_indent =
3891                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3892                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3893                                // Find position after ">" and one space
3894                                let mut pos = 0;
3895                                let mut found_markers = 0;
3896                                for c in next_content.chars() {
3897                                    pos += c.len_utf8();
3898                                    if c == '>' {
3899                                        found_markers += 1;
3900                                        if found_markers == next_bq_level_for_indent {
3901                                            // Skip optional space after last >
3902                                            if next_content.get(pos..pos + 1) == Some(" ") {
3903                                                pos += 1;
3904                                            }
3905                                            break;
3906                                        }
3907                                    }
3908                                }
3909                                let after_blockquote_marker = &next_content[pos..];
3910                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3911                            } else {
3912                                next_line.indent
3913                            };
3914                        // Also adjust min_continuation_indent for blockquote lists
3915                        // The marker_column includes blockquote prefix, so subtract it
3916                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3917                            // For blockquote lists, the continuation is relative to blockquote content
3918                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3919                            if block.is_ordered { last_marker_width } else { 2 }
3920                        } else {
3921                            min_continuation_indent
3922                        };
3923                        // Check if followed by indented content (list continuation)
3924                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3925                            eprintln!(
3926                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3927                                line_num,
3928                                check_idx + 1,
3929                                effective_indent,
3930                                adjusted_min_continuation,
3931                                next_line.list_item.is_some(),
3932                                next_line.in_code_block
3933                            );
3934                        }
3935                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3936                            found_continuation = true;
3937                        }
3938                        // Check if followed by another list item at the same level
3939                        else if !next_line.in_code_block
3940                            && next_line.list_item.is_some()
3941                            && let Some(item) = &next_line.list_item
3942                        {
3943                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3944                                .find(next_line.content(content))
3945                                .map_or(String::new(), |m| m.as_str().to_string());
3946                            if item.marker_column == current_indent_level
3947                                && item.is_ordered == block.is_ordered
3948                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3949                            {
3950                                // Check if there was meaningful content between the list items (unused now)
3951                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3952                                // Pre-compute block's blockquote level for use in closures
3953                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3954                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3955                                    if let Some(between_line) = lines.get(idx) {
3956                                        let between_content = between_line.content(content);
3957                                        let trimmed = between_content.trim();
3958                                        // Skip empty lines
3959                                        if trimmed.is_empty() {
3960                                            return false;
3961                                        }
3962                                        // Check for meaningful content
3963                                        let line_indent = between_content.len() - between_content.trim_start().len();
3964
3965                                        // Check if blockquote level changed (not just if line starts with ">")
3966                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3967                                            .find(between_content)
3968                                            .map_or(String::new(), |m| m.as_str().to_string());
3969                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3970                                        let blockquote_level_changed =
3971                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
3972
3973                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
3974                                        if trimmed.starts_with("```")
3975                                            || trimmed.starts_with("~~~")
3976                                            || trimmed.starts_with("---")
3977                                            || trimmed.starts_with("***")
3978                                            || trimmed.starts_with("___")
3979                                            || blockquote_level_changed
3980                                            || crate::utils::skip_context::is_table_line(trimmed)
3981                                            || between_line.heading.is_some()
3982                                        {
3983                                            return true; // These are structural separators - meaningful content that breaks lists
3984                                        }
3985
3986                                        // Only properly indented content continues the list
3987                                        line_indent >= min_continuation_indent
3988                                    } else {
3989                                        false
3990                                    }
3991                                });
3992
3993                                if block.is_ordered {
3994                                    // For ordered lists: don't continue if there are structural separators
3995                                    // Check if there are structural separators between the list items
3996                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3997                                        if let Some(between_line) = lines.get(idx) {
3998                                            let between_content = between_line.content(content);
3999                                            let trimmed = between_content.trim();
4000                                            if trimmed.is_empty() {
4001                                                return false;
4002                                            }
4003                                            // Check if blockquote level changed (not just if line starts with ">")
4004                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4005                                                .find(between_content)
4006                                                .map_or(String::new(), |m| m.as_str().to_string());
4007                                            let between_bq_level =
4008                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4009                                            let blockquote_level_changed =
4010                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4011                                            // Check for structural separators that break lists
4012                                            trimmed.starts_with("```")
4013                                                || trimmed.starts_with("~~~")
4014                                                || trimmed.starts_with("---")
4015                                                || trimmed.starts_with("***")
4016                                                || trimmed.starts_with("___")
4017                                                || blockquote_level_changed
4018                                                || crate::utils::skip_context::is_table_line(trimmed)
4019                                                || between_line.heading.is_some()
4020                                        } else {
4021                                            false
4022                                        }
4023                                    });
4024                                    found_continuation = !has_structural_separators;
4025                                } else {
4026                                    // For unordered lists: also check for structural separators
4027                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4028                                        if let Some(between_line) = lines.get(idx) {
4029                                            let between_content = between_line.content(content);
4030                                            let trimmed = between_content.trim();
4031                                            if trimmed.is_empty() {
4032                                                return false;
4033                                            }
4034                                            // Check if blockquote level changed (not just if line starts with ">")
4035                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4036                                                .find(between_content)
4037                                                .map_or(String::new(), |m| m.as_str().to_string());
4038                                            let between_bq_level =
4039                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4040                                            let blockquote_level_changed =
4041                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4042                                            // Check for structural separators that break lists
4043                                            trimmed.starts_with("```")
4044                                                || trimmed.starts_with("~~~")
4045                                                || trimmed.starts_with("---")
4046                                                || trimmed.starts_with("***")
4047                                                || trimmed.starts_with("___")
4048                                                || blockquote_level_changed
4049                                                || crate::utils::skip_context::is_table_line(trimmed)
4050                                                || between_line.heading.is_some()
4051                                        } else {
4052                                            false
4053                                        }
4054                                    });
4055                                    found_continuation = !has_structural_separators;
4056                                }
4057                            }
4058                        }
4059                    }
4060
4061                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4062                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4063                    }
4064                    if found_continuation {
4065                        // Include the blank line in the block
4066                        block.end_line = line_num;
4067                    } else {
4068                        // Blank line ends the list - don't include it
4069                        list_blocks.push(block.clone());
4070                        current_block = None;
4071                    }
4072                } else {
4073                    // Check for lazy continuation - non-indented line immediately after a list item
4074                    // But only if the line has sufficient indentation for the list type
4075                    let min_required_indent = if block.is_ordered {
4076                        current_indent_level + last_marker_width
4077                    } else {
4078                        current_indent_level + 2
4079                    };
4080
4081                    // For lazy continuation to apply, the line must either:
4082                    // 1. Have no indentation (true lazy continuation)
4083                    // 2. Have sufficient indentation for the list type
4084                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4085                    let line_content = line_info.content(content).trim();
4086
4087                    // Check for table-like patterns
4088                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4089
4090                    // Check if blockquote level changed (not just if line starts with ">")
4091                    // Lines within the same blockquote level are NOT structural separators
4092                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4093                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4094                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4095
4096                    let is_structural_separator = line_info.heading.is_some()
4097                        || line_content.starts_with("```")
4098                        || line_content.starts_with("~~~")
4099                        || line_content.starts_with("---")
4100                        || line_content.starts_with("***")
4101                        || line_content.starts_with("___")
4102                        || blockquote_level_changed
4103                        || looks_like_table;
4104
4105                    // Allow lazy continuation if we're still within the same list block
4106                    // (not just immediately after a list item)
4107                    // Also treat code span continuations as valid continuations regardless of indent
4108                    let is_lazy_continuation = !is_structural_separator
4109                        && !line_info.is_blank
4110                        && (line_info.indent == 0
4111                            || line_info.indent >= min_required_indent
4112                            || line_info.in_code_span_continuation);
4113
4114                    if is_lazy_continuation {
4115                        // Per CommonMark, lazy continuation continues until a blank line
4116                        // or structural element, regardless of uppercase at line start
4117                        block.end_line = line_num;
4118                    } else {
4119                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4120                        list_blocks.push(block.clone());
4121                        current_block = None;
4122                    }
4123                }
4124            }
4125        }
4126
4127        // Don't forget the last block
4128        if let Some(block) = current_block {
4129            list_blocks.push(block);
4130        }
4131
4132        // Merge adjacent blocks that should be one
4133        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4134
4135        list_blocks
4136    }
4137
4138    /// Compute character frequency for fast content analysis
4139    fn compute_char_frequency(content: &str) -> CharFrequency {
4140        let mut frequency = CharFrequency::default();
4141
4142        for ch in content.chars() {
4143            match ch {
4144                '#' => frequency.hash_count += 1,
4145                '*' => frequency.asterisk_count += 1,
4146                '_' => frequency.underscore_count += 1,
4147                '-' => frequency.hyphen_count += 1,
4148                '+' => frequency.plus_count += 1,
4149                '>' => frequency.gt_count += 1,
4150                '|' => frequency.pipe_count += 1,
4151                '[' => frequency.bracket_count += 1,
4152                '`' => frequency.backtick_count += 1,
4153                '<' => frequency.lt_count += 1,
4154                '!' => frequency.exclamation_count += 1,
4155                '\n' => frequency.newline_count += 1,
4156                _ => {}
4157            }
4158        }
4159
4160        frequency
4161    }
4162
4163    /// Parse HTML tags in the content
4164    fn parse_html_tags(
4165        content: &str,
4166        lines: &[LineInfo],
4167        code_blocks: &[(usize, usize)],
4168        flavor: MarkdownFlavor,
4169    ) -> Vec<HtmlTag> {
4170        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4171            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4172
4173        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4174
4175        for cap in HTML_TAG_REGEX.captures_iter(content) {
4176            let full_match = cap.get(0).unwrap();
4177            let match_start = full_match.start();
4178            let match_end = full_match.end();
4179
4180            // Skip if in code block
4181            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4182                continue;
4183            }
4184
4185            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4186            let tag_name_original = cap.get(2).unwrap().as_str();
4187            let tag_name = tag_name_original.to_lowercase();
4188            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4189
4190            // Skip JSX components in MDX files (tags starting with uppercase letter)
4191            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4192            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4193                continue;
4194            }
4195
4196            // Find which line this tag is on
4197            let mut line_num = 1;
4198            let mut col_start = match_start;
4199            let mut col_end = match_end;
4200            for (idx, line_info) in lines.iter().enumerate() {
4201                if match_start >= line_info.byte_offset {
4202                    line_num = idx + 1;
4203                    col_start = match_start - line_info.byte_offset;
4204                    col_end = match_end - line_info.byte_offset;
4205                } else {
4206                    break;
4207                }
4208            }
4209
4210            html_tags.push(HtmlTag {
4211                line: line_num,
4212                start_col: col_start,
4213                end_col: col_end,
4214                byte_offset: match_start,
4215                byte_end: match_end,
4216                tag_name,
4217                is_closing,
4218                is_self_closing,
4219                raw_content: full_match.as_str().to_string(),
4220            });
4221        }
4222
4223        html_tags
4224    }
4225
4226    /// Parse table rows in the content
4227    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4228        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4229
4230        for (line_idx, line_info) in lines.iter().enumerate() {
4231            // Skip lines in code blocks or blank lines
4232            if line_info.in_code_block || line_info.is_blank {
4233                continue;
4234            }
4235
4236            let line = line_info.content(content);
4237            let line_num = line_idx + 1;
4238
4239            // Check if this line contains pipes (potential table row)
4240            if !line.contains('|') {
4241                continue;
4242            }
4243
4244            // Count columns by splitting on pipes
4245            let parts: Vec<&str> = line.split('|').collect();
4246            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4247
4248            // Check if this is a separator row
4249            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4250            let mut column_alignments = Vec::new();
4251
4252            if is_separator {
4253                for part in &parts[1..parts.len() - 1] {
4254                    // Skip first and last empty parts
4255                    let trimmed = part.trim();
4256                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4257                        "center".to_string()
4258                    } else if trimmed.ends_with(':') {
4259                        "right".to_string()
4260                    } else if trimmed.starts_with(':') {
4261                        "left".to_string()
4262                    } else {
4263                        "none".to_string()
4264                    };
4265                    column_alignments.push(alignment);
4266                }
4267            }
4268
4269            table_rows.push(TableRow {
4270                line: line_num,
4271                is_separator,
4272                column_count,
4273                column_alignments,
4274            });
4275        }
4276
4277        table_rows
4278    }
4279
4280    /// Parse bare URLs and emails in the content
4281    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4282        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4283
4284        // Check for bare URLs (not in angle brackets or markdown links)
4285        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4286            let full_match = cap.get(0).unwrap();
4287            let match_start = full_match.start();
4288            let match_end = full_match.end();
4289
4290            // Skip if in code block
4291            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4292                continue;
4293            }
4294
4295            // Skip if already in angle brackets or markdown links
4296            let preceding_char = if match_start > 0 {
4297                content.chars().nth(match_start - 1)
4298            } else {
4299                None
4300            };
4301            let following_char = content.chars().nth(match_end);
4302
4303            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4304                continue;
4305            }
4306            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4307                continue;
4308            }
4309
4310            let url = full_match.as_str();
4311            let url_type = if url.starts_with("https://") {
4312                "https"
4313            } else if url.starts_with("http://") {
4314                "http"
4315            } else if url.starts_with("ftp://") {
4316                "ftp"
4317            } else {
4318                "other"
4319            };
4320
4321            // Find which line this URL is on
4322            let mut line_num = 1;
4323            let mut col_start = match_start;
4324            let mut col_end = match_end;
4325            for (idx, line_info) in lines.iter().enumerate() {
4326                if match_start >= line_info.byte_offset {
4327                    line_num = idx + 1;
4328                    col_start = match_start - line_info.byte_offset;
4329                    col_end = match_end - line_info.byte_offset;
4330                } else {
4331                    break;
4332                }
4333            }
4334
4335            bare_urls.push(BareUrl {
4336                line: line_num,
4337                start_col: col_start,
4338                end_col: col_end,
4339                byte_offset: match_start,
4340                byte_end: match_end,
4341                url: url.to_string(),
4342                url_type: url_type.to_string(),
4343            });
4344        }
4345
4346        // Check for bare email addresses
4347        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4348            let full_match = cap.get(0).unwrap();
4349            let match_start = full_match.start();
4350            let match_end = full_match.end();
4351
4352            // Skip if in code block
4353            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4354                continue;
4355            }
4356
4357            // Skip if already in angle brackets or markdown links
4358            let preceding_char = if match_start > 0 {
4359                content.chars().nth(match_start - 1)
4360            } else {
4361                None
4362            };
4363            let following_char = content.chars().nth(match_end);
4364
4365            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4366                continue;
4367            }
4368            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4369                continue;
4370            }
4371
4372            let email = full_match.as_str();
4373
4374            // Find which line this email is on
4375            let mut line_num = 1;
4376            let mut col_start = match_start;
4377            let mut col_end = match_end;
4378            for (idx, line_info) in lines.iter().enumerate() {
4379                if match_start >= line_info.byte_offset {
4380                    line_num = idx + 1;
4381                    col_start = match_start - line_info.byte_offset;
4382                    col_end = match_end - line_info.byte_offset;
4383                } else {
4384                    break;
4385                }
4386            }
4387
4388            bare_urls.push(BareUrl {
4389                line: line_num,
4390                start_col: col_start,
4391                end_col: col_end,
4392                byte_offset: match_start,
4393                byte_end: match_end,
4394                url: email.to_string(),
4395                url_type: "email".to_string(),
4396            });
4397        }
4398
4399        bare_urls
4400    }
4401
4402    /// Get an iterator over valid CommonMark headings
4403    ///
4404    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4405    /// that should be flagged by MD018 but should not be processed by other heading rules.
4406    ///
4407    /// # Examples
4408    ///
4409    /// ```rust
4410    /// use rumdl_lib::lint_context::LintContext;
4411    /// use rumdl_lib::config::MarkdownFlavor;
4412    ///
4413    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4414    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4415    ///
4416    /// for heading in ctx.valid_headings() {
4417    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4418    /// }
4419    /// // Only prints valid headings, skips `#NoSpace`
4420    /// ```
4421    #[must_use]
4422    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4423        ValidHeadingsIter::new(&self.lines)
4424    }
4425
4426    /// Check if the document contains any valid CommonMark headings
4427    ///
4428    /// Returns `true` if there is at least one heading with proper space after `#`.
4429    #[must_use]
4430    pub fn has_valid_headings(&self) -> bool {
4431        self.lines
4432            .iter()
4433            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4434    }
4435}
4436
4437/// Merge adjacent list blocks that should be treated as one
4438fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4439    if list_blocks.len() < 2 {
4440        return;
4441    }
4442
4443    let mut merger = ListBlockMerger::new(content, lines);
4444    *list_blocks = merger.merge(list_blocks);
4445}
4446
4447/// Helper struct to manage the complex logic of merging list blocks
4448struct ListBlockMerger<'a> {
4449    content: &'a str,
4450    lines: &'a [LineInfo],
4451}
4452
4453impl<'a> ListBlockMerger<'a> {
4454    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4455        Self { content, lines }
4456    }
4457
4458    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4459        let mut merged = Vec::with_capacity(list_blocks.len());
4460        let mut current = list_blocks[0].clone();
4461
4462        for next in list_blocks.iter().skip(1) {
4463            if self.should_merge_blocks(&current, next) {
4464                current = self.merge_two_blocks(current, next);
4465            } else {
4466                merged.push(current);
4467                current = next.clone();
4468            }
4469        }
4470
4471        merged.push(current);
4472        merged
4473    }
4474
4475    /// Determine if two adjacent list blocks should be merged
4476    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4477        // Basic compatibility checks
4478        if !self.blocks_are_compatible(current, next) {
4479            return false;
4480        }
4481
4482        // Check spacing and content between blocks
4483        let spacing = self.analyze_spacing_between(current, next);
4484        match spacing {
4485            BlockSpacing::Consecutive => true,
4486            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4487            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4488                self.can_merge_with_content_between(current, next)
4489            }
4490        }
4491    }
4492
4493    /// Check if blocks have compatible structure for merging
4494    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4495        current.is_ordered == next.is_ordered
4496            && current.blockquote_prefix == next.blockquote_prefix
4497            && current.nesting_level == next.nesting_level
4498    }
4499
4500    /// Analyze the spacing between two list blocks
4501    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4502        let gap = next.start_line - current.end_line;
4503
4504        match gap {
4505            1 => BlockSpacing::Consecutive,
4506            2 => BlockSpacing::SingleBlank,
4507            _ if gap > 2 => {
4508                if self.has_only_blank_lines_between(current, next) {
4509                    BlockSpacing::MultipleBlanks
4510                } else {
4511                    BlockSpacing::ContentBetween
4512                }
4513            }
4514            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4515        }
4516    }
4517
4518    /// Check if unordered lists can be merged with a single blank line between
4519    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4520        // Check if there are structural separators between the blocks
4521        // If has_meaningful_content_between returns true, it means there are structural separators
4522        if has_meaningful_content_between(self.content, current, next, self.lines) {
4523            return false; // Structural separators prevent merging
4524        }
4525
4526        // Only merge unordered lists with same marker across single blank
4527        !current.is_ordered && current.marker == next.marker
4528    }
4529
4530    /// Check if ordered lists can be merged when there's content between them
4531    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4532        // Do not merge lists if there are structural separators between them
4533        if has_meaningful_content_between(self.content, current, next, self.lines) {
4534            return false; // Structural separators prevent merging
4535        }
4536
4537        // Only consider merging ordered lists if there's no structural content between
4538        current.is_ordered && next.is_ordered
4539    }
4540
4541    /// Check if there are only blank lines between blocks
4542    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4543        for line_num in (current.end_line + 1)..next.start_line {
4544            if let Some(line_info) = self.lines.get(line_num - 1)
4545                && !line_info.content(self.content).trim().is_empty()
4546            {
4547                return false;
4548            }
4549        }
4550        true
4551    }
4552
4553    /// Merge two compatible list blocks into one
4554    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4555        current.end_line = next.end_line;
4556        current.item_lines.extend_from_slice(&next.item_lines);
4557
4558        // Update max marker width
4559        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4560
4561        // Handle marker consistency for unordered lists
4562        if !current.is_ordered && self.markers_differ(&current, next) {
4563            current.marker = None; // Mixed markers
4564        }
4565
4566        current
4567    }
4568
4569    /// Check if two blocks have different markers
4570    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4571        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4572    }
4573}
4574
4575/// Types of spacing between list blocks
4576#[derive(Debug, PartialEq)]
4577enum BlockSpacing {
4578    Consecutive,    // No gap between blocks
4579    SingleBlank,    // One blank line between blocks
4580    MultipleBlanks, // Multiple blank lines but no content
4581    ContentBetween, // Content exists between blocks
4582}
4583
4584/// Check if there's meaningful content (not just blank lines) between two list blocks
4585fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4586    // Check lines between current.end_line and next.start_line
4587    for line_num in (current.end_line + 1)..next.start_line {
4588        if let Some(line_info) = lines.get(line_num - 1) {
4589            // Convert to 0-indexed
4590            let trimmed = line_info.content(content).trim();
4591
4592            // Skip empty lines
4593            if trimmed.is_empty() {
4594                continue;
4595            }
4596
4597            // Check for structural separators that should separate lists (CommonMark compliant)
4598
4599            // Headings separate lists
4600            if line_info.heading.is_some() {
4601                return true; // Has meaningful content - headings separate lists
4602            }
4603
4604            // Horizontal rules separate lists (---, ***, ___)
4605            if is_horizontal_rule(trimmed) {
4606                return true; // Has meaningful content - horizontal rules separate lists
4607            }
4608
4609            // Tables separate lists
4610            if crate::utils::skip_context::is_table_line(trimmed) {
4611                return true; // Has meaningful content - tables separate lists
4612            }
4613
4614            // Blockquotes separate lists
4615            if trimmed.starts_with('>') {
4616                return true; // Has meaningful content - blockquotes separate lists
4617            }
4618
4619            // Code block fences separate lists (unless properly indented as list content)
4620            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4621                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4622
4623                // Check if this code block is properly indented as list continuation
4624                let min_continuation_indent = if current.is_ordered {
4625                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4626                } else {
4627                    current.nesting_level + 2
4628                };
4629
4630                if line_indent < min_continuation_indent {
4631                    // This is a standalone code block that separates lists
4632                    return true; // Has meaningful content - standalone code blocks separate lists
4633                }
4634            }
4635
4636            // Check if this line has proper indentation for list continuation
4637            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4638
4639            // Calculate minimum indentation needed to be list continuation
4640            let min_indent = if current.is_ordered {
4641                current.nesting_level + current.max_marker_width
4642            } else {
4643                current.nesting_level + 2
4644            };
4645
4646            // If the line is not indented enough to be list continuation, it's meaningful content
4647            if line_indent < min_indent {
4648                return true; // Has meaningful content - content not indented as list continuation
4649            }
4650
4651            // If we reach here, the line is properly indented as list continuation
4652            // Continue checking other lines
4653        }
4654    }
4655
4656    // Only blank lines or properly indented list continuation content between blocks
4657    false
4658}
4659
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between characters
/// - No other characters allowed
///
/// Returns `false` when the line is indented by more than three spaces or when
/// a tab occurs anywhere in the leading indentation (for example `" \t---"`).
pub fn is_horizontal_rule_line(line: &str) -> bool {
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();

    // A tab in the indentation advances to the next 4-column tab stop, which puts
    // the content at indented-code depth, so it is rejected whether or not spaces
    // precede it.
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(line.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must start with `-`, `*` or `_`, contain at least three of that
/// same character, and otherwise contain only spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character selects the rule marker; anything else is not a rule.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}

/// Backwards-compatible alias for `is_horizontal_rule_content`
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4705
/// Unit tests for `LintContext`.
4707#[cfg(test)]
4708mod tests {
4709    use super::*;
4710
4711    #[test]
4712    fn test_empty_content() {
4713        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4714        assert_eq!(ctx.content, "");
4715        assert_eq!(ctx.line_offsets, vec![0]);
4716        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4717        assert_eq!(ctx.lines.len(), 0);
4718    }
4719
4720    #[test]
4721    fn test_single_line() {
4722        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4723        assert_eq!(ctx.content, "# Hello");
4724        assert_eq!(ctx.line_offsets, vec![0]);
4725        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4726        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4727    }
4728
4729    #[test]
4730    fn test_multi_line() {
4731        let content = "# Title\n\nSecond line\nThird line";
4732        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4733        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4734        // Test offset to line/col
4735        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4736        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4737        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4738        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4739        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4740    }
4741
4742    #[test]
4743    fn test_line_info() {
4744        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4745        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4746
4747        // Test line info
4748        assert_eq!(ctx.lines.len(), 7);
4749
4750        // Line 1: "# Title"
4751        let line1 = &ctx.lines[0];
4752        assert_eq!(line1.content(ctx.content), "# Title");
4753        assert_eq!(line1.byte_offset, 0);
4754        assert_eq!(line1.indent, 0);
4755        assert!(!line1.is_blank);
4756        assert!(!line1.in_code_block);
4757        assert!(line1.list_item.is_none());
4758
4759        // Line 2: "    indented"
4760        let line2 = &ctx.lines[1];
4761        assert_eq!(line2.content(ctx.content), "    indented");
4762        assert_eq!(line2.byte_offset, 8);
4763        assert_eq!(line2.indent, 4);
4764        assert!(!line2.is_blank);
4765
4766        // Line 3: "" (blank)
4767        let line3 = &ctx.lines[2];
4768        assert_eq!(line3.content(ctx.content), "");
4769        assert!(line3.is_blank);
4770
4771        // Test helper methods
4772        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4773        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4774        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4775        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4776    }
4777
#[test]
fn test_list_item_detection() {
    // Bullet and ordered markers should be recognised; plain paragraphs should not.
    let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // "- Unordered item"
    let dash_line = &ctx.lines[0];
    assert!(dash_line.list_item.is_some());
    let dash_item = dash_line.list_item.as_ref().unwrap();
    assert_eq!(dash_item.marker, "-");
    assert!(!dash_item.is_ordered);
    assert_eq!(dash_item.marker_column, 0);
    assert_eq!(dash_item.content_column, 2);

    // "  * Nested item"
    let star_line = &ctx.lines[1];
    assert!(star_line.list_item.is_some());
    let star_item = star_line.list_item.as_ref().unwrap();
    assert_eq!(star_item.marker, "*");
    assert_eq!(star_item.marker_column, 2);

    // "1. Ordered item"
    let numbered_line = &ctx.lines[2];
    assert!(numbered_line.list_item.is_some());
    let numbered_item = numbered_line.list_item.as_ref().unwrap();
    assert_eq!(numbered_item.marker, "1.");
    assert!(numbered_item.is_ordered);
    assert_eq!(numbered_item.number, Some(1));

    // "Not a list" (sixth line)
    let paragraph_line = &ctx.lines[5];
    assert!(paragraph_line.list_item.is_none());
}
4811
#[test]
fn test_offset_to_line_col_edge_cases() {
    let content = "a\nb\nc";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Lines start at byte offsets 0, 2 and 4.
    // Each entry is (byte offset, expected (line, column)), both 1-based.
    let expectations = [
        (0, (1, 1)), // 'a'
        (1, (1, 2)), // after 'a'
        (2, (2, 1)), // 'b'
        (3, (2, 2)), // after 'b'
        (4, (3, 1)), // 'c'
        (5, (3, 2)), // after 'c'
    ];
    for (offset, line_col) in expectations {
        assert_eq!(ctx.offset_to_line_col(offset), line_col);
    }
}
4824
#[test]
fn test_mdx_esm_blocks() {
    let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

    let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

    assert_eq!(ctx.lines.len(), 10);

    // Only the leading import/export lines count as ESM.
    // Each entry is (line index, expected in_esm_block, failure message).
    let expectations = [
        (0, true, "Line 1 (import) should be in_esm_block"),
        (1, true, "Line 2 (export) should be in_esm_block"),
        (2, false, "Line 3 (blank) should NOT be in_esm_block"),
        (3, false, "Line 4 (heading) should NOT be in_esm_block"),
        (4, false, "Line 5 (blank) should NOT be in_esm_block"),
        (5, false, "Line 6 (text) should NOT be in_esm_block"),
    ];
    for (index, expected, message) in expectations {
        assert!(ctx.lines[index].in_esm_block == expected, "{message}");
    }
}
4853
#[test]
fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
    let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // The import/export lines must stay unflagged when the flavor is Standard
    for index in 0..2 {
        assert!(
            !ctx.lines[index].in_esm_block,
            "Line {} should NOT be in_esm_block in Standard flavor",
            index + 1
        );
    }
}
4874
#[test]
fn test_blockquote_with_indented_content() {
    // A `>` marker followed by heavily indented text must still register as a blockquote line.
    // The quoted content may additionally be classified as a code block (which is correct),
    // but MD046 needs to know the line sits inside a blockquote.
    let content = r#"# Heading

>      -S socket-path
>                    More text
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Lines 3 and 4 (indices 2 and 3) both carry blockquote info
    for (index, message) in [(2, "Line 3 should be a blockquote"), (3, "Line 4 should be a blockquote")] {
        assert!(
            ctx.lines.get(index).is_some_and(|line| line.blockquote.is_some()),
            "{message}"
        );
    }

    // Parsed blockquote details for line 3
    let first_quote = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(first_quote.content, "-S socket-path");
    assert_eq!(first_quote.nesting_level, 1);
    // Six spaces separate the `>` marker from the content
    assert!(first_quote.has_multiple_spaces_after_marker);

    // Parsed blockquote details for line 4
    let second_quote = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
    assert_eq!(second_quote.content, "More text");
    assert_eq!(second_quote.nesting_level, 1);
}
4910
#[test]
fn test_footnote_definitions_not_parsed_as_reference_defs() {
    // `[^id]:` introduces a footnote, which must not be collected as a reference definition
    let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let defs = &ctx.reference_defs;

    // Only `[regular]` qualifies
    assert_eq!(defs.len(), 1, "Footnotes should not be parsed as reference definitions");

    // And it keeps its id, URL and title
    let regular = &defs[0];
    assert_eq!(regular.id, "regular");
    assert_eq!(regular.url, "./path.md");
    assert_eq!(regular.title.as_deref(), Some("A real reference definition"));
}
4941
#[test]
fn test_footnote_with_inline_link_not_misidentified() {
    // Regression guard for issue #286: a footnote whose body is an inline link
    // used to be collected as a reference definition with URL "[link](url)"
    let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let defs = &ctx.reference_defs;

    // Nothing in this document is a reference definition
    assert!(
        defs.is_empty(),
        "Footnote with inline link should not create a reference definition"
    );
}
4960
#[test]
fn test_various_footnote_formats_excluded() {
    // Numeric, named, single-char, hyphenated and mixed footnote ids must all be skipped
    let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    let defs = &ctx.reference_defs;

    // Just the two ordinary definitions remain
    assert_eq!(defs.len(), 2, "Only regular reference definitions should be parsed");

    let has_id = |wanted: &str| defs.iter().any(|def| def.id.as_str() == wanted);
    assert!(has_id("ref1"));
    assert!(has_id("ref2"));
    assert!(defs.iter().all(|def| !def.id.starts_with('^')));
}
4987
4988    // =========================================================================
4989    // Tests for has_char and char_count methods
4990    // =========================================================================
4991
#[test]
fn test_has_char_tracked_characters() {
    // The content contains each of the 12 tracked characters at least once
    let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, failure message)
    let tracked = [
        ('#', "Should detect hash"),
        ('*', "Should detect asterisk"),
        ('_', "Should detect underscore"),
        ('-', "Should detect hyphen"),
        ('+', "Should detect plus"),
        ('>', "Should detect gt"),
        ('|', "Should detect pipe"),
        ('[', "Should detect bracket"),
        ('`', "Should detect backtick"),
        ('<', "Should detect lt"),
        ('!', "Should detect exclamation"),
        ('\n', "Should detect newline"),
    ];
    for (ch, message) in tracked {
        assert!(ctx.has_char(ch), "{message}");
    }
}
5012
#[test]
fn test_has_char_absent_characters() {
    let content = "Simple text without special chars";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Every tracked character is missing from this content; being a single line,
    // it contains no newline either. Entries are (character, failure message).
    let absent = [
        ('#', "Should not detect hash"),
        ('*', "Should not detect asterisk"),
        ('_', "Should not detect underscore"),
        ('-', "Should not detect hyphen"),
        ('+', "Should not detect plus"),
        ('>', "Should not detect gt"),
        ('|', "Should not detect pipe"),
        ('[', "Should not detect bracket"),
        ('`', "Should not detect backtick"),
        ('<', "Should not detect lt"),
        ('!', "Should not detect exclamation"),
        ('\n', "Should not detect newline in single line"),
    ];
    for (ch, message) in absent {
        assert!(!ctx.has_char(ch), "{message}");
    }
}
5033
#[test]
fn test_has_char_fallback_for_untracked() {
    let content = "Text with @mention and $dollar and %percent";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Characters outside the tracked set are answered via the content.contains() fallback.
    // Entries are (character, expected presence, failure message).
    let cases = [
        ('@', true, "Should detect @ via fallback"),
        ('$', true, "Should detect $ via fallback"),
        ('%', true, "Should detect % via fallback"),
        ('^', false, "Should not detect absent ^ via fallback"),
    ];
    for (ch, present, message) in cases {
        assert!(ctx.has_char(ch) == present, "{message}");
    }
}
5045
#[test]
fn test_char_count_tracked_characters() {
    let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (character, expected occurrences, failure message)
    let expected_counts = [
        ('#', 4, "Should count 4 hashes"),
        ('*', 6, "Should count 6 asterisks"),
        ('_', 4, "Should count 4 underscores"),
        ('-', 3, "Should count 3 hyphens"),
        ('+', 3, "Should count 3 pluses"),
        ('>', 4, "Should count 4 gt (2 nested + 2 in <<html>>)"),
        ('|', 4, "Should count 4 pipes"),
        ('[', 2, "Should count 2 brackets"),
        ('`', 4, "Should count 4 backticks"),
        ('<', 2, "Should count 2 lt"),
        ('!', 2, "Should count 2 exclamations"),
        ('\n', 10, "Should count 10 newlines"),
    ];
    for (ch, count, message) in expected_counts {
        assert_eq!(ctx.char_count(ch), count, "{message}");
    }
}
5065
#[test]
fn test_char_count_zero_for_absent() {
    let content = "Plain text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // None of these characters occur in the content, so each count is zero
    for ch in ['#', '*', '_', '\n'] {
        assert_eq!(ctx.char_count(ch), 0);
    }
}
5076
#[test]
fn test_char_count_fallback_for_untracked() {
    let content = "@@@ $$ %%%";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Untracked characters: (character, expected occurrences, failure message)
    let expected_counts = [
        ('@', 3, "Should count 3 @ via fallback"),
        ('$', 2, "Should count 2 $ via fallback"),
        ('%', 3, "Should count 3 % via fallback"),
        ('^', 0, "Should count 0 for absent char"),
    ];
    for (ch, count, message) in expected_counts {
        assert_eq!(ctx.char_count(ch), count, "{message}");
    }
}
5087
#[test]
fn test_char_count_empty_content() {
    let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

    // An empty document counts zero of everything
    for ch in ['#', '*', '@'] {
        assert_eq!(ctx.char_count(ch), 0);
    }

    // ...and reports no character as present
    for ch in ['#', '@'] {
        assert!(!ctx.has_char(ch));
    }
}
5098
5099    // =========================================================================
5100    // Tests for is_in_html_tag method
5101    // =========================================================================
5102
#[test]
fn test_is_in_html_tag_simple() {
    let content = "<div>content</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message), walking left to right:
    // opening tag, text between the tags, closing tag
    let cases = [
        (0, true, "Position 0 (<) should be in tag"),
        (1, true, "Position 1 (d) should be in tag"),
        (4, true, "Position 4 (>) should be in tag"),
        (5, false, "Position 5 (c) should not be in tag"),
        (10, false, "Position 10 (t) should not be in tag"),
        (12, true, "Position 12 (<) should be in tag"),
        (17, true, "Position 17 (>) should be in tag"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_html_tag(pos) == expected, "{message}");
    }
}
5121
#[test]
fn test_is_in_html_tag_self_closing() {
    let content = "Text <br/> more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message):
    // text before the tag, the `<br/>` tag itself, text after it
    let cases = [
        (0, false, "Position 0 should not be in tag"),
        (4, false, "Position 4 (space) should not be in tag"),
        (5, true, "Position 5 (<) should be in tag"),
        (8, true, "Position 8 (/) should be in tag"),
        (9, true, "Position 9 (>) should be in tag"),
        (10, false, "Position 10 (space) should not be in tag"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_html_tag(pos) == expected, "{message}");
    }
}
5139
#[test]
fn test_is_in_html_tag_with_attributes() {
    let content = r#"<a href="url" class="link">text</a>"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message):
    // opening tag including its attributes, link text, closing tag
    let cases = [
        (0, true, "Start of tag"),
        (10, true, "Inside href attribute"),
        (20, true, "Inside class attribute"),
        (26, true, "End of opening tag"),
        (27, false, "Start of content"),
        (30, false, "End of content"),
        (31, true, "Start of closing tag"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_html_tag(pos) == expected, "{message}");
    }
}
5158
#[test]
fn test_is_in_html_tag_multiline() {
    let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // The opening tag is spread over three lines; all of these positions are inside it
    let inside_tag = [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ];
    for (pos, message) in inside_tag {
        assert!(ctx.is_in_html_tag(pos), "{message}");
    }

    // Skip the `>` that ends the opening tag and its newline to land on the content
    let first_content_byte = content.find(">\n").unwrap() + 2;
    assert!(!ctx.is_in_html_tag(first_content_byte), "Content after tag");
}
5173
#[test]
fn test_is_in_html_tag_no_tags() {
    let content = "Plain text without any HTML";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // With no markup present, every byte position lies outside a tag
    (0..content.len()).for_each(|pos| {
        assert!(!ctx.is_in_html_tag(pos), "Position {} should not be in tag", pos);
    });
}
5184
5185    // =========================================================================
5186    // Tests for is_in_jinja_range method
5187    // =========================================================================
5188
#[test]
fn test_is_in_jinja_range_expression() {
    let content = "Hello {{ name }}!";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // "{{ name }}" occupies bytes 6..=15.
    // Entries are (byte position, expected result, failure message).
    let cases = [
        (0, false, "H should not be in Jinja"),
        (5, false, "Space before Jinja should not be in Jinja"),
        (6, true, "First brace should be in Jinja"),
        (7, true, "Second brace should be in Jinja"),
        (10, true, "name should be in Jinja"),
        (14, true, "Closing brace should be in Jinja"),
        (15, true, "Second closing brace should be in Jinja"),
        (16, false, "! should not be in Jinja"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_jinja_range(pos) == expected, "{message}");
    }
}
5208
#[test]
fn test_is_in_jinja_range_statement() {
    let content = "{% if condition %}content{% endif %}";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // (byte position, expected result, failure message):
    // the `{% if ... %}` statement, the text between, the `{% endif %}` statement
    let cases = [
        (0, true, "Start of Jinja statement"),
        (5, true, "condition should be in Jinja"),
        (17, true, "End of opening statement"),
        (18, false, "content should not be in Jinja"),
        (25, true, "Start of endif"),
        (32, true, "endif should be in Jinja"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_jinja_range(pos) == expected, "{message}");
    }
}
5226
#[test]
fn test_is_in_jinja_range_multiple() {
    let content = "{{ a }} and {{ b }}";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Positions within the first expression
    for pos in [0, 3, 6] {
        assert!(ctx.is_in_jinja_range(pos));
    }

    // Positions in the plain text separating the two expressions
    for pos in [8, 11] {
        assert!(!ctx.is_in_jinja_range(pos));
    }

    // Positions within the second expression
    for pos in [12, 15, 18] {
        assert!(ctx.is_in_jinja_range(pos));
    }
}
5246
#[test]
fn test_is_in_jinja_range_no_jinja() {
    // The first sample only *mentions* single braces; the second actually contains lone
    // `{` / `}` characters so that case is exercised. Single braces are expected not to
    // open a Jinja range: the sibling tests only treat `{{ ... }}` and `{% ... %}` as Jinja.
    let samples = [
        "Plain text with single braces but not Jinja",
        "Plain text with {single} braces but not Jinja",
    ];

    for content in samples {
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // No position should be in Jinja
        for i in 0..content.len() {
            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
        }
    }
}
5257
5258    // =========================================================================
5259    // Tests for is_in_link_title method
5260    // =========================================================================
5261
#[test]
fn test_is_in_link_title_with_title() {
    let content = r#"[ref]: https://example.com "Title text"

Some content."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Exactly one reference definition, and it records both title offsets
    assert_eq!(ctx.reference_defs.len(), 1);
    let definition = &ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_some());
    assert!(definition.title_byte_end.is_some());

    let (title_start, title_end) = (
        definition.title_byte_start.unwrap(),
        definition.title_byte_end.unwrap(),
    );

    // A position inside the URL precedes the title
    assert!(!ctx.is_in_link_title(10), "URL should not be in title");

    // Start, middle and last byte of the title are all inside it
    let inside_title = [
        (title_start, "Title start should be in title"),
        (title_start + 5, "Middle of title should be in title"),
        (title_end - 1, "End of title should be in title"),
    ];
    for (pos, message) in inside_title {
        assert!(ctx.is_in_link_title(pos), "{message}");
    }

    // The end offset itself is already outside
    assert!(
        !ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5295
#[test]
fn test_is_in_link_title_without_title() {
    let content = "[ref]: https://example.com\n\nSome content.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // The single reference definition carries no title offsets
    assert_eq!(ctx.reference_defs.len(), 1);
    let definition = &ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_none());
    assert!(definition.title_byte_end.is_none());

    // Hence no byte position can fall inside a title
    (0..content.len()).for_each(|pos| {
        assert!(!ctx.is_in_link_title(pos), "Position {} should not be in title", pos);
    });
}
5312
#[test]
fn test_is_in_link_title_multiple_refs() {
    // Three definitions: the first and third have a double-quoted title, the second has none.
    let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Should have 3 reference defs
    assert_eq!(ctx.reference_defs.len(), 3);

    // ref1 has title
    let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
    assert!(ref1.title_byte_start.is_some());

    // ref2 has no title
    let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
    assert!(ref2.title_byte_start.is_none());

    // ref3 has title
    let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
    assert!(ref3.title_byte_start.is_some());

    // Unwrap the offsets instead of using `if let`: a missing offset must fail the test
    // rather than silently skip the position checks below.
    let ref1_start = ref1.title_byte_start.expect("ref1 title should have a start offset");
    let ref1_end = ref1.title_byte_end.expect("ref1 title should have an end offset");
    assert!(ctx.is_in_link_title(ref1_start + 1));
    assert!(!ctx.is_in_link_title(ref1_end + 5));

    let ref3_start = ref3.title_byte_start.expect("ref3 title should have a start offset");
    let ref3_end = ref3.title_byte_end.expect("ref3 title should have an end offset");
    assert!(ctx.is_in_link_title(ref3_start + 1));
    // The end offset is exclusive, matching the check in test_is_in_link_title_with_title
    assert!(!ctx.is_in_link_title(ref3_end));
}
5347
#[test]
fn test_is_in_link_title_single_quotes() {
    // A single-quoted title must be tracked the same way as a double-quoted one.
    let content = "[ref]: /url 'Single quoted title'\n";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);
    let def = &ctx.reference_defs[0];

    // Unwrap rather than `if let`: if the single-quoted title is not recorded, the test
    // must fail instead of passing without running a single title assertion.
    let start = def
        .title_byte_start
        .expect("single-quoted title should have a start offset");
    let end = def
        .title_byte_end
        .expect("single-quoted title should have an end offset");

    assert!(ctx.is_in_link_title(start));
    assert!(ctx.is_in_link_title(start + 5));
    // The end offset is exclusive
    assert!(!ctx.is_in_link_title(end));
}
5362
#[test]
fn test_is_in_link_title_parentheses() {
    // The reference-definition parser may or may not accept parenthesized titles.
    // Either way, is_in_link_title has to agree with whatever the parser recorded.
    let content = "[ref]: /url (Parenthesized title)\n";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Title range of the first definition, if a definition exists and has both offsets
    let recorded_title = ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match recorded_title {
        Some((start, end)) => {
            assert!(ctx.is_in_link_title(start));
            assert!(ctx.is_in_link_title(start + 5));
            assert!(!ctx.is_in_link_title(end));
        }
        None => {
            // No definition, or a definition without a title: nothing is inside a title
            for pos in 0..content.len() {
                assert!(!ctx.is_in_link_title(pos));
            }
        }
    }
}
5391
#[test]
fn test_is_in_link_title_no_refs() {
    let content = "Just plain text without any reference definitions.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // No definitions are collected...
    assert!(ctx.reference_defs.is_empty());

    // ...so no byte position lies inside a title
    (0..content.len()).for_each(|pos| assert!(!ctx.is_in_link_title(pos)));
}
5403
5404    // =========================================================================
5405    // Math span tests (Issue #289)
5406    // =========================================================================
5407
#[test]
fn test_math_spans_inline() {
    // Link-like text between single dollars is captured as one inline math span
    let content = "Text with inline math $[f](x)$ in it.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let inline = &spans[0];
    assert!(!inline.is_display, "Should be inline math, not display");
    assert_eq!(inline.content, "[f](x)", "Content should be extracted correctly");
}
5420
#[test]
fn test_math_spans_display_single_line() {
    // `$$...$$` on a single line is one display math span
    let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
    let keeps_link_like_text = display.content.contains("[x](\\zeta)");
    assert!(keeps_link_like_text, "Content should contain the link-like pattern");
}
5436
#[test]
fn test_math_spans_display_multiline() {
    // A `$$` block spread over several lines still yields a single display span
    let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let display = &spans[0];
    assert!(display.is_display, "Should be display math");
}
5448
#[test]
fn test_is_in_math_span() {
    let content = "Text $[f](x)$ more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Byte of the opening `$`, and the byte just past the closing `$`
    let opening = content.find('$').unwrap();
    let past_closing = content.rfind('$').unwrap() + 1;

    // Two positions inside the span
    for inside in [opening + 1, opening + 3] {
        assert!(
            ctx.is_in_math_span(inside),
            "Position inside math span should return true"
        );
    }

    // Positions on either side of the span
    assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
    assert!(
        !ctx.is_in_math_span(past_closing + 1),
        "Position after math span should return false"
    );
}
5474
#[test]
fn test_math_spans_mixed_with_code() {
    // One math span and one code span on the same line, each holding link-like text
    let content = "Math $[f](x)$ and code `[g](y)` mixed";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let math = ctx.math_spans();
    let code = ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Each span holds only the text between its delimiters
    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5491
#[test]
fn test_math_spans_no_math() {
    // Content without any dollar signs produces no math spans
    let content = "Regular text without any math at all.";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert!(spans.is_empty(), "Should have no math spans");
}
5500
#[test]
fn test_math_spans_multiple() {
    let content = "First $a$ and second $b$ and display $$c$$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so the inline count is the remainder
    let display_count = spans.iter().filter(|span| span.is_display).count();
    let inline_count = spans.len() - display_count;

    assert_eq!(inline_count, 2, "Should have two inline math spans");
    assert_eq!(display_count, 1, "Should have one display math span");
}
5516
#[test]
fn test_is_in_math_span_boundary_positions() {
    // Exact boundary behaviour for a document that is a single math span.
    // The content is 8 bytes long: `$` `[` `f` `]` `(` `x` `)` `$` at bytes 0..=7.
    let content = "$[f](x)$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let span = &spans[0];
    let (first, last, past_end) = (span.byte_offset, span.byte_end - 1, span.byte_end);

    // The range is half-open: the start and the last byte are inside, byte_end is not.
    // Entries are (byte position, expected result, failure message).
    let cases = [
        (first, true, "Start position should be in span"),
        (first + 1, true, "Position after start should be in span"),
        (last, true, "Position at end-1 should be in span"),
        (past_end, false, "Position at byte_end should NOT be in span (exclusive)"),
    ];
    for (pos, expected, message) in cases {
        assert!(ctx.is_in_math_span(pos) == expected, "{message}");
    }
}
5553
5554    #[test]
5555    fn test_math_spans_at_document_start() {
5556        let content = "$x$ text";
5557        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5558
5559        let math_spans = ctx.math_spans();
5560        assert_eq!(math_spans.len(), 1);
5561        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5562    }
5563
5564    #[test]
5565    fn test_math_spans_at_document_end() {
5566        let content = "text $x$";
5567        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5568
5569        let math_spans = ctx.math_spans();
5570        assert_eq!(math_spans.len(), 1);
5571        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5572    }
5573
5574    #[test]
5575    fn test_math_spans_consecutive() {
5576        let content = "$a$$b$";
5577        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5578
5579        let math_spans = ctx.math_spans();
5580        // pulldown-cmark should parse these as separate spans
5581        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5582
5583        // All positions should be in some math span
5584        for i in 0..content.len() {
5585            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5586        }
5587    }
5588
5589    #[test]
5590    fn test_math_spans_currency_not_math() {
5591        // Unbalanced $ should not create math spans
5592        let content = "Price is $100";
5593        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5594
5595        let math_spans = ctx.math_spans();
5596        // pulldown-cmark requires balanced delimiters for math
5597        // $100 alone is not math
5598        assert!(
5599            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5600            "Unbalanced $ should not create math span containing 100"
5601        );
5602    }
5603
5604    // =========================================================================
5605    // Tests for O(1) reference definition lookups via HashMap
5606    // =========================================================================
5607
5608    #[test]
5609    fn test_reference_lookup_o1_basic() {
5610        let content = r#"[ref1]: /url1
5611[REF2]: /url2 "Title"
5612[Ref3]: /url3
5613
5614Use [link][ref1] and [link][REF2]."#;
5615        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5616
5617        // Verify we have 3 reference defs
5618        assert_eq!(ctx.reference_defs.len(), 3);
5619
5620        // Test get_reference_url with various cases
5621        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5622        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5623        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5624        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5625        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5626        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5627        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5628    }
5629
5630    #[test]
5631    fn test_reference_lookup_o1_get_reference_def() {
5632        let content = r#"[myref]: https://example.com "My Title"
5633"#;
5634        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5635
5636        // Test get_reference_def
5637        let def = ctx.get_reference_def("myref").expect("Should find myref");
5638        assert_eq!(def.url, "https://example.com");
5639        assert_eq!(def.title.as_deref(), Some("My Title"));
5640
5641        // Case insensitive
5642        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5643        assert_eq!(def2.url, "https://example.com");
5644
5645        // Non-existent
5646        assert!(ctx.get_reference_def("nonexistent").is_none());
5647    }
5648
5649    #[test]
5650    fn test_reference_lookup_o1_has_reference_def() {
5651        let content = r#"[foo]: /foo
5652[BAR]: /bar
5653"#;
5654        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5655
5656        // Test has_reference_def
5657        assert!(ctx.has_reference_def("foo"));
5658        assert!(ctx.has_reference_def("FOO")); // case insensitive
5659        assert!(ctx.has_reference_def("bar"));
5660        assert!(ctx.has_reference_def("Bar")); // case insensitive
5661        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5662    }
5663
5664    #[test]
5665    fn test_reference_lookup_o1_empty_content() {
5666        let content = "No references here.";
5667        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5668
5669        assert!(ctx.reference_defs.is_empty());
5670        assert_eq!(ctx.get_reference_url("anything"), None);
5671        assert!(ctx.get_reference_def("anything").is_none());
5672        assert!(!ctx.has_reference_def("anything"));
5673    }
5674
5675    #[test]
5676    fn test_reference_lookup_o1_special_characters_in_id() {
5677        let content = r#"[ref-with-dash]: /url1
5678[ref_with_underscore]: /url2
5679[ref.with.dots]: /url3
5680"#;
5681        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5682
5683        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5684        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5685        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5686    }
5687
5688    #[test]
5689    fn test_reference_lookup_o1_unicode_id() {
5690        let content = r#"[日本語]: /japanese
5691[émoji]: /emoji
5692"#;
5693        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5694
5695        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5696        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5697        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5698    }
5699}