Skip to main content

rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Times an expression and optionally reports the elapsed time (non-WASM builds).
///
/// Arguments: a display name, a boolean "profiling enabled" expression, and the
/// expression to run. The expression is always evaluated exactly once and its
/// value is the value of the macro invocation; when the flag is true the elapsed
/// wall-clock time is written to stderr as `[PROFILE] <name>: <duration>`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The timer is started unconditionally; only the report is gated by the flag.
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// WASM variant of the profiling macro: no timing and no output, the expression
/// is simply evaluated and its value returned.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Combined pattern for inline links `[text](url "title")` and reference links `[text][ref]`.
// Flags: `s` makes `.` match newlines; `x` is verbose mode, so the whitespace and the
// `#` comments inside the pattern string are ignored by the regex engine.
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
// Compiled lazily on first use; `unwrap()` can only fail if the literal below is invalid.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern: same shape as LINK_PATTERN but requires a leading `!`.
// Flags: `s` makes `.` match newlines; `x` is verbose mode, so the whitespace and the
// `#` comments inside the pattern string are ignored by the regex engine.
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[id]: url "title"` on a single line.
// `(?m)` makes `^`/`$` match at line boundaries; up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs: no pattern is defined here; this file imports the centralized URL_SIMPLE_REGEX from regex_cache instead
65
// Pattern for email addresses: local part, `@`, domain, and a final dot-separated
// label of at least two ASCII letters. Unanchored, so it can match anywhere in a string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the input; group 1 captures the optional leading whitespace,
// one or more `>` characters, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a line
///
/// `LintContext::lines` holds these in document order, so the entry at index `i`
/// describes line `i + 1`. The struct stores offsets rather than the text itself;
/// use [`LineInfo::content`] with the source document to get the line's text.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (set by `LintContext::new` for every line after the first line of such a span)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132    /// Get the line content as a string slice from the source document
133    pub fn content<'a>(&self, source: &'a str) -> &'a str {
134        &source[self.byte_offset..self.byte_offset + self.byte_len]
135    }
136}
137
/// Information about a list item
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
152
/// Heading style type
///
/// Recorded per heading in `HeadingInfo::style`.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
163
/// Parsed link information
///
/// Produced by `LintContext::parse_links` and stored in `LintContext::links`.
/// The `Cow` fields share the lifetime `'a` of the document held by the context.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
188
/// Information about a broken link reported by pulldown-cmark
///
/// Returned alongside the parsed links by `LintContext::parse_links` and stored in
/// `LintContext::broken_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
197
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Returned alongside the parsed links by `LintContext::parse_links` and stored in
/// `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
210
/// Parsed image information
///
/// Produced by `LintContext::parse_images` and stored in `LintContext::images`.
/// Field layout mirrors `ParsedLink`, with `alt_text` in place of `text`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
235
/// Reference definition [ref]: url "title"
///
/// Produced by `LintContext::parse_reference_defs` and stored in
/// `LintContext::reference_defs`; the context also keeps a map from lowercase ID
/// to the index of the definition in that vector.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
256
/// Parsed code span information
///
/// Built in `LintContext::new` by `build_code_spans_from_ranges`. A span with
/// `end_line > line` covers several lines; `LintContext::new` uses that to flag
/// the following lines as code span continuations.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
277
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Cached lazily by `LintContext` (the cache starts empty in `LintContext::new`).
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
298
/// Information about a heading
///
/// Stored in `LineInfo::heading`. Entries with `is_valid == false` are skipped by
/// `ValidHeadingsIter`.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
326
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; both references borrow
/// from the slice of `LineInfo` the iterator walks.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
340
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order (index `i` is line `i + 1`)
    lines: &'a [LineInfo],
    /// Index into `lines` of the next line to examine
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351    fn new(lines: &'a [LineInfo]) -> Self {
352        Self {
353            lines,
354            current_index: 0,
355        }
356    }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360    type Item = ValidHeading<'a>;
361
362    fn next(&mut self) -> Option<Self::Item> {
363        while self.current_index < self.lines.len() {
364            let idx = self.current_index;
365            self.current_index += 1;
366
367            let line_info = &self.lines[idx];
368            if let Some(heading) = &line_info.heading
369                && heading.is_valid
370            {
371                return Some(ValidHeading {
372                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
373                    heading,
374                    line_info,
375                });
376            }
377        }
378        None
379    }
380}
381
/// Information about a blockquote line
///
/// Stored in `LineInfo::blockquote` for lines that are blockquotes.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
402
/// Information about a list block
///
/// Produced by `LintContext::parse_list_blocks` and stored in
/// `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
///
/// The tuple elements correspond to the fields of `ListItemInfo`, in a different order.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// Type alias for byte ranges used in JSX expression and MDX comment detection
///
/// Each element is a pair of byte offsets into the document.
// NOTE(review): presumably (start, end) with an exclusive end - confirm against the code that produces these ranges.
type ByteRanges = Vec<(usize, usize)>;
431
/// Character frequency data for fast content analysis
///
/// Computed once in `LintContext::new` by `compute_char_frequency` and stored in
/// `LintContext::char_frequency`. The derived `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
460
/// Pre-parsed HTML tag information
///
/// Cached lazily by `LintContext` (the cache starts empty in `LintContext::new`).
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
483
/// Pre-parsed emphasis span information
///
/// In `LintContext::new` these are returned by `compute_basic_line_info` together
/// with the line information and placed in the emphasis span cache up front.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
504
/// Pre-parsed table row information
///
/// Cached lazily by `LintContext` (the cache starts empty in `LintContext::new`).
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
517
/// Pre-parsed bare URL information (not in links)
///
/// Cached lazily by `LintContext` (the cache starts empty in `LintContext::new`).
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
536
/// Pre-computed view of one Markdown document, built by `LintContext::new`.
///
/// Most fields are filled eagerly by `new`. The `OnceLock` fields are caches:
/// `code_spans_cache` and `emphasis_spans_cache` are populated in `new`, the
/// others start empty there.
pub struct LintContext<'a> {
    pub content: &'a str,
    pub line_offsets: Vec<usize>,
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
}
569
/// Borrowed pieces of a blockquote line, in the order they appear in the source.
///
/// Concatenating `indent`, `markers`, `spaces_after` and `content` reproduces the
/// line that was parsed.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The run of consecutive `>` characters
    markers: &'a str,
    /// Spaces/tabs directly after the markers
    spaces_after: &'a str,
    /// Everything after `spaces_after`
    content: &'a str,
}

/// Split a line into blockquote indent, markers, trailing spaces and content.
///
/// Returns `None` when the first character after the leading spaces/tabs is not `>`
/// (including empty and whitespace-only lines). Only a single uninterrupted run of
/// `>` is treated as markers; a later `>` after whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |c: char| c == ' ' || c == '\t';

    // Each step trims one component off the front and recovers it from the length difference.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());
    if markers.is_empty() {
        // No `>` at this position: not a blockquote line.
        return None;
    }

    let content = after_markers.trim_start_matches(is_space_or_tab);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
614
615impl<'a> LintContext<'a> {
616    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617        #[cfg(not(target_arch = "wasm32"))]
618        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619        #[cfg(target_arch = "wasm32")]
620        let profile = false;
621
622        let line_offsets = profile_section!("Line offsets", profile, {
623            let mut offsets = vec![0];
624            for (i, c) in content.char_indices() {
625                if c == '\n' {
626                    offsets.push(i + 1);
627                }
628            }
629            offsets
630        });
631
632        // Detect code blocks and code spans once and cache them
633        let (code_blocks, code_span_ranges) = profile_section!(
634            "Code blocks",
635            profile,
636            CodeBlockUtils::detect_code_blocks_and_spans(content)
637        );
638
639        // Pre-compute HTML comment ranges ONCE for all operations
640        let html_comment_ranges = profile_section!(
641            "HTML comment ranges",
642            profile,
643            crate::utils::skip_context::compute_html_comment_ranges(content)
644        );
645
646        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
647        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
648            if flavor == MarkdownFlavor::MkDocs {
649                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
650            } else {
651                Vec::new()
652            }
653        });
654
655        // Pre-compute Quarto div block ranges for Quarto flavor
656        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
657            if flavor == MarkdownFlavor::Quarto {
658                crate::utils::quarto_divs::detect_div_block_ranges(content)
659            } else {
660                Vec::new()
661            }
662        });
663
664        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
665        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
666        let (mut lines, emphasis_spans) = profile_section!(
667            "Basic line info",
668            profile,
669            Self::compute_basic_line_info(
670                content,
671                &line_offsets,
672                &code_blocks,
673                flavor,
674                &html_comment_ranges,
675                &autodoc_ranges,
676                &quarto_div_ranges,
677            )
678        );
679
680        // Detect HTML blocks BEFORE heading detection
681        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
682
683        // Detect ESM import/export blocks in MDX files BEFORE heading detection
684        profile_section!(
685            "ESM blocks",
686            profile,
687            Self::detect_esm_blocks(content, &mut lines, flavor)
688        );
689
690        // Detect JSX expressions and MDX comments in MDX files
691        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
692            "JSX/MDX detection",
693            profile,
694            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
695        );
696
697        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
698        profile_section!(
699            "MkDocs constructs",
700            profile,
701            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
702        );
703
704        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
705        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
706
707        // Now detect headings and blockquotes
708        profile_section!(
709            "Headings & blockquotes",
710            profile,
711            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
712        );
713
714        // Parse code spans early so we can exclude them from link/image parsing
715        let code_spans = profile_section!(
716            "Code spans",
717            profile,
718            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
719        );
720
721        // Mark lines that are continuations of multi-line code spans
722        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
723        for span in &code_spans {
724            if span.end_line > span.line {
725                // Mark lines after the first line as continuations
726                for line_num in (span.line + 1)..=span.end_line {
727                    if let Some(line_info) = lines.get_mut(line_num - 1) {
728                        line_info.in_code_span_continuation = true;
729                    }
730                }
731            }
732        }
733
734        // Parse links, images, references, and list blocks
735        let (links, broken_links, footnote_refs) = profile_section!(
736            "Links",
737            profile,
738            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
739        );
740
741        let images = profile_section!(
742            "Images",
743            profile,
744            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
745        );
746
747        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
748
749        // Build O(1) lookup map for reference definitions by lowercase ID
750        let reference_defs_map: HashMap<String, usize> = reference_defs
751            .iter()
752            .enumerate()
753            .map(|(idx, def)| (def.id.to_lowercase(), idx))
754            .collect();
755
756        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
757
758        // Compute character frequency for fast content analysis
759        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
760
761        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
762        let table_blocks = profile_section!(
763            "Table blocks",
764            profile,
765            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
766                content,
767                &code_blocks,
768                &code_spans,
769                &html_comment_ranges,
770            )
771        );
772
773        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
774        let line_index = profile_section!(
775            "Line index",
776            profile,
777            crate::utils::range_utils::LineIndex::new(content)
778        );
779
780        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
781        let jinja_ranges = profile_section!(
782            "Jinja ranges",
783            profile,
784            crate::utils::jinja_utils::find_jinja_ranges(content)
785        );
786
787        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
788        let citation_ranges = profile_section!("Citation ranges", profile, {
789            if flavor == MarkdownFlavor::Quarto {
790                crate::utils::quarto_divs::find_citation_ranges(content)
791            } else {
792                Vec::new()
793            }
794        });
795
796        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
797        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
798            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
799            let mut ranges = Vec::new();
800            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
801                ranges.push((mat.start(), mat.end()));
802            }
803            ranges
804        });
805
806        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
807
808        Self {
809            content,
810            line_offsets,
811            code_blocks,
812            lines,
813            links,
814            images,
815            broken_links,
816            footnote_refs,
817            reference_defs,
818            reference_defs_map,
819            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
820            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
821            list_blocks,
822            char_frequency,
823            html_tags_cache: OnceLock::new(),
824            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
825            table_rows_cache: OnceLock::new(),
826            bare_urls_cache: OnceLock::new(),
827            has_mixed_list_nesting_cache: OnceLock::new(),
828            html_comment_ranges,
829            table_blocks,
830            line_index,
831            jinja_ranges,
832            flavor,
833            source_file,
834            jsx_expression_ranges,
835            mdx_comment_ranges,
836            citation_ranges,
837            shortcode_ranges,
838            inline_config,
839        }
840    }
841
842    /// Check if a rule is disabled at a specific line number (1-indexed)
843    ///
844    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
845    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
846    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847        self.inline_config.is_rule_disabled(rule_name, line_number)
848    }
849
850    /// Get code spans - computed lazily on first access
851    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852        Arc::clone(
853            self.code_spans_cache
854                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855        )
856    }
857
858    /// Get math spans - computed lazily on first access
859    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860        Arc::clone(
861            self.math_spans_cache
862                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863        )
864    }
865
866    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
867    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868        let math_spans = self.math_spans();
869        math_spans
870            .iter()
871            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872    }
873
874    /// Get HTML comment ranges - pre-computed during LintContext construction
875    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876        &self.html_comment_ranges
877    }
878
879    /// Get HTML tags - computed lazily on first access
880    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881        Arc::clone(self.html_tags_cache.get_or_init(|| {
882            Arc::new(Self::parse_html_tags(
883                self.content,
884                &self.lines,
885                &self.code_blocks,
886                self.flavor,
887            ))
888        }))
889    }
890
891    /// Get emphasis spans - pre-computed during construction
892    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893        Arc::clone(
894            self.emphasis_spans_cache
895                .get()
896                .expect("emphasis_spans_cache initialized during construction"),
897        )
898    }
899
900    /// Get table rows - computed lazily on first access
901    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902        Arc::clone(
903            self.table_rows_cache
904                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905        )
906    }
907
908    /// Get bare URLs - computed lazily on first access
909    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910        Arc::clone(
911            self.bare_urls_cache
912                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913        )
914    }
915
916    /// Check if document has mixed ordered/unordered list nesting.
917    /// Result is cached after first computation (document-level invariant).
918    /// This is used by MD007 for smart style auto-detection.
919    pub fn has_mixed_list_nesting(&self) -> bool {
920        *self
921            .has_mixed_list_nesting_cache
922            .get_or_init(|| self.compute_mixed_list_nesting())
923    }
924
925    /// Internal computation for mixed list nesting (only called once per LintContext).
926    fn compute_mixed_list_nesting(&self) -> bool {
927        // Track parent list items by their marker position and type
928        // Using marker_column instead of indent because it works correctly
929        // for blockquoted content where indent doesn't account for the prefix
930        // Stack stores: (marker_column, is_ordered)
931        let mut stack: Vec<(usize, bool)> = Vec::new();
932        let mut last_was_blank = false;
933
934        for line_info in &self.lines {
935            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
936            if line_info.in_code_block
937                || line_info.in_front_matter
938                || line_info.in_mkdocstrings
939                || line_info.in_html_comment
940                || line_info.in_esm_block
941            {
942                continue;
943            }
944
945            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
946            if line_info.is_blank {
947                last_was_blank = true;
948                continue;
949            }
950
951            if let Some(list_item) = &line_info.list_item {
952                // Normalize column 1 to column 0 (consistent with MD007 check function)
953                let current_pos = if list_item.marker_column == 1 {
954                    0
955                } else {
956                    list_item.marker_column
957                };
958
959                // If there was a blank line and this item is at root level, reset stack
960                if last_was_blank && current_pos == 0 {
961                    stack.clear();
962                }
963                last_was_blank = false;
964
965                // Pop items at same or greater position (they're siblings or deeper, not parents)
966                while let Some(&(pos, _)) = stack.last() {
967                    if pos >= current_pos {
968                        stack.pop();
969                    } else {
970                        break;
971                    }
972                }
973
974                // Check if immediate parent has different type - this is mixed nesting
975                if let Some(&(_, parent_is_ordered)) = stack.last()
976                    && parent_is_ordered != list_item.is_ordered
977                {
978                    return true; // Found mixed nesting - early exit
979                }
980
981                stack.push((current_pos, list_item.is_ordered));
982            } else {
983                // Non-list line (but not blank) - could be paragraph or other content
984                last_was_blank = false;
985            }
986        }
987
988        false
989    }
990
991    /// Map a byte offset to (line, column)
992    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993        match self.line_offsets.binary_search(&offset) {
994            Ok(line) => (line + 1, 1),
995            Err(line) => {
996                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997                (line, offset - line_start + 1)
998            }
999        }
1000    }
1001
1002    /// Check if a position is within a code block or code span
1003    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004        // Check code blocks first
1005        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006            return true;
1007        }
1008
1009        // Check inline code spans (lazy load if needed)
1010        self.code_spans()
1011            .iter()
1012            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013    }
1014
1015    /// Get line information by line number (1-indexed)
1016    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017        if line_num > 0 {
1018            self.lines.get(line_num - 1)
1019        } else {
1020            None
1021        }
1022    }
1023
1024    /// Get byte offset for a line number (1-indexed)
1025    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026        self.line_info(line_num).map(|info| info.byte_offset)
1027    }
1028
1029    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1030    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031        let normalized_id = ref_id.to_lowercase();
1032        self.reference_defs_map
1033            .get(&normalized_id)
1034            .map(|&idx| self.reference_defs[idx].url.as_str())
1035    }
1036
1037    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1038    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039        let normalized_id = ref_id.to_lowercase();
1040        self.reference_defs_map
1041            .get(&normalized_id)
1042            .map(|&idx| &self.reference_defs[idx])
1043    }
1044
1045    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1046    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047        let normalized_id = ref_id.to_lowercase();
1048        self.reference_defs_map.contains_key(&normalized_id)
1049    }
1050
1051    /// Check if a line is part of a list block
1052    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053        self.list_blocks
1054            .iter()
1055            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056    }
1057
1058    /// Get the list block containing a specific line
1059    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060        self.list_blocks
1061            .iter()
1062            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063    }
1064
1065    // Compatibility methods for DocumentStructure migration
1066
1067    /// Check if a line is within a code block
1068    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069        if line_num == 0 || line_num > self.lines.len() {
1070            return false;
1071        }
1072        self.lines[line_num - 1].in_code_block
1073    }
1074
1075    /// Check if a line is within front matter
1076    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077        if line_num == 0 || line_num > self.lines.len() {
1078            return false;
1079        }
1080        self.lines[line_num - 1].in_front_matter
1081    }
1082
1083    /// Check if a line is within an HTML block
1084    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085        if line_num == 0 || line_num > self.lines.len() {
1086            return false;
1087        }
1088        self.lines[line_num - 1].in_html_block
1089    }
1090
1091    /// Check if a line and column is within a code span
1092    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093        if line_num == 0 || line_num > self.lines.len() {
1094            return false;
1095        }
1096
1097        // Use the code spans cache to check
1098        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1099        // Convert col to 0-indexed for comparison
1100        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101        let code_spans = self.code_spans();
1102        code_spans.iter().any(|span| {
1103            // Check if line is within the span's line range
1104            if line_num < span.line || line_num > span.end_line {
1105                return false;
1106            }
1107
1108            if span.line == span.end_line {
1109                // Single-line span: check column bounds
1110                col_0indexed >= span.start_col && col_0indexed < span.end_col
1111            } else if line_num == span.line {
1112                // First line of multi-line span: anything after start_col is in span
1113                col_0indexed >= span.start_col
1114            } else if line_num == span.end_line {
1115                // Last line of multi-line span: anything before end_col is in span
1116                col_0indexed < span.end_col
1117            } else {
1118                // Middle line of multi-line span: entire line is in span
1119                true
1120            }
1121        })
1122    }
1123
1124    /// Check if a byte offset is within a code span
1125    #[inline]
1126    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127        let code_spans = self.code_spans();
1128        code_spans
1129            .iter()
1130            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131    }
1132
1133    /// Check if a byte position is within a reference definition
1134    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1135    #[inline]
1136    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137        self.reference_defs
1138            .iter()
1139            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140    }
1141
1142    /// Check if a byte position is within an HTML comment
1143    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1144    /// where k is the number of HTML comments (typically very small)
1145    #[inline]
1146    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147        self.html_comment_ranges
1148            .iter()
1149            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150    }
1151
1152    /// Check if a byte position is within an HTML tag (including multiline tags)
1153    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1154    #[inline]
1155    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156        self.html_tags()
1157            .iter()
1158            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159    }
1160
1161    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1162    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163        self.jinja_ranges
1164            .iter()
1165            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166    }
1167
1168    /// Check if a byte position is within a JSX expression (MDX: {expression})
1169    #[inline]
1170    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171        self.jsx_expression_ranges
1172            .iter()
1173            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174    }
1175
1176    /// Check if a byte position is within an MDX comment ({/* ... */})
1177    #[inline]
1178    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179        self.mdx_comment_ranges
1180            .iter()
1181            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182    }
1183
1184    /// Get all JSX expression byte ranges
1185    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186        &self.jsx_expression_ranges
1187    }
1188
1189    /// Get all MDX comment byte ranges
1190    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191        &self.mdx_comment_ranges
1192    }
1193
1194    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1195    /// Only active in Quarto flavor
1196    #[inline]
1197    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198        self.citation_ranges
1199            .iter()
1200            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201    }
1202
1203    /// Get all citation byte ranges (Quarto flavor only)
1204    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205        &self.citation_ranges
1206    }
1207
1208    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1209    #[inline]
1210    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211        self.shortcode_ranges
1212            .iter()
1213            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214    }
1215
1216    /// Get all shortcode byte ranges
1217    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218        &self.shortcode_ranges
1219    }
1220
1221    /// Check if a byte position is within a link reference definition title
1222    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223        self.reference_defs.iter().any(|def| {
1224            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225                byte_pos >= start && byte_pos < end
1226            } else {
1227                false
1228            }
1229        })
1230    }
1231
1232    /// Check if content has any instances of a specific character (fast)
1233    pub fn has_char(&self, ch: char) -> bool {
1234        match ch {
1235            '#' => self.char_frequency.hash_count > 0,
1236            '*' => self.char_frequency.asterisk_count > 0,
1237            '_' => self.char_frequency.underscore_count > 0,
1238            '-' => self.char_frequency.hyphen_count > 0,
1239            '+' => self.char_frequency.plus_count > 0,
1240            '>' => self.char_frequency.gt_count > 0,
1241            '|' => self.char_frequency.pipe_count > 0,
1242            '[' => self.char_frequency.bracket_count > 0,
1243            '`' => self.char_frequency.backtick_count > 0,
1244            '<' => self.char_frequency.lt_count > 0,
1245            '!' => self.char_frequency.exclamation_count > 0,
1246            '\n' => self.char_frequency.newline_count > 0,
1247            _ => self.content.contains(ch), // Fallback for other characters
1248        }
1249    }
1250
1251    /// Get count of a specific character (fast)
1252    pub fn char_count(&self, ch: char) -> usize {
1253        match ch {
1254            '#' => self.char_frequency.hash_count,
1255            '*' => self.char_frequency.asterisk_count,
1256            '_' => self.char_frequency.underscore_count,
1257            '-' => self.char_frequency.hyphen_count,
1258            '+' => self.char_frequency.plus_count,
1259            '>' => self.char_frequency.gt_count,
1260            '|' => self.char_frequency.pipe_count,
1261            '[' => self.char_frequency.bracket_count,
1262            '`' => self.char_frequency.backtick_count,
1263            '<' => self.char_frequency.lt_count,
1264            '!' => self.char_frequency.exclamation_count,
1265            '\n' => self.char_frequency.newline_count,
1266            _ => self.content.matches(ch).count(), // Fallback for other characters
1267        }
1268    }
1269
1270    /// Check if content likely contains headings (fast)
1271    pub fn likely_has_headings(&self) -> bool {
1272        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1273    }
1274
1275    /// Check if content likely contains lists (fast)
1276    pub fn likely_has_lists(&self) -> bool {
1277        self.char_frequency.asterisk_count > 0
1278            || self.char_frequency.hyphen_count > 0
1279            || self.char_frequency.plus_count > 0
1280    }
1281
1282    /// Check if content likely contains emphasis (fast)
1283    pub fn likely_has_emphasis(&self) -> bool {
1284        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285    }
1286
1287    /// Check if content likely contains tables (fast)
1288    pub fn likely_has_tables(&self) -> bool {
1289        self.char_frequency.pipe_count > 2
1290    }
1291
1292    /// Check if content likely contains blockquotes (fast)
1293    pub fn likely_has_blockquotes(&self) -> bool {
1294        self.char_frequency.gt_count > 0
1295    }
1296
1297    /// Check if content likely contains code (fast)
1298    pub fn likely_has_code(&self) -> bool {
1299        self.char_frequency.backtick_count > 0
1300    }
1301
1302    /// Check if content likely contains links or images (fast)
1303    pub fn likely_has_links_or_images(&self) -> bool {
1304        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305    }
1306
1307    /// Check if content likely contains HTML (fast)
1308    pub fn likely_has_html(&self) -> bool {
1309        self.char_frequency.lt_count > 0
1310    }
1311
1312    /// Get the blockquote prefix for inserting a blank line at the given line index.
1313    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1314    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1315    /// Returns an empty string if the line is not inside a blockquote.
1316    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317        if let Some(line_info) = self.lines.get(line_idx)
1318            && let Some(ref bq) = line_info.blockquote
1319        {
1320            bq.prefix.trim_end().to_string()
1321        } else {
1322            String::new()
1323        }
1324    }
1325
1326    /// Get HTML tags on a specific line
1327    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328        self.html_tags()
1329            .iter()
1330            .filter(|tag| tag.line == line_num)
1331            .cloned()
1332            .collect()
1333    }
1334
1335    /// Get emphasis spans on a specific line
1336    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337        self.emphasis_spans()
1338            .iter()
1339            .filter(|span| span.line == line_num)
1340            .cloned()
1341            .collect()
1342    }
1343
1344    /// Get table rows on a specific line
1345    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346        self.table_rows()
1347            .iter()
1348            .filter(|row| row.line == line_num)
1349            .cloned()
1350            .collect()
1351    }
1352
1353    /// Get bare URLs on a specific line
1354    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355        self.bare_urls()
1356            .iter()
1357            .filter(|url| url.line == line_num)
1358            .cloned()
1359            .collect()
1360    }
1361
1362    /// Find the line index for a given byte offset using binary search.
1363    /// Returns (line_index, line_number, column) where:
1364    /// - line_index is the 0-based index in the lines array
1365    /// - line_number is the 1-based line number
1366    /// - column is the byte offset within that line
1367    #[inline]
1368    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369        // Binary search to find the line containing this byte offset
1370        let idx = match lines.binary_search_by(|line| {
1371            if byte_offset < line.byte_offset {
1372                std::cmp::Ordering::Greater
1373            } else if byte_offset > line.byte_offset + line.byte_len {
1374                std::cmp::Ordering::Less
1375            } else {
1376                std::cmp::Ordering::Equal
1377            }
1378        }) {
1379            Ok(idx) => idx,
1380            Err(idx) => idx.saturating_sub(1),
1381        };
1382
1383        let line = &lines[idx];
1384        let line_num = idx + 1;
1385        let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387        (idx, line_num, col)
1388    }
1389
1390    /// Check if a byte offset is within a code span using binary search
1391    #[inline]
1392    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393        // Since spans are sorted by byte_offset, use partition_point for binary search
1394        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396        // Check the span that starts at or before our offset
1397        if idx > 0 {
1398            let span = &code_spans[idx - 1];
1399            if offset >= span.byte_offset && offset < span.byte_end {
1400                return true;
1401            }
1402        }
1403
1404        false
1405    }
1406
1407    /// Collect byte ranges of all links using pulldown-cmark
1408    /// This is used to skip heading detection for lines that fall within link syntax
1409    /// (e.g., multiline links like `[text](url\n#fragment)`)
1410    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413        let mut link_ranges = Vec::new();
1414        let mut options = Options::empty();
1415        options.insert(Options::ENABLE_WIKILINKS);
1416        options.insert(Options::ENABLE_FOOTNOTES);
1417
1418        let parser = Parser::new_ext(content, options).into_offset_iter();
1419        let mut link_stack: Vec<usize> = Vec::new();
1420
1421        for (event, range) in parser {
1422            match event {
1423                Event::Start(Tag::Link { .. }) => {
1424                    link_stack.push(range.start);
1425                }
1426                Event::End(TagEnd::Link) => {
1427                    if let Some(start_pos) = link_stack.pop() {
1428                        link_ranges.push((start_pos, range.end));
1429                    }
1430                }
1431                _ => {}
1432            }
1433        }
1434
1435        link_ranges
1436    }
1437
1438    /// Parse all links in the content
1439    fn parse_links(
1440        content: &'a str,
1441        lines: &[LineInfo],
1442        code_blocks: &[(usize, usize)],
1443        code_spans: &[CodeSpan],
1444        flavor: MarkdownFlavor,
1445        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448        use std::collections::HashSet;
1449
1450        let mut links = Vec::with_capacity(content.len() / 500);
1451        let mut broken_links = Vec::new();
1452        let mut footnote_refs = Vec::new();
1453
1454        // Track byte positions of links found by pulldown-cmark
1455        let mut found_positions = HashSet::new();
1456
1457        // Use pulldown-cmark's streaming parser with BrokenLink callback
1458        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1459        // This automatically handles:
1460        // - Escaped links (won't generate events)
1461        // - Links in code blocks/spans (won't generate Link events)
1462        // - Images (generates Tag::Image instead)
1463        // - Reference resolution (dest_url is already resolved!)
1464        // - Broken references (callback is invoked)
1465        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1466        let mut options = Options::empty();
1467        options.insert(Options::ENABLE_WIKILINKS);
1468        options.insert(Options::ENABLE_FOOTNOTES);
1469
1470        let parser = Parser::new_with_broken_link_callback(
1471            content,
1472            options,
1473            Some(|link: BrokenLink<'_>| {
1474                broken_links.push(BrokenLinkInfo {
1475                    reference: link.reference.to_string(),
1476                    span: link.span.clone(),
1477                });
1478                None
1479            }),
1480        )
1481        .into_offset_iter();
1482
1483        let mut link_stack: Vec<(
1484            usize,
1485            usize,
1486            pulldown_cmark::CowStr<'a>,
1487            LinkType,
1488            pulldown_cmark::CowStr<'a>,
1489        )> = Vec::new();
1490        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1491
1492        for (event, range) in parser {
1493            match event {
1494                Event::Start(Tag::Link {
1495                    link_type,
1496                    dest_url,
1497                    id,
1498                    ..
1499                }) => {
1500                    // Link start - record position, URL, and reference ID
1501                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1502                    text_chunks.clear();
1503                }
1504                Event::Text(text) if !link_stack.is_empty() => {
1505                    // Track text content with its byte range
1506                    text_chunks.push((text.to_string(), range.start, range.end));
1507                }
1508                Event::Code(code) if !link_stack.is_empty() => {
1509                    // Include inline code in link text (with backticks)
1510                    let code_text = format!("`{code}`");
1511                    text_chunks.push((code_text, range.start, range.end));
1512                }
1513                Event::End(TagEnd::Link) => {
1514                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515                        // Skip if in HTML comment
1516                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517                            text_chunks.clear();
1518                            continue;
1519                        }
1520
1521                        // Find line and column information
1522                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524                        // Skip if this link is on a MkDocs snippet line
1525                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526                            text_chunks.clear();
1527                            continue;
1528                        }
1529
1530                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532                        let is_reference = matches!(
1533                            link_type,
1534                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535                        );
1536
1537                        // Extract link text directly from source bytes to preserve escaping
1538                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1539                        let link_text = if start_pos < content.len() {
1540                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542                            // Find MATCHING ] by tracking bracket depth for nested brackets
1543                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1544                            // Brackets inside code spans (between backticks) should be ignored
1545                            let mut close_pos = None;
1546                            let mut depth = 0;
1547                            let mut in_code_span = false;
1548
1549                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550                                // Count preceding backslashes
1551                                let mut backslash_count = 0;
1552                                let mut j = i;
1553                                while j > 0 && link_bytes[j - 1] == b'\\' {
1554                                    backslash_count += 1;
1555                                    j -= 1;
1556                                }
1557                                let is_escaped = backslash_count % 2 != 0;
1558
1559                                // Track code spans - backticks toggle in/out of code
1560                                if byte == b'`' && !is_escaped {
1561                                    in_code_span = !in_code_span;
1562                                }
1563
1564                                // Only count brackets when NOT in a code span
1565                                if !is_escaped && !in_code_span {
1566                                    if byte == b'[' {
1567                                        depth += 1;
1568                                    } else if byte == b']' {
1569                                        if depth == 0 {
1570                                            // Found the matching closing bracket
1571                                            close_pos = Some(i);
1572                                            break;
1573                                        } else {
1574                                            depth -= 1;
1575                                        }
1576                                    }
1577                                }
1578                            }
1579
1580                            if let Some(pos) = close_pos {
1581                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582                            } else {
1583                                Cow::Borrowed("")
1584                            }
1585                        } else {
1586                            Cow::Borrowed("")
1587                        };
1588
1589                        // For reference links, use the actual reference ID from pulldown-cmark
1590                        let reference_id = if is_reference && !ref_id.is_empty() {
1591                            Some(Cow::Owned(ref_id.to_lowercase()))
1592                        } else if is_reference {
1593                            // For collapsed/shortcut references without explicit ID, use the link text
1594                            Some(Cow::Owned(link_text.to_lowercase()))
1595                        } else {
1596                            None
1597                        };
1598
1599                        // Track this position as found
1600                        found_positions.insert(start_pos);
1601
1602                        links.push(ParsedLink {
1603                            line: line_num,
1604                            start_col: col_start,
1605                            end_col: col_end,
1606                            byte_offset: start_pos,
1607                            byte_end: range.end,
1608                            text: link_text,
1609                            url: Cow::Owned(url.to_string()),
1610                            is_reference,
1611                            reference_id,
1612                            link_type,
1613                        });
1614
1615                        text_chunks.clear();
1616                    }
1617                }
1618                Event::FootnoteReference(footnote_id) => {
1619                    // Capture footnote references like [^1], [^note]
1620                    // Skip if in HTML comment
1621                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622                        continue;
1623                    }
1624
1625                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626                    footnote_refs.push(FootnoteRef {
1627                        id: footnote_id.to_string(),
1628                        line: line_num,
1629                        byte_offset: range.start,
1630                        byte_end: range.end,
1631                    });
1632                }
1633                _ => {}
1634            }
1635        }
1636
1637        // Also find undefined references using regex
1638        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1639        // because the reference is undefined
1640        for cap in LINK_PATTERN.captures_iter(content) {
1641            let full_match = cap.get(0).unwrap();
1642            let match_start = full_match.start();
1643            let match_end = full_match.end();
1644
1645            // Skip if this was already found by pulldown-cmark (it's a valid link)
1646            if found_positions.contains(&match_start) {
1647                continue;
1648            }
1649
1650            // Skip if escaped
1651            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652                continue;
1653            }
1654
1655            // Skip if it's an image
1656            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657                continue;
1658            }
1659
1660            // Skip if in code block
1661            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662                continue;
1663            }
1664
1665            // Skip if in code span
1666            if Self::is_offset_in_code_span(code_spans, match_start) {
1667                continue;
1668            }
1669
1670            // Skip if in HTML comment
1671            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672                continue;
1673            }
1674
1675            // Find line and column information
1676            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678            // Skip if this link is on a MkDocs snippet line
1679            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680                continue;
1681            }
1682
1683            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685            let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687            // Only process reference links (group 6)
1688            if let Some(ref_id) = cap.get(6) {
1689                let ref_id_str = ref_id.as_str();
1690                let normalized_ref = if ref_id_str.is_empty() {
1691                    Cow::Owned(text.to_lowercase()) // Implicit reference
1692                } else {
1693                    Cow::Owned(ref_id_str.to_lowercase())
1694                };
1695
1696                // This is an undefined reference (pulldown-cmark didn't parse it)
1697                links.push(ParsedLink {
1698                    line: line_num,
1699                    start_col: col_start,
1700                    end_col: col_end,
1701                    byte_offset: match_start,
1702                    byte_end: match_end,
1703                    text: Cow::Borrowed(text),
1704                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1705                    is_reference: true,
1706                    reference_id: Some(normalized_ref),
1707                    link_type: LinkType::Reference, // Undefined references are reference-style
1708                });
1709            }
1710        }
1711
1712        (links, broken_links, footnote_refs)
1713    }
1714
1715    /// Parse all images in the content
1716    fn parse_images(
1717        content: &'a str,
1718        lines: &[LineInfo],
1719        code_blocks: &[(usize, usize)],
1720        code_spans: &[CodeSpan],
1721        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722    ) -> Vec<ParsedImage<'a>> {
1723        use crate::utils::skip_context::is_in_html_comment_ranges;
1724        use std::collections::HashSet;
1725
1726        // Pre-size based on a heuristic: images are less common than links
1727        let mut images = Vec::with_capacity(content.len() / 1000);
1728        let mut found_positions = HashSet::new();
1729
1730        // Use pulldown-cmark for parsing - more accurate and faster
1731        let parser = Parser::new(content).into_offset_iter();
1732        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733            Vec::new();
1734        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1735
1736        for (event, range) in parser {
1737            match event {
1738                Event::Start(Tag::Image {
1739                    link_type,
1740                    dest_url,
1741                    id,
1742                    ..
1743                }) => {
1744                    image_stack.push((range.start, dest_url, link_type, id));
1745                    text_chunks.clear();
1746                }
1747                Event::Text(text) if !image_stack.is_empty() => {
1748                    text_chunks.push((text.to_string(), range.start, range.end));
1749                }
1750                Event::Code(code) if !image_stack.is_empty() => {
1751                    let code_text = format!("`{code}`");
1752                    text_chunks.push((code_text, range.start, range.end));
1753                }
1754                Event::End(TagEnd::Image) => {
1755                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756                        // Skip if in code block
1757                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758                            continue;
1759                        }
1760
1761                        // Skip if in code span
1762                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1763                            continue;
1764                        }
1765
1766                        // Skip if in HTML comment
1767                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768                            continue;
1769                        }
1770
1771                        // Find line and column using binary search
1772                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775                        let is_reference = matches!(
1776                            link_type,
1777                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778                        );
1779
1780                        // Extract alt text directly from source bytes to preserve escaping
1781                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1782                        let alt_text = if start_pos < content.len() {
1783                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785                            // Find MATCHING ] by tracking bracket depth for nested brackets
1786                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1787                            let mut close_pos = None;
1788                            let mut depth = 0;
1789
1790                            if image_bytes.len() > 2 {
1791                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792                                    // Count preceding backslashes
1793                                    let mut backslash_count = 0;
1794                                    let mut j = i;
1795                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1796                                        backslash_count += 1;
1797                                        j -= 1;
1798                                    }
1799                                    let is_escaped = backslash_count % 2 != 0;
1800
1801                                    if !is_escaped {
1802                                        if byte == b'[' {
1803                                            depth += 1;
1804                                        } else if byte == b']' {
1805                                            if depth == 0 {
1806                                                // Found the matching closing bracket
1807                                                close_pos = Some(i);
1808                                                break;
1809                                            } else {
1810                                                depth -= 1;
1811                                            }
1812                                        }
1813                                    }
1814                                }
1815                            }
1816
1817                            if let Some(pos) = close_pos {
1818                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819                            } else {
1820                                Cow::Borrowed("")
1821                            }
1822                        } else {
1823                            Cow::Borrowed("")
1824                        };
1825
1826                        let reference_id = if is_reference && !ref_id.is_empty() {
1827                            Some(Cow::Owned(ref_id.to_lowercase()))
1828                        } else if is_reference {
1829                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1830                        } else {
1831                            None
1832                        };
1833
1834                        found_positions.insert(start_pos);
1835                        images.push(ParsedImage {
1836                            line: line_num,
1837                            start_col: col_start,
1838                            end_col: col_end,
1839                            byte_offset: start_pos,
1840                            byte_end: range.end,
1841                            alt_text,
1842                            url: Cow::Owned(url.to_string()),
1843                            is_reference,
1844                            reference_id,
1845                            link_type,
1846                        });
1847                    }
1848                }
1849                _ => {}
1850            }
1851        }
1852
1853        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1854        for cap in IMAGE_PATTERN.captures_iter(content) {
1855            let full_match = cap.get(0).unwrap();
1856            let match_start = full_match.start();
1857            let match_end = full_match.end();
1858
1859            // Skip if already found by pulldown-cmark
1860            if found_positions.contains(&match_start) {
1861                continue;
1862            }
1863
1864            // Skip if the ! is escaped
1865            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866                continue;
1867            }
1868
1869            // Skip if in code block, code span, or HTML comment
1870            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871                || Self::is_offset_in_code_span(code_spans, match_start)
1872                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873            {
1874                continue;
1875            }
1876
1877            // Only process reference images (undefined references not found by pulldown-cmark)
1878            if let Some(ref_id) = cap.get(6) {
1879                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882                let ref_id_str = ref_id.as_str();
1883                let normalized_ref = if ref_id_str.is_empty() {
1884                    Cow::Owned(alt_text.to_lowercase())
1885                } else {
1886                    Cow::Owned(ref_id_str.to_lowercase())
1887                };
1888
1889                images.push(ParsedImage {
1890                    line: line_num,
1891                    start_col: col_start,
1892                    end_col: col_end,
1893                    byte_offset: match_start,
1894                    byte_end: match_end,
1895                    alt_text: Cow::Borrowed(alt_text),
1896                    url: Cow::Borrowed(""),
1897                    is_reference: true,
1898                    reference_id: Some(normalized_ref),
1899                    link_type: LinkType::Reference, // Undefined references are reference-style
1900                });
1901            }
1902        }
1903
1904        images
1905    }
1906
1907    /// Parse reference definitions
1908    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909        // Pre-size based on lines count as reference definitions are line-based
1910        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1911
1912        for (line_idx, line_info) in lines.iter().enumerate() {
1913            // Skip lines in code blocks
1914            if line_info.in_code_block {
1915                continue;
1916            }
1917
1918            let line = line_info.content(content);
1919            let line_num = line_idx + 1;
1920
1921            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922                let id_raw = cap.get(1).unwrap().as_str();
1923
1924                // Skip footnote definitions - they use [^id]: syntax and are semantically
1925                // different from reference link definitions
1926                if id_raw.starts_with('^') {
1927                    continue;
1928                }
1929
1930                let id = id_raw.to_lowercase();
1931                let url = cap.get(2).unwrap().as_str().to_string();
1932                let title_match = cap.get(3).or_else(|| cap.get(4));
1933                let title = title_match.map(|m| m.as_str().to_string());
1934
1935                // Calculate byte positions
1936                // The match starts at the beginning of the line (0) and extends to the end
1937                let match_obj = cap.get(0).unwrap();
1938                let byte_offset = line_info.byte_offset + match_obj.start();
1939                let byte_end = line_info.byte_offset + match_obj.end();
1940
1941                // Calculate title byte positions (includes the quote character before content)
1942                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943                    // The match is the content inside quotes, so we include the quote before
1944                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1945                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1946                    (Some(start), Some(end))
1947                } else {
1948                    (None, None)
1949                };
1950
1951                refs.push(ReferenceDef {
1952                    line: line_num,
1953                    id,
1954                    url,
1955                    title,
1956                    byte_offset,
1957                    byte_end,
1958                    title_byte_start,
1959                    title_byte_end,
1960                });
1961            }
1962        }
1963
1964        refs
1965    }
1966
1967    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1968    /// Handles nested blockquotes like `> > > content`
1969    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1970    #[inline]
1971    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1972        let trimmed_start = line.trim_start();
1973        if !trimmed_start.starts_with('>') {
1974            return None;
1975        }
1976
1977        // Track total prefix length to handle nested blockquotes
1978        let mut remaining = line;
1979        let mut total_prefix_len = 0;
1980
1981        loop {
1982            let trimmed = remaining.trim_start();
1983            if !trimmed.starts_with('>') {
1984                break;
1985            }
1986
1987            // Add leading whitespace + '>' to prefix
1988            let leading_ws_len = remaining.len() - trimmed.len();
1989            total_prefix_len += leading_ws_len + 1;
1990
1991            let after_gt = &trimmed[1..];
1992
1993            // Handle optional whitespace after '>' (space or tab)
1994            if let Some(stripped) = after_gt.strip_prefix(' ') {
1995                total_prefix_len += 1;
1996                remaining = stripped;
1997            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1998                total_prefix_len += 1;
1999                remaining = stripped;
2000            } else {
2001                remaining = after_gt;
2002            }
2003        }
2004
2005        Some((&line[..total_prefix_len], remaining))
2006    }
2007
2008    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2009    ///
2010    /// Returns a HashMap keyed by line byte offset, containing:
2011    /// `(is_ordered, marker, marker_column, content_column, number)`
2012    ///
2013    /// ## Why pulldown-cmark?
2014    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2015    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2016    /// This fixes issue #253 where continuation lines were falsely detected.
2017    ///
2018    /// ## Tab indentation quirk
2019    /// Pulldown-cmark reports nested list items at the newline character position
2020    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2021    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2022    /// We detect this and advance to the correct line.
2023    ///
2024    /// ## HashMap key strategy
2025    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2026    /// that resolve to the same line (after newline adjustment). The first event
2027    /// for each line is authoritative.
2028    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2029    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2030    /// This avoids a separate parse for emphasis detection.
2031    fn detect_list_items_and_emphasis_with_pulldown(
2032        content: &str,
2033        line_offsets: &[usize],
2034        flavor: MarkdownFlavor,
2035        front_matter_end: usize,
2036        code_blocks: &[(usize, usize)],
2037    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038        use std::collections::HashMap;
2039
2040        let mut list_items = HashMap::new();
2041        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043        let mut options = Options::empty();
2044        options.insert(Options::ENABLE_TABLES);
2045        options.insert(Options::ENABLE_FOOTNOTES);
2046        options.insert(Options::ENABLE_STRIKETHROUGH);
2047        options.insert(Options::ENABLE_TASKLISTS);
2048        // Always enable GFM features for consistency with existing behavior
2049        options.insert(Options::ENABLE_GFM);
2050
2051        // Suppress unused variable warning
2052        let _ = flavor;
2053
2054        let parser = Parser::new_ext(content, options).into_offset_iter();
2055        let mut list_depth: usize = 0;
2056        let mut list_stack: Vec<bool> = Vec::new();
2057
2058        for (event, range) in parser {
2059            match event {
2060                // Capture emphasis spans (for MD030's emphasis detection)
2061                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063                        2
2064                    } else {
2065                        1
2066                    };
2067                    let match_start = range.start;
2068                    let match_end = range.end;
2069
2070                    // Skip if in code block
2071                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072                        // Determine marker character by looking at the content at the start
2073                        let marker = content[match_start..].chars().next().unwrap_or('*');
2074                        if marker == '*' || marker == '_' {
2075                            // Extract content between markers
2076                            let content_start = match_start + marker_count;
2077                            let content_end = if match_end >= marker_count {
2078                                match_end - marker_count
2079                            } else {
2080                                match_end
2081                            };
2082                            let content_part = if content_start < content_end && content_end <= content.len() {
2083                                &content[content_start..content_end]
2084                            } else {
2085                                ""
2086                            };
2087
2088                            // Find which line this emphasis is on using line_offsets
2089                            let line_idx = match line_offsets.binary_search(&match_start) {
2090                                Ok(idx) => idx,
2091                                Err(idx) => idx.saturating_sub(1),
2092                            };
2093                            let line_num = line_idx + 1;
2094                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095                            let col_start = match_start - line_start;
2096                            let col_end = match_end - line_start;
2097
2098                            emphasis_spans.push(EmphasisSpan {
2099                                line: line_num,
2100                                start_col: col_start,
2101                                end_col: col_end,
2102                                byte_offset: match_start,
2103                                byte_end: match_end,
2104                                marker,
2105                                marker_count,
2106                                content: content_part.to_string(),
2107                            });
2108                        }
2109                    }
2110                }
2111                Event::Start(Tag::List(start_number)) => {
2112                    list_depth += 1;
2113                    list_stack.push(start_number.is_some());
2114                }
2115                Event::End(TagEnd::List(_)) => {
2116                    list_depth = list_depth.saturating_sub(1);
2117                    list_stack.pop();
2118                }
2119                Event::Start(Tag::Item) if list_depth > 0 => {
2120                    // Get the ordered state for the CURRENT (innermost) list
2121                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122                    // Find which line this byte offset corresponds to
2123                    let item_start = range.start;
2124
2125                    // Binary search to find the line number
2126                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2127                        Ok(idx) => idx,
2128                        Err(idx) => idx.saturating_sub(1),
2129                    };
2130
2131                    // Pulldown-cmark reports nested list items at the newline before the item
2132                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2133                    // Advance to the actual content line in this case.
2134                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135                        line_idx += 1;
2136                    }
2137
2138                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2139                    if front_matter_end > 0 && line_idx < front_matter_end {
2140                        continue;
2141                    }
2142
2143                    if line_idx < line_offsets.len() {
2144                        let line_start_byte = line_offsets[line_idx];
2145                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146                        let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148                        // Strip trailing newline
2149                        let line = line
2150                            .strip_suffix('\n')
2151                            .or_else(|| line.strip_suffix("\r\n"))
2152                            .unwrap_or(line);
2153
2154                        // Strip blockquote prefix if present
2155                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2156                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157                            (prefix.len(), content)
2158                        } else {
2159                            (0, line)
2160                        };
2161
2162                        // Parse the list marker from the actual line
2163                        if current_list_is_ordered {
2164                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165                                Self::parse_ordered_list(line_to_parse)
2166                            {
2167                                let marker = format!("{number_str}{delimiter}");
2168                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2169                                let content_column = marker_column + marker.len() + spacing.len();
2170                                let number = number_str.parse().ok();
2171
2172                                list_items.entry(line_start_byte).or_insert((
2173                                    true,
2174                                    marker,
2175                                    marker_column,
2176                                    content_column,
2177                                    number,
2178                                ));
2179                            }
2180                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2181                            Self::parse_unordered_list(line_to_parse)
2182                        {
2183                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2184                            let content_column = marker_column + 1 + spacing.len();
2185
2186                            list_items.entry(line_start_byte).or_insert((
2187                                false,
2188                                marker.to_string(),
2189                                marker_column,
2190                                content_column,
2191                                None,
2192                            ));
2193                        }
2194                    }
2195                }
2196                _ => {}
2197            }
2198        }
2199
2200        (list_items, emphasis_spans)
2201    }
2202
2203    /// Fast unordered list parser - replaces regex for 5-10x speedup
2204    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2205    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2206    #[inline]
2207    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2208        let bytes = line.as_bytes();
2209        let mut i = 0;
2210
2211        // Skip leading whitespace
2212        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2213            i += 1;
2214        }
2215
2216        // Check for marker
2217        if i >= bytes.len() {
2218            return None;
2219        }
2220        let marker = bytes[i] as char;
2221        if marker != '-' && marker != '*' && marker != '+' {
2222            return None;
2223        }
2224        let marker_pos = i;
2225        i += 1;
2226
2227        // Collect spacing after marker (space or tab only)
2228        let spacing_start = i;
2229        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2230            i += 1;
2231        }
2232
2233        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2234    }
2235
2236    /// Fast ordered list parser - replaces regex for 5-10x speedup
2237    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2238    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2239    #[inline]
2240    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2241        let bytes = line.as_bytes();
2242        let mut i = 0;
2243
2244        // Skip leading whitespace
2245        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2246            i += 1;
2247        }
2248
2249        // Collect digits
2250        let number_start = i;
2251        while i < bytes.len() && bytes[i].is_ascii_digit() {
2252            i += 1;
2253        }
2254        if i == number_start {
2255            return None; // No digits found
2256        }
2257
2258        // Check for delimiter
2259        if i >= bytes.len() {
2260            return None;
2261        }
2262        let delimiter = bytes[i] as char;
2263        if delimiter != '.' && delimiter != ')' {
2264            return None;
2265        }
2266        let delimiter_pos = i;
2267        i += 1;
2268
2269        // Collect spacing after delimiter (space or tab only)
2270        let spacing_start = i;
2271        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2272            i += 1;
2273        }
2274
2275        Some((
2276            &line[..number_start],
2277            &line[number_start..delimiter_pos],
2278            delimiter,
2279            &line[spacing_start..i],
2280            &line[i..],
2281        ))
2282    }
2283
2284    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2285    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2286    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2287        let num_lines = line_offsets.len();
2288        let mut in_code_block = vec![false; num_lines];
2289
2290        // For each code block, mark all lines within it
2291        for &(start, end) in code_blocks {
2292            // Ensure we're at valid UTF-8 boundaries
2293            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2294                let mut boundary = start;
2295                while boundary > 0 && !content.is_char_boundary(boundary) {
2296                    boundary -= 1;
2297                }
2298                boundary
2299            } else {
2300                start
2301            };
2302
2303            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2304                let mut boundary = end;
2305                while boundary < content.len() && !content.is_char_boundary(boundary) {
2306                    boundary += 1;
2307                }
2308                boundary
2309            } else {
2310                end.min(content.len())
2311            };
2312
2313            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2314            // That function now has proper list context awareness (see code_block_utils.rs)
2315            // and correctly distinguishes between:
2316            // - Fenced code blocks (``` or ~~~)
2317            // - Indented code blocks at document level (4 spaces + blank line before)
2318            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2319            //
2320            // We no longer need to re-validate here. The original validation logic
2321            // was causing false positives by marking list continuation paragraphs as
2322            // code blocks when they have 4 spaces of indentation.
2323
2324            // Use binary search to find the first and last line indices
2325            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2326            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2327            //
2328            // Find the line that CONTAINS safe_start: the line with the largest
2329            // start offset that is <= safe_start. partition_point gives us the
2330            // first line that starts AFTER safe_start, so we subtract 1.
2331            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2332            let first_line = first_line_after.saturating_sub(1);
2333            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2334
2335            // Mark all lines in the range at once
2336            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2337                *flag = true;
2338            }
2339        }
2340
2341        in_code_block
2342    }
2343
2344    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2345    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2346    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2347        let content_lines: Vec<&str> = content.lines().collect();
2348        let num_lines = content_lines.len();
2349        let mut in_math_block = vec![false; num_lines];
2350
2351        let mut inside_math = false;
2352
2353        for (i, line) in content_lines.iter().enumerate() {
2354            // Skip lines that are in code blocks - math delimiters inside code are literal
2355            if code_block_map.get(i).copied().unwrap_or(false) {
2356                continue;
2357            }
2358
2359            let trimmed = line.trim();
2360
2361            // Check for math block delimiter ($$)
2362            // A line with just $$ toggles the math block state
2363            if trimmed == "$$" {
2364                if inside_math {
2365                    // Closing delimiter - this line is still part of the math block
2366                    in_math_block[i] = true;
2367                    inside_math = false;
2368                } else {
2369                    // Opening delimiter - this line starts the math block
2370                    in_math_block[i] = true;
2371                    inside_math = true;
2372                }
2373            } else if inside_math {
2374                // Content inside math block
2375                in_math_block[i] = true;
2376            }
2377        }
2378
2379        in_math_block
2380    }
2381
2382    /// Pre-compute basic line information (without headings/blockquotes)
2383    /// Also returns emphasis spans detected during the pulldown-cmark parse
2384    fn compute_basic_line_info(
2385        content: &str,
2386        line_offsets: &[usize],
2387        code_blocks: &[(usize, usize)],
2388        flavor: MarkdownFlavor,
2389        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393        let content_lines: Vec<&str> = content.lines().collect();
2394        let mut lines = Vec::with_capacity(content_lines.len());
2395
2396        // Pre-compute which lines are in code blocks
2397        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399        // Pre-compute which lines are in math blocks ($$ ... $$)
2400        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402        // Detect front matter boundaries FIRST, before any other parsing
2403        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2404        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2407        // (context-aware, eliminates false positives)
2408        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409            content,
2410            line_offsets,
2411            flavor,
2412            front_matter_end,
2413            code_blocks,
2414        );
2415
2416        for (i, line) in content_lines.iter().enumerate() {
2417            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418            let indent = line.len() - line.trim_start().len();
2419            // Compute visual indent with proper CommonMark tab expansion
2420            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2423            let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425            // For blank detection, consider blockquote context
2426            let is_blank = if let Some((_, content)) = blockquote_parse {
2427                // In blockquote context, check if content after prefix is blank
2428                content.trim().is_empty()
2429            } else {
2430                line.trim().is_empty()
2431            };
2432
2433            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2434            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2437            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2440            // This ensures content after `-->` on the same line is not incorrectly skipped
2441            let line_end_offset = byte_offset + line.len();
2442            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443                html_comment_ranges,
2444                byte_offset,
2445                line_end_offset,
2446            );
2447            // Use pulldown-cmark's list detection for context-aware parsing
2448            // This eliminates false positives on continuation lines (issue #253)
2449            let list_item =
2450                list_item_map
2451                    .get(&byte_offset)
2452                    .map(
2453                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454                            marker: marker.clone(),
2455                            is_ordered: *is_ordered,
2456                            number: *number,
2457                            marker_column: *marker_column,
2458                            content_column: *content_column,
2459                        },
2460                    );
2461
2462            // Detect horizontal rules (only outside code blocks and frontmatter)
2463            // Uses CommonMark-compliant check including leading indentation validation
2464            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467            // Get math block status for this line
2468            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470            // Check if line is inside a Quarto div block
2471            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474            lines.push(LineInfo {
2475                byte_offset,
2476                byte_len: line.len(),
2477                indent,
2478                visual_indent,
2479                is_blank,
2480                in_code_block,
2481                in_front_matter,
2482                in_html_block: false, // Will be populated after line creation
2483                in_html_comment,
2484                list_item,
2485                heading: None,    // Will be populated in second pass for Setext headings
2486                blockquote: None, // Will be populated after line creation
2487                in_mkdocstrings,
2488                in_esm_block: false, // Will be populated after line creation for MDX files
2489                in_code_span_continuation: false, // Will be populated after code spans are parsed
2490                is_horizontal_rule: is_hr,
2491                in_math_block,
2492                in_quarto_div,
2493                in_jsx_expression: false,  // Will be populated for MDX files
2494                in_mdx_comment: false,     // Will be populated for MDX files
2495                in_jsx_component: false,   // Will be populated for MDX files
2496                in_jsx_fragment: false,    // Will be populated for MDX files
2497                in_admonition: false,      // Will be populated for MkDocs files
2498                in_content_tab: false,     // Will be populated for MkDocs files
2499                in_definition_list: false, // Will be populated for MkDocs files
2500            });
2501        }
2502
2503        (lines, emphasis_spans)
2504    }
2505
2506    /// Detect headings and blockquotes (called after HTML block detection)
2507    fn detect_headings_and_blockquotes(
2508        content: &str,
2509        lines: &mut [LineInfo],
2510        flavor: MarkdownFlavor,
2511        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2512        link_byte_ranges: &[(usize, usize)],
2513    ) {
2514        // Regex for heading detection
2515        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2516            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2517        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2518            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2519
2520        let content_lines: Vec<&str> = content.lines().collect();
2521
2522        // Detect front matter boundaries to skip those lines
2523        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2524
2525        // Detect headings (including Setext which needs look-ahead) and blockquotes
2526        for i in 0..lines.len() {
2527            let line = content_lines[i];
2528
2529            // Detect blockquotes FIRST, before any skip conditions.
2530            // A line can be both a blockquote AND contain a code block inside it.
2531            // We need to know about the blockquote marker regardless of code block status.
2532            // Skip only frontmatter lines - those are never blockquotes.
2533            if !(front_matter_end > 0 && i < front_matter_end)
2534                && let Some(bq) = parse_blockquote_detailed(line)
2535            {
2536                let nesting_level = bq.markers.len();
2537                let marker_column = bq.indent.len();
2538                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2539                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2540                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2541                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2542
2543                lines[i].blockquote = Some(BlockquoteInfo {
2544                    nesting_level,
2545                    indent: bq.indent.to_string(),
2546                    marker_column,
2547                    prefix,
2548                    content: bq.content.to_string(),
2549                    has_no_space_after_marker: has_no_space,
2550                    has_multiple_spaces_after_marker: has_multiple_spaces,
2551                    needs_md028_fix,
2552                });
2553
2554                // Update is_horizontal_rule for blockquote content
2555                // The original detection doesn't strip blockquote prefix, so we need to check here
2556                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2557                    lines[i].is_horizontal_rule = true;
2558                }
2559            }
2560
2561            // Now apply skip conditions for heading detection
2562            if lines[i].in_code_block {
2563                continue;
2564            }
2565
2566            // Skip lines in front matter
2567            if front_matter_end > 0 && i < front_matter_end {
2568                continue;
2569            }
2570
2571            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2572            if lines[i].in_html_block {
2573                continue;
2574            }
2575
2576            // Skip heading detection for blank lines
2577            if lines[i].is_blank {
2578                continue;
2579            }
2580
2581            // Check for ATX headings (but skip MkDocs snippet lines)
2582            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2583            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2584                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2585                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2586            } else {
2587                false
2588            };
2589
2590            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2591                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2592                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2593                    continue;
2594                }
2595                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2596                // This prevents false positives where `#fragment` is detected as a heading
2597                let line_offset = lines[i].byte_offset;
2598                if link_byte_ranges
2599                    .iter()
2600                    .any(|&(start, end)| line_offset > start && line_offset < end)
2601                {
2602                    continue;
2603                }
2604                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2605                let hashes = caps.get(2).map_or("", |m| m.as_str());
2606                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2607                let rest = caps.get(4).map_or("", |m| m.as_str());
2608
2609                let level = hashes.len() as u8;
2610                let marker_column = leading_spaces.len();
2611
2612                // Check for closing sequence, but handle custom IDs that might come after
2613                let (text, has_closing, closing_seq) = {
2614                    // First check if there's a custom ID at the end
2615                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2616                        // Check if this looks like a valid custom ID (ends with })
2617                        if rest[id_start..].trim_end().ends_with('}') {
2618                            // Split off the custom ID
2619                            (&rest[..id_start], &rest[id_start..])
2620                        } else {
2621                            (rest, "")
2622                        }
2623                    } else {
2624                        (rest, "")
2625                    };
2626
2627                    // Now look for closing hashes in the part before the custom ID
2628                    let trimmed_rest = rest_without_id.trim_end();
2629                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2630                        // Find the start of the hash sequence by walking backwards
2631                        // Use char_indices to get byte positions at char boundaries
2632                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2633
2634                        // Find which char index corresponds to last_hash_byte_pos
2635                        let last_hash_char_idx = char_positions
2636                            .iter()
2637                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2638
2639                        if let Some(mut char_idx) = last_hash_char_idx {
2640                            // Walk backwards to find start of hash sequence
2641                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2642                                char_idx -= 1;
2643                            }
2644
2645                            // Get the byte position of the start of hashes
2646                            let start_of_hashes = char_positions[char_idx].0;
2647
2648                            // Check if there's at least one space before the closing hashes
2649                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2650
2651                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2652                            let potential_closing = &trimmed_rest[start_of_hashes..];
2653                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2654
2655                            if is_all_hashes && has_space_before {
2656                                // This is a closing sequence
2657                                let closing_hashes = potential_closing.to_string();
2658                                // The text is everything before the closing hashes
2659                                // Don't include the custom ID here - it will be extracted later
2660                                let text_part = if !custom_id_part.is_empty() {
2661                                    // If we have a custom ID, append it back to get the full rest
2662                                    // This allows the extract_header_id function to handle it properly
2663                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2664                                } else {
2665                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2666                                };
2667                                (text_part, true, closing_hashes)
2668                            } else {
2669                                // Not a valid closing sequence, return the full content
2670                                (rest.to_string(), false, String::new())
2671                            }
2672                        } else {
2673                            // Couldn't find char boundary, return the full content
2674                            (rest.to_string(), false, String::new())
2675                        }
2676                    } else {
2677                        // No hashes found, return the full content
2678                        (rest.to_string(), false, String::new())
2679                    }
2680                };
2681
2682                let content_column = marker_column + hashes.len() + spaces_after.len();
2683
2684                // Extract custom header ID if present
2685                let raw_text = text.trim().to_string();
2686                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2687
2688                // If no custom ID was found on the header line, check the next line for standalone attr-list
2689                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2690                    let next_line = content_lines[i + 1];
2691                    if !lines[i + 1].in_code_block
2692                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2693                        && let Some(next_line_id) =
2694                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2695                    {
2696                        custom_id = Some(next_line_id);
2697                    }
2698                }
2699
2700                // ATX heading is "valid" for processing by heading rules if:
2701                // 1. Has space after # (CommonMark compliant): `# Heading`
2702                // 2. Is empty (just hashes): `#`
2703                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2704                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2705                //
2706                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2707                // - `#tag` - single # with lowercase (social hashtag)
2708                // - `#123` - single # with number (GitHub issue ref)
2709                let is_valid = !spaces_after.is_empty()
2710                    || rest.is_empty()
2711                    || level > 1
2712                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2713
2714                lines[i].heading = Some(HeadingInfo {
2715                    level,
2716                    style: HeadingStyle::ATX,
2717                    marker: hashes.to_string(),
2718                    marker_column,
2719                    content_column,
2720                    text: clean_text,
2721                    custom_id,
2722                    raw_text,
2723                    has_closing_sequence: has_closing,
2724                    closing_sequence: closing_seq,
2725                    is_valid,
2726                });
2727            }
2728            // Check for Setext headings (need to look at next line)
2729            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2730                let next_line = content_lines[i + 1];
2731                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2732                    // Skip if next line is front matter delimiter
2733                    if front_matter_end > 0 && i < front_matter_end {
2734                        continue;
2735                    }
2736
2737                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2738                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2739                    {
2740                        continue;
2741                    }
2742
2743                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2744                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2745                    let content_line = line.trim();
2746
2747                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2748                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2749                        continue;
2750                    }
2751
2752                    // Skip underscore thematic breaks (___)
2753                    if content_line.starts_with('_') {
2754                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2755                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2756                            continue;
2757                        }
2758                    }
2759
2760                    // Skip numbered lists (1. Item, 2. Item, etc.)
2761                    if let Some(first_char) = content_line.chars().next()
2762                        && first_char.is_ascii_digit()
2763                    {
2764                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2765                        if num_end < content_line.len() {
2766                            let next = content_line.chars().nth(num_end);
2767                            if next == Some('.') || next == Some(')') {
2768                                continue;
2769                            }
2770                        }
2771                    }
2772
2773                    // Skip ATX headings
2774                    if ATX_HEADING_REGEX.is_match(line) {
2775                        continue;
2776                    }
2777
2778                    // Skip blockquotes
2779                    if content_line.starts_with('>') {
2780                        continue;
2781                    }
2782
2783                    // Skip code fences
2784                    let trimmed_start = line.trim_start();
2785                    if trimmed_start.len() >= 3 {
2786                        let first_three: String = trimmed_start.chars().take(3).collect();
2787                        if first_three == "```" || first_three == "~~~" {
2788                            continue;
2789                        }
2790                    }
2791
2792                    // Skip HTML blocks
2793                    if content_line.starts_with('<') {
2794                        continue;
2795                    }
2796
2797                    let underline = next_line.trim();
2798
2799                    let level = if underline.starts_with('=') { 1 } else { 2 };
2800                    let style = if level == 1 {
2801                        HeadingStyle::Setext1
2802                    } else {
2803                        HeadingStyle::Setext2
2804                    };
2805
2806                    // Extract custom header ID if present
2807                    let raw_text = line.trim().to_string();
2808                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2809
2810                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2811                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2812                        let attr_line = content_lines[i + 2];
2813                        if !lines[i + 2].in_code_block
2814                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2815                            && let Some(attr_line_id) =
2816                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2817                        {
2818                            custom_id = Some(attr_line_id);
2819                        }
2820                    }
2821
2822                    lines[i].heading = Some(HeadingInfo {
2823                        level,
2824                        style,
2825                        marker: underline.to_string(),
2826                        marker_column: next_line.len() - next_line.trim_start().len(),
2827                        content_column: lines[i].indent,
2828                        text: clean_text,
2829                        custom_id,
2830                        raw_text,
2831                        has_closing_sequence: false,
2832                        closing_sequence: String::new(),
2833                        is_valid: true, // Setext headings are always valid
2834                    });
2835                }
2836            }
2837        }
2838    }
2839
2840    /// Detect HTML blocks in the content
2841    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842        // HTML block elements that trigger block context
2843        // Includes HTML5 media, embedded content, and interactive elements
2844        const BLOCK_ELEMENTS: &[&str] = &[
2845            "address",
2846            "article",
2847            "aside",
2848            "audio",
2849            "blockquote",
2850            "canvas",
2851            "details",
2852            "dialog",
2853            "dd",
2854            "div",
2855            "dl",
2856            "dt",
2857            "embed",
2858            "fieldset",
2859            "figcaption",
2860            "figure",
2861            "footer",
2862            "form",
2863            "h1",
2864            "h2",
2865            "h3",
2866            "h4",
2867            "h5",
2868            "h6",
2869            "header",
2870            "hr",
2871            "iframe",
2872            "li",
2873            "main",
2874            "menu",
2875            "nav",
2876            "noscript",
2877            "object",
2878            "ol",
2879            "p",
2880            "picture",
2881            "pre",
2882            "script",
2883            "search",
2884            "section",
2885            "source",
2886            "style",
2887            "summary",
2888            "svg",
2889            "table",
2890            "tbody",
2891            "td",
2892            "template",
2893            "textarea",
2894            "tfoot",
2895            "th",
2896            "thead",
2897            "tr",
2898            "track",
2899            "ul",
2900            "video",
2901        ];
2902
2903        let mut i = 0;
2904        while i < lines.len() {
2905            // Skip if already in code block or front matter
2906            if lines[i].in_code_block || lines[i].in_front_matter {
2907                i += 1;
2908                continue;
2909            }
2910
2911            let trimmed = lines[i].content(content).trim_start();
2912
2913            // Check if line starts with an HTML tag
2914            if trimmed.starts_with('<') && trimmed.len() > 1 {
2915                // Extract tag name safely
2916                let after_bracket = &trimmed[1..];
2917                let is_closing = after_bracket.starts_with('/');
2918                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920                // Extract tag name (stop at space, >, /, or end of string)
2921                let tag_name = tag_start
2922                    .chars()
2923                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924                    .collect::<String>()
2925                    .to_lowercase();
2926
2927                // Check if it's a block element
2928                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929                    // Mark this line as in HTML block
2930                    lines[i].in_html_block = true;
2931
2932                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2933                    // This avoids complex nesting logic that might cause infinite loops
2934                    // Only search for closing tag on subsequent lines if the opening tag
2935                    // does NOT have its closing tag on the same line
2936                    if !is_closing {
2937                        let closing_tag = format!("</{tag_name}>");
2938
2939                        // Check if closing tag is on the same line as opening tag
2940                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
2941                        let same_line_close = lines[i].content(content).contains(&closing_tag);
2942
2943                        // Only search subsequent lines if the tag isn't self-closed on this line
2944                        if !same_line_close {
2945                            // style and script tags can contain blank lines (CSS/JS formatting)
2946                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
2947                            let mut j = i + 1;
2948                            let mut found_closing_tag = false;
2949                            while j < lines.len() && j < i + 100 {
2950                                // Limit search to 100 lines
2951                                // Stop at blank lines (except for style/script tags)
2952                                if !allow_blank_lines && lines[j].is_blank {
2953                                    break;
2954                                }
2955
2956                                lines[j].in_html_block = true;
2957
2958                                // Check if this line contains the closing tag
2959                                if lines[j].content(content).contains(&closing_tag) {
2960                                    found_closing_tag = true;
2961                                }
2962
2963                                // After finding closing tag, continue marking lines as
2964                                // in_html_block until blank line (per CommonMark spec)
2965                                if found_closing_tag {
2966                                    j += 1;
2967                                    // Continue marking subsequent lines until blank
2968                                    while j < lines.len() && j < i + 100 {
2969                                        if lines[j].is_blank {
2970                                            break;
2971                                        }
2972                                        lines[j].in_html_block = true;
2973                                        j += 1;
2974                                    }
2975                                    break;
2976                                }
2977                                j += 1;
2978                            }
2979                        }
2980                    }
2981                }
2982            }
2983
2984            i += 1;
2985        }
2986    }
2987
2988    /// Detect ESM import/export blocks anywhere in MDX files
2989    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2990    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2991        // Only process MDX files
2992        if !flavor.supports_esm_blocks() {
2993            return;
2994        }
2995
2996        let mut in_multiline_import = false;
2997
2998        for line in lines.iter_mut() {
2999            // Skip code blocks, front matter, and HTML comments
3000            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3001                in_multiline_import = false;
3002                continue;
3003            }
3004
3005            let line_content = line.content(content);
3006            let trimmed = line_content.trim();
3007
3008            // Handle continuation of multi-line import/export
3009            if in_multiline_import {
3010                line.in_esm_block = true;
3011                // Check if this line completes the statement
3012                // Multi-line import ends when we see the closing quote + optional semicolon
3013                if trimmed.ends_with('\'')
3014                    || trimmed.ends_with('"')
3015                    || trimmed.ends_with("';")
3016                    || trimmed.ends_with("\";")
3017                    || line_content.contains(';')
3018                {
3019                    in_multiline_import = false;
3020                }
3021                continue;
3022            }
3023
3024            // Skip blank lines
3025            if line.is_blank {
3026                continue;
3027            }
3028
3029            // Check if line starts with import or export
3030            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3031                line.in_esm_block = true;
3032
3033                // Determine if this is a complete single-line statement or starts a multi-line one
3034                // Multi-line imports look like:
3035                //   import {
3036                //     Foo,
3037                //     Bar
3038                //   } from 'module'
3039                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3040                let is_import = trimmed.starts_with("import ");
3041
3042                // Check for simple complete statements
3043                let is_complete =
3044                    // Ends with semicolon
3045                    trimmed.ends_with(';')
3046                    // import/export with from clause that ends with quote
3047                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3048                    // Simple export (export const/let/var/function/class without from)
3049                    || (!is_import && !trimmed.contains(" from ") && (
3050                        trimmed.starts_with("export const ")
3051                        || trimmed.starts_with("export let ")
3052                        || trimmed.starts_with("export var ")
3053                        || trimmed.starts_with("export function ")
3054                        || trimmed.starts_with("export class ")
3055                        || trimmed.starts_with("export default ")
3056                    ));
3057
3058                if !is_complete && is_import {
3059                    // Only imports can span multiple lines in the typical case
3060                    // Check if it looks like the start of a multi-line import
3061                    // e.g., "import {" or "import type {"
3062                    if trimmed.contains('{') && !trimmed.contains('}') {
3063                        in_multiline_import = true;
3064                    }
3065                }
3066            }
3067        }
3068    }
3069
3070    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3071    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3072    fn detect_jsx_and_mdx_comments(
3073        content: &str,
3074        lines: &mut [LineInfo],
3075        flavor: MarkdownFlavor,
3076        code_blocks: &[(usize, usize)],
3077    ) -> (ByteRanges, ByteRanges) {
3078        // Only process MDX files
3079        if !flavor.supports_jsx() {
3080            return (Vec::new(), Vec::new());
3081        }
3082
3083        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3084        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3085
3086        // Quick check - if no braces, no JSX expressions or MDX comments
3087        if !content.contains('{') {
3088            return (jsx_expression_ranges, mdx_comment_ranges);
3089        }
3090
3091        let bytes = content.as_bytes();
3092        let mut i = 0;
3093
3094        while i < bytes.len() {
3095            if bytes[i] == b'{' {
3096                // Check if we're in a code block
3097                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3098                    i += 1;
3099                    continue;
3100                }
3101
3102                let start = i;
3103
3104                // Check if it's an MDX comment: {/* ... */}
3105                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3106                    // Find the closing */}
3107                    let mut j = i + 3;
3108                    while j + 2 < bytes.len() {
3109                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3110                            let end = j + 3;
3111                            mdx_comment_ranges.push((start, end));
3112
3113                            // Mark lines as in MDX comment
3114                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3115                                line.in_mdx_comment = true;
3116                            });
3117
3118                            i = end;
3119                            break;
3120                        }
3121                        j += 1;
3122                    }
3123                    if j + 2 >= bytes.len() {
3124                        // Unclosed MDX comment - mark rest as comment
3125                        mdx_comment_ranges.push((start, bytes.len()));
3126                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3127                            line.in_mdx_comment = true;
3128                        });
3129                        break;
3130                    }
3131                } else {
3132                    // Regular JSX expression: { ... }
3133                    // Need to handle nested braces
3134                    let mut brace_depth = 1;
3135                    let mut j = i + 1;
3136                    let mut in_string = false;
3137                    let mut string_char = b'"';
3138
3139                    while j < bytes.len() && brace_depth > 0 {
3140                        let c = bytes[j];
3141
3142                        // Handle strings to avoid counting braces inside them
3143                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3144                            in_string = true;
3145                            string_char = c;
3146                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3147                            in_string = false;
3148                        } else if !in_string {
3149                            if c == b'{' {
3150                                brace_depth += 1;
3151                            } else if c == b'}' {
3152                                brace_depth -= 1;
3153                            }
3154                        }
3155                        j += 1;
3156                    }
3157
3158                    if brace_depth == 0 {
3159                        let end = j;
3160                        jsx_expression_ranges.push((start, end));
3161
3162                        // Mark lines as in JSX expression
3163                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3164                            line.in_jsx_expression = true;
3165                        });
3166
3167                        i = end;
3168                    } else {
3169                        i += 1;
3170                    }
3171                }
3172            } else {
3173                i += 1;
3174            }
3175        }
3176
3177        (jsx_expression_ranges, mdx_comment_ranges)
3178    }
3179
3180    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3181    /// and populate the corresponding fields in LineInfo
3182    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3183        if flavor != MarkdownFlavor::MkDocs {
3184            return;
3185        }
3186
3187        use crate::utils::mkdocs_admonitions;
3188        use crate::utils::mkdocs_definition_lists;
3189        use crate::utils::mkdocs_tabs;
3190
3191        let content_lines: Vec<&str> = content.lines().collect();
3192
3193        // Track admonition context
3194        let mut in_admonition = false;
3195        let mut admonition_indent = 0;
3196
3197        // Track tab context
3198        let mut in_tab = false;
3199        let mut tab_indent = 0;
3200
3201        // Track definition list context
3202        let mut in_definition = false;
3203
3204        for (i, line) in content_lines.iter().enumerate() {
3205            if i >= lines.len() {
3206                break;
3207            }
3208
3209            // Skip lines in code blocks
3210            if lines[i].in_code_block {
3211                continue;
3212            }
3213
3214            // Check for admonition markers
3215            if mkdocs_admonitions::is_admonition_start(line) {
3216                in_admonition = true;
3217                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3218                lines[i].in_admonition = true;
3219            } else if in_admonition {
3220                // Check if still in admonition content
3221                if line.trim().is_empty() {
3222                    // Blank lines are part of admonitions
3223                    lines[i].in_admonition = true;
3224                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3225                    lines[i].in_admonition = true;
3226                } else {
3227                    // End of admonition
3228                    in_admonition = false;
3229                    // Check if this line starts a new admonition
3230                    if mkdocs_admonitions::is_admonition_start(line) {
3231                        in_admonition = true;
3232                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3233                        lines[i].in_admonition = true;
3234                    }
3235                }
3236            }
3237
3238            // Check for tab markers
3239            if mkdocs_tabs::is_tab_marker(line) {
3240                in_tab = true;
3241                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3242                lines[i].in_content_tab = true;
3243            } else if in_tab {
3244                // Check if still in tab content
3245                if line.trim().is_empty() {
3246                    // Blank lines are part of tabs
3247                    lines[i].in_content_tab = true;
3248                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3249                    lines[i].in_content_tab = true;
3250                } else {
3251                    // End of tab content
3252                    in_tab = false;
3253                    // Check if this line starts a new tab
3254                    if mkdocs_tabs::is_tab_marker(line) {
3255                        in_tab = true;
3256                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3257                        lines[i].in_content_tab = true;
3258                    }
3259                }
3260            }
3261
3262            // Check for definition list items
3263            if mkdocs_definition_lists::is_definition_line(line) {
3264                in_definition = true;
3265                lines[i].in_definition_list = true;
3266            } else if in_definition {
3267                // Check if continuation
3268                if mkdocs_definition_lists::is_definition_continuation(line) {
3269                    lines[i].in_definition_list = true;
3270                } else if line.trim().is_empty() {
3271                    // Blank line might continue definition
3272                    lines[i].in_definition_list = true;
3273                } else if mkdocs_definition_lists::could_be_term_line(line) {
3274                    // This could be a new term - check if followed by definition
3275                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3276                    {
3277                        lines[i].in_definition_list = true;
3278                    } else {
3279                        in_definition = false;
3280                    }
3281                } else {
3282                    in_definition = false;
3283                }
3284            } else if mkdocs_definition_lists::could_be_term_line(line) {
3285                // Check if this is a term followed by a definition
3286                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3287                    lines[i].in_definition_list = true;
3288                    in_definition = true;
3289                }
3290            }
3291        }
3292    }
3293
3294    /// Helper to mark lines within a byte range
3295    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3296    where
3297        F: FnMut(&mut LineInfo),
3298    {
3299        // Find lines that overlap with the range
3300        for line in lines.iter_mut() {
3301            let line_start = line.byte_offset;
3302            let line_end = line.byte_offset + line.byte_len;
3303
3304            // Check if this line overlaps with the range
3305            if line_start < end && line_end > start {
3306                f(line);
3307            }
3308        }
3309
3310        // Silence unused warning for content (needed for signature consistency)
3311        let _ = content;
3312    }
3313
3314    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3315    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3316        // Quick check - if no backticks, no code spans
3317        if !content.contains('`') {
3318            return Vec::new();
3319        }
3320
3321        // Use pulldown-cmark's streaming parser with byte offsets
3322        let parser = Parser::new(content).into_offset_iter();
3323        let mut ranges = Vec::new();
3324
3325        for (event, range) in parser {
3326            if let Event::Code(_) = event {
3327                ranges.push((range.start, range.end));
3328            }
3329        }
3330
3331        Self::build_code_spans_from_ranges(content, lines, &ranges)
3332    }
3333
3334    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3335        let mut code_spans = Vec::new();
3336        if ranges.is_empty() {
3337            return code_spans;
3338        }
3339
3340        for &(start_pos, end_pos) in ranges {
3341            // The range includes the backticks, extract the actual content
3342            let full_span = &content[start_pos..end_pos];
3343            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3344
3345            // Extract content between backticks, preserving spaces
3346            let content_start = start_pos + backtick_count;
3347            let content_end = end_pos - backtick_count;
3348            let span_content = if content_start < content_end {
3349                content[content_start..content_end].to_string()
3350            } else {
3351                String::new()
3352            };
3353
3354            // Use binary search to find line number - O(log n) instead of O(n)
3355            // Find the rightmost line whose byte_offset <= start_pos
3356            let line_idx = lines
3357                .partition_point(|line| line.byte_offset <= start_pos)
3358                .saturating_sub(1);
3359            let line_num = line_idx + 1;
3360            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3361
3362            // Find end column using binary search
3363            let end_line_idx = lines
3364                .partition_point(|line| line.byte_offset <= end_pos)
3365                .saturating_sub(1);
3366            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3367
3368            // Convert byte offsets to character positions for correct Unicode handling
3369            // This ensures consistency with warning.column which uses character positions
3370            let line_content = lines[line_idx].content(content);
3371            let col_start = if byte_col_start <= line_content.len() {
3372                line_content[..byte_col_start].chars().count()
3373            } else {
3374                line_content.chars().count()
3375            };
3376
3377            let end_line_content = lines[end_line_idx].content(content);
3378            let col_end = if byte_col_end <= end_line_content.len() {
3379                end_line_content[..byte_col_end].chars().count()
3380            } else {
3381                end_line_content.chars().count()
3382            };
3383
3384            code_spans.push(CodeSpan {
3385                line: line_num,
3386                end_line: end_line_idx + 1,
3387                start_col: col_start,
3388                end_col: col_end,
3389                byte_offset: start_pos,
3390                byte_end: end_pos,
3391                backtick_count,
3392                content: span_content,
3393            });
3394        }
3395
3396        // Sort by position to ensure consistent ordering
3397        code_spans.sort_by_key(|span| span.byte_offset);
3398
3399        code_spans
3400    }
3401
3402    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3403    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3404        let mut math_spans = Vec::new();
3405
3406        // Quick check - if no $ signs, no math spans
3407        if !content.contains('$') {
3408            return math_spans;
3409        }
3410
3411        // Use pulldown-cmark with ENABLE_MATH option
3412        let mut options = Options::empty();
3413        options.insert(Options::ENABLE_MATH);
3414        let parser = Parser::new_ext(content, options).into_offset_iter();
3415
3416        for (event, range) in parser {
3417            let (is_display, math_content) = match &event {
3418                Event::InlineMath(text) => (false, text.as_ref()),
3419                Event::DisplayMath(text) => (true, text.as_ref()),
3420                _ => continue,
3421            };
3422
3423            let start_pos = range.start;
3424            let end_pos = range.end;
3425
3426            // Use binary search to find line number - O(log n) instead of O(n)
3427            let line_idx = lines
3428                .partition_point(|line| line.byte_offset <= start_pos)
3429                .saturating_sub(1);
3430            let line_num = line_idx + 1;
3431            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3432
3433            // Find end column using binary search
3434            let end_line_idx = lines
3435                .partition_point(|line| line.byte_offset <= end_pos)
3436                .saturating_sub(1);
3437            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3438
3439            // Convert byte offsets to character positions for correct Unicode handling
3440            let line_content = lines[line_idx].content(content);
3441            let col_start = if byte_col_start <= line_content.len() {
3442                line_content[..byte_col_start].chars().count()
3443            } else {
3444                line_content.chars().count()
3445            };
3446
3447            let end_line_content = lines[end_line_idx].content(content);
3448            let col_end = if byte_col_end <= end_line_content.len() {
3449                end_line_content[..byte_col_end].chars().count()
3450            } else {
3451                end_line_content.chars().count()
3452            };
3453
3454            math_spans.push(MathSpan {
3455                line: line_num,
3456                end_line: end_line_idx + 1,
3457                start_col: col_start,
3458                end_col: col_end,
3459                byte_offset: start_pos,
3460                byte_end: end_pos,
3461                is_display,
3462                content: math_content.to_string(),
3463            });
3464        }
3465
3466        // Sort by position to ensure consistent ordering
3467        math_spans.sort_by_key(|span| span.byte_offset);
3468
3469        math_spans
3470    }
3471
3472    /// Parse all list blocks in the content (legacy line-by-line approach)
3473    ///
3474    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3475    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3476    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3477    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3478    ///   treated as list continuation (based on the list marker width)
3479    ///
3480    /// When a new list item is encountered, we check if list-breaking content was seen
3481    /// since the last item. If so, we start a new list block.
3482    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3483        // Minimum indentation for unordered list continuation per CommonMark spec
3484        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3485
3486        /// Initialize or reset the forward-scanning tracking state.
3487        /// This helper eliminates code duplication across three initialization sites.
3488        #[inline]
3489        fn reset_tracking_state(
3490            list_item: &ListItemInfo,
3491            has_list_breaking_content: &mut bool,
3492            min_continuation: &mut usize,
3493        ) {
3494            *has_list_breaking_content = false;
3495            let marker_width = if list_item.is_ordered {
3496                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3497            } else {
3498                list_item.marker.len()
3499            };
3500            *min_continuation = if list_item.is_ordered {
3501                marker_width
3502            } else {
3503                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3504            };
3505        }
3506
3507        // Pre-size based on lines that could be list items
3508        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3509        let mut current_block: Option<ListBlock> = None;
3510        let mut last_list_item_line = 0;
3511        let mut current_indent_level = 0;
3512        let mut last_marker_width = 0;
3513
3514        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3515        let mut has_list_breaking_content_since_last_item = false;
3516        let mut min_continuation_for_tracking = 0;
3517
3518        for (line_idx, line_info) in lines.iter().enumerate() {
3519            let line_num = line_idx + 1;
3520
3521            // Enhanced code block handling using Design #3's context analysis
3522            if line_info.in_code_block {
3523                if let Some(ref mut block) = current_block {
3524                    // Calculate minimum indentation for list continuation
3525                    let min_continuation_indent =
3526                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3527
3528                    // Analyze code block context using the three-tier classification
3529                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3530
3531                    match context {
3532                        CodeBlockContext::Indented => {
3533                            // Code block is properly indented - continues the list
3534                            block.end_line = line_num;
3535                            continue;
3536                        }
3537                        CodeBlockContext::Standalone => {
3538                            // Code block separates lists - end current block
3539                            let completed_block = current_block.take().unwrap();
3540                            list_blocks.push(completed_block);
3541                            continue;
3542                        }
3543                        CodeBlockContext::Adjacent => {
3544                            // Edge case - use conservative behavior (continue list)
3545                            block.end_line = line_num;
3546                            continue;
3547                        }
3548                    }
3549                } else {
3550                    // No current list block - skip code block lines
3551                    continue;
3552                }
3553            }
3554
3555            // Extract blockquote prefix if any
3556            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3557                caps.get(0).unwrap().as_str().to_string()
3558            } else {
3559                String::new()
3560            };
3561
3562            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3563            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3564            if let Some(ref block) = current_block
3565                && line_info.list_item.is_none()
3566                && !line_info.is_blank
3567                && !line_info.in_code_span_continuation
3568            {
3569                let line_content = line_info.content(content).trim();
3570
3571                // Check for structural separators that break lists
3572                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3573                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3574                // as they indicate improper indentation rather than lazy continuation.
3575                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3576
3577                // Check if blockquote context changes (different prefix than current block)
3578                // Lines within the SAME blockquote context don't break lists
3579                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3580
3581                let breaks_list = line_info.heading.is_some()
3582                    || line_content.starts_with("---")
3583                    || line_content.starts_with("***")
3584                    || line_content.starts_with("___")
3585                    || crate::utils::skip_context::is_table_line(line_content)
3586                    || blockquote_prefix_changes
3587                    || (line_info.indent > 0
3588                        && line_info.indent < min_continuation_for_tracking
3589                        && !is_lazy_continuation);
3590
3591                if breaks_list {
3592                    has_list_breaking_content_since_last_item = true;
3593                }
3594            }
3595
3596            // If this line is a code span continuation within an active list block,
3597            // extend the block's end_line to include this line (maintains list continuity)
3598            if line_info.in_code_span_continuation
3599                && line_info.list_item.is_none()
3600                && let Some(ref mut block) = current_block
3601            {
3602                block.end_line = line_num;
3603            }
3604
3605            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3606            // properly indented lines within the list). This ensures the workaround at line 2448
3607            // works correctly when there are multiple continuation lines before a nested list item.
3608            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3609            // For blockquote lines, compute effective indent after stripping the prefix
3610            let effective_continuation_indent = if let Some(ref block) = current_block {
3611                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3612                let line_content = line_info.content(content);
3613                let line_bq_level = line_content
3614                    .chars()
3615                    .take_while(|c| *c == '>' || c.is_whitespace())
3616                    .filter(|&c| c == '>')
3617                    .count();
3618                if line_bq_level > 0 && line_bq_level == block_bq_level {
3619                    // Compute indent after blockquote markers
3620                    let mut pos = 0;
3621                    let mut found_markers = 0;
3622                    for c in line_content.chars() {
3623                        pos += c.len_utf8();
3624                        if c == '>' {
3625                            found_markers += 1;
3626                            if found_markers == line_bq_level {
3627                                if line_content.get(pos..pos + 1) == Some(" ") {
3628                                    pos += 1;
3629                                }
3630                                break;
3631                            }
3632                        }
3633                    }
3634                    let after_bq = &line_content[pos..];
3635                    after_bq.len() - after_bq.trim_start().len()
3636                } else {
3637                    line_info.indent
3638                }
3639            } else {
3640                line_info.indent
3641            };
3642            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3643                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3644                if block_bq_level > 0 {
3645                    if block.is_ordered { last_marker_width } else { 2 }
3646                } else {
3647                    min_continuation_for_tracking
3648                }
3649            } else {
3650                min_continuation_for_tracking
3651            };
3652            // Lazy continuation allows unindented text to continue a list item,
3653            // but NOT structural elements like headings, code fences, or horizontal rules
3654            let is_structural_element = line_info.heading.is_some()
3655                || line_info.content(content).trim().starts_with("```")
3656                || line_info.content(content).trim().starts_with("~~~");
3657            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3658                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3659
3660            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3661                eprintln!(
3662                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3663                    line_num,
3664                    effective_continuation_indent,
3665                    adjusted_min_continuation_for_tracking,
3666                    is_valid_continuation,
3667                    line_info.in_code_span_continuation,
3668                    line_info.in_code_block,
3669                    current_block.is_some()
3670                );
3671            }
3672
3673            if !line_info.in_code_span_continuation
3674                && line_info.list_item.is_none()
3675                && !line_info.is_blank
3676                && !line_info.in_code_block
3677                && is_valid_continuation
3678                && let Some(ref mut block) = current_block
3679            {
3680                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3681                    eprintln!(
3682                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3683                        line_num, block.end_line, line_num
3684                    );
3685                }
3686                block.end_line = line_num;
3687            }
3688
3689            // Check if this line is a list item
3690            if let Some(list_item) = &line_info.list_item {
3691                // Calculate nesting level based on indentation
3692                let item_indent = list_item.marker_column;
3693                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3694
3695                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3696                    eprintln!(
3697                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3698                        line_num, list_item.marker, item_indent
3699                    );
3700                }
3701
3702                if let Some(ref mut block) = current_block {
3703                    // Check if this continues the current block
3704                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3705                    // or a continuation at the same or lower level
3706                    let is_nested = nesting > block.nesting_level;
3707                    let same_type =
3708                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3709                    let same_context = block.blockquote_prefix == blockquote_prefix;
3710                    // Allow one blank line after last item, or lines immediately after block content
3711                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3712
3713                    // For unordered lists, also check marker consistency
3714                    let marker_compatible =
3715                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3716
3717                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3718                    // This eliminates the quadratic bottleneck from issue #148
3719                    let has_non_list_content = has_list_breaking_content_since_last_item;
3720
3721                    // A list continues if:
3722                    // 1. It's a nested item (indented more than the parent), OR
3723                    // 2. It's the same type at the same level with reasonable distance
3724                    let mut continues_list = if is_nested {
3725                        // Nested items always continue the list if they're in the same context
3726                        same_context && reasonable_distance && !has_non_list_content
3727                    } else {
3728                        // Same-level items need to match type and markers
3729                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3730                    };
3731
3732                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3733                        eprintln!(
3734                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3735                            line_num,
3736                            continues_list,
3737                            is_nested,
3738                            same_type,
3739                            same_context,
3740                            reasonable_distance,
3741                            marker_compatible,
3742                            has_non_list_content,
3743                            last_list_item_line,
3744                            block.end_line
3745                        );
3746                    }
3747
3748                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3749                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3750                    // Apply for: nested items (different types OK), OR same-level same-type items
3751                    if !continues_list
3752                        && (is_nested || same_type)
3753                        && reasonable_distance
3754                        && line_num > 0
3755                        && block.end_line == line_num - 1
3756                    {
3757                        // Check if the previous line was a list item or a continuation of a list item
3758                        // (including lazy continuation lines)
3759                        if block.item_lines.contains(&(line_num - 1)) {
3760                            // They're consecutive list items - force them to be in the same list
3761                            continues_list = true;
3762                        } else {
3763                            // Previous line is a continuation line within this block
3764                            // (e.g., lazy continuation with indent=0)
3765                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3766                            continues_list = true;
3767                        }
3768                    }
3769
3770                    if continues_list {
3771                        // Extend current block
3772                        block.end_line = line_num;
3773                        block.item_lines.push(line_num);
3774
3775                        // Update max marker width
3776                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3777                            list_item.marker.len() + 1
3778                        } else {
3779                            list_item.marker.len()
3780                        });
3781
3782                        // Update marker consistency for unordered lists
3783                        if !block.is_ordered
3784                            && block.marker.is_some()
3785                            && block.marker.as_ref() != Some(&list_item.marker)
3786                        {
3787                            // Mixed markers, clear the marker field
3788                            block.marker = None;
3789                        }
3790
3791                        // Reset tracked state for issue #148 optimization
3792                        reset_tracking_state(
3793                            list_item,
3794                            &mut has_list_breaking_content_since_last_item,
3795                            &mut min_continuation_for_tracking,
3796                        );
3797                    } else {
3798                        // End current block and start a new one
3799                        // When a different list type starts AT THE SAME LEVEL (not nested),
3800                        // trim back lazy continuation lines (they become part of the gap, not the list)
3801                        // For nested items, different types are fine - they're sub-lists
3802                        if !same_type
3803                            && !is_nested
3804                            && let Some(&last_item) = block.item_lines.last()
3805                        {
3806                            block.end_line = last_item;
3807                        }
3808
3809                        list_blocks.push(block.clone());
3810
3811                        *block = ListBlock {
3812                            start_line: line_num,
3813                            end_line: line_num,
3814                            is_ordered: list_item.is_ordered,
3815                            marker: if list_item.is_ordered {
3816                                None
3817                            } else {
3818                                Some(list_item.marker.clone())
3819                            },
3820                            blockquote_prefix: blockquote_prefix.clone(),
3821                            item_lines: vec![line_num],
3822                            nesting_level: nesting,
3823                            max_marker_width: if list_item.is_ordered {
3824                                list_item.marker.len() + 1
3825                            } else {
3826                                list_item.marker.len()
3827                            },
3828                        };
3829
3830                        // Initialize tracked state for new block (issue #148 optimization)
3831                        reset_tracking_state(
3832                            list_item,
3833                            &mut has_list_breaking_content_since_last_item,
3834                            &mut min_continuation_for_tracking,
3835                        );
3836                    }
3837                } else {
3838                    // Start a new block
3839                    current_block = Some(ListBlock {
3840                        start_line: line_num,
3841                        end_line: line_num,
3842                        is_ordered: list_item.is_ordered,
3843                        marker: if list_item.is_ordered {
3844                            None
3845                        } else {
3846                            Some(list_item.marker.clone())
3847                        },
3848                        blockquote_prefix,
3849                        item_lines: vec![line_num],
3850                        nesting_level: nesting,
3851                        max_marker_width: list_item.marker.len(),
3852                    });
3853
3854                    // Initialize tracked state for new block (issue #148 optimization)
3855                    reset_tracking_state(
3856                        list_item,
3857                        &mut has_list_breaking_content_since_last_item,
3858                        &mut min_continuation_for_tracking,
3859                    );
3860                }
3861
3862                last_list_item_line = line_num;
3863                current_indent_level = item_indent;
3864                last_marker_width = if list_item.is_ordered {
3865                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3866                } else {
3867                    list_item.marker.len()
3868                };
3869            } else if let Some(ref mut block) = current_block {
3870                // Not a list item - check if it continues the current block
3871                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3872                    eprintln!(
3873                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3874                        line_num, line_info.is_blank
3875                    );
3876                }
3877
3878                // For MD032 compatibility, we use a simple approach:
3879                // - Indented lines continue the list
3880                // - Blank lines followed by indented content continue the list
3881                // - Everything else ends the list
3882
3883                // Check if the last line in the list block ended with a backslash (hard line break)
3884                // This handles cases where list items use backslash for hard line breaks
3885                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3886                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3887                } else {
3888                    false
3889                };
3890
3891                // Calculate minimum indentation for list continuation
3892                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3893                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3894                let min_continuation_indent = if block.is_ordered {
3895                    current_indent_level + last_marker_width
3896                } else {
3897                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3898                };
3899
3900                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3901                    // Indented line or backslash continuation continues the list
3902                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3903                        eprintln!(
3904                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3905                            line_num, line_info.indent, min_continuation_indent
3906                        );
3907                    }
3908                    block.end_line = line_num;
3909                } else if line_info.is_blank {
3910                    // Blank line - check if it's internal to the list or ending it
3911                    // We only include blank lines that are followed by more list content
3912                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3913                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3914                    }
3915                    let mut check_idx = line_idx + 1;
3916                    let mut found_continuation = false;
3917
3918                    // Skip additional blank lines
3919                    while check_idx < lines.len() && lines[check_idx].is_blank {
3920                        check_idx += 1;
3921                    }
3922
3923                    if check_idx < lines.len() {
3924                        let next_line = &lines[check_idx];
3925                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3926                        let next_content = next_line.content(content);
3927                        // Use blockquote level (count of >) to compare, not the full prefix
3928                        // This avoids issues where the regex captures extra whitespace
3929                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3930                        let next_bq_level_for_indent = next_content
3931                            .chars()
3932                            .take_while(|c| *c == '>' || c.is_whitespace())
3933                            .filter(|&c| c == '>')
3934                            .count();
3935                        let effective_indent =
3936                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3937                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3938                                // Find position after ">" and one space
3939                                let mut pos = 0;
3940                                let mut found_markers = 0;
3941                                for c in next_content.chars() {
3942                                    pos += c.len_utf8();
3943                                    if c == '>' {
3944                                        found_markers += 1;
3945                                        if found_markers == next_bq_level_for_indent {
3946                                            // Skip optional space after last >
3947                                            if next_content.get(pos..pos + 1) == Some(" ") {
3948                                                pos += 1;
3949                                            }
3950                                            break;
3951                                        }
3952                                    }
3953                                }
3954                                let after_blockquote_marker = &next_content[pos..];
3955                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3956                            } else {
3957                                next_line.indent
3958                            };
3959                        // Also adjust min_continuation_indent for blockquote lists
3960                        // The marker_column includes blockquote prefix, so subtract it
3961                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3962                            // For blockquote lists, the continuation is relative to blockquote content
3963                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3964                            if block.is_ordered { last_marker_width } else { 2 }
3965                        } else {
3966                            min_continuation_indent
3967                        };
3968                        // Check if followed by indented content (list continuation)
3969                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3970                            eprintln!(
3971                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3972                                line_num,
3973                                check_idx + 1,
3974                                effective_indent,
3975                                adjusted_min_continuation,
3976                                next_line.list_item.is_some(),
3977                                next_line.in_code_block
3978                            );
3979                        }
3980                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3981                            found_continuation = true;
3982                        }
3983                        // Check if followed by another list item at the same level
3984                        else if !next_line.in_code_block
3985                            && next_line.list_item.is_some()
3986                            && let Some(item) = &next_line.list_item
3987                        {
3988                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3989                                .find(next_line.content(content))
3990                                .map_or(String::new(), |m| m.as_str().to_string());
3991                            if item.marker_column == current_indent_level
3992                                && item.is_ordered == block.is_ordered
3993                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3994                            {
3995                                // Check if there was meaningful content between the list items (unused now)
3996                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3997                                // Pre-compute block's blockquote level for use in closures
3998                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3999                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4000                                    if let Some(between_line) = lines.get(idx) {
4001                                        let between_content = between_line.content(content);
4002                                        let trimmed = between_content.trim();
4003                                        // Skip empty lines
4004                                        if trimmed.is_empty() {
4005                                            return false;
4006                                        }
4007                                        // Check for meaningful content
4008                                        let line_indent = between_content.len() - between_content.trim_start().len();
4009
4010                                        // Check if blockquote level changed (not just if line starts with ">")
4011                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4012                                            .find(between_content)
4013                                            .map_or(String::new(), |m| m.as_str().to_string());
4014                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4015                                        let blockquote_level_changed =
4016                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4017
4018                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4019                                        if trimmed.starts_with("```")
4020                                            || trimmed.starts_with("~~~")
4021                                            || trimmed.starts_with("---")
4022                                            || trimmed.starts_with("***")
4023                                            || trimmed.starts_with("___")
4024                                            || blockquote_level_changed
4025                                            || crate::utils::skip_context::is_table_line(trimmed)
4026                                            || between_line.heading.is_some()
4027                                        {
4028                                            return true; // These are structural separators - meaningful content that breaks lists
4029                                        }
4030
4031                                        // Only properly indented content continues the list
4032                                        line_indent >= min_continuation_indent
4033                                    } else {
4034                                        false
4035                                    }
4036                                });
4037
4038                                if block.is_ordered {
4039                                    // For ordered lists: don't continue if there are structural separators
4040                                    // Check if there are structural separators between the list items
4041                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4042                                        if let Some(between_line) = lines.get(idx) {
4043                                            let between_content = between_line.content(content);
4044                                            let trimmed = between_content.trim();
4045                                            if trimmed.is_empty() {
4046                                                return false;
4047                                            }
4048                                            // Check if blockquote level changed (not just if line starts with ">")
4049                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4050                                                .find(between_content)
4051                                                .map_or(String::new(), |m| m.as_str().to_string());
4052                                            let between_bq_level =
4053                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4054                                            let blockquote_level_changed =
4055                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4056                                            // Check for structural separators that break lists
4057                                            trimmed.starts_with("```")
4058                                                || trimmed.starts_with("~~~")
4059                                                || trimmed.starts_with("---")
4060                                                || trimmed.starts_with("***")
4061                                                || trimmed.starts_with("___")
4062                                                || blockquote_level_changed
4063                                                || crate::utils::skip_context::is_table_line(trimmed)
4064                                                || between_line.heading.is_some()
4065                                        } else {
4066                                            false
4067                                        }
4068                                    });
4069                                    found_continuation = !has_structural_separators;
4070                                } else {
4071                                    // For unordered lists: also check for structural separators
4072                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4073                                        if let Some(between_line) = lines.get(idx) {
4074                                            let between_content = between_line.content(content);
4075                                            let trimmed = between_content.trim();
4076                                            if trimmed.is_empty() {
4077                                                return false;
4078                                            }
4079                                            // Check if blockquote level changed (not just if line starts with ">")
4080                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4081                                                .find(between_content)
4082                                                .map_or(String::new(), |m| m.as_str().to_string());
4083                                            let between_bq_level =
4084                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4085                                            let blockquote_level_changed =
4086                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4087                                            // Check for structural separators that break lists
4088                                            trimmed.starts_with("```")
4089                                                || trimmed.starts_with("~~~")
4090                                                || trimmed.starts_with("---")
4091                                                || trimmed.starts_with("***")
4092                                                || trimmed.starts_with("___")
4093                                                || blockquote_level_changed
4094                                                || crate::utils::skip_context::is_table_line(trimmed)
4095                                                || between_line.heading.is_some()
4096                                        } else {
4097                                            false
4098                                        }
4099                                    });
4100                                    found_continuation = !has_structural_separators;
4101                                }
4102                            }
4103                        }
4104                    }
4105
4106                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4107                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4108                    }
4109                    if found_continuation {
4110                        // Include the blank line in the block
4111                        block.end_line = line_num;
4112                    } else {
4113                        // Blank line ends the list - don't include it
4114                        list_blocks.push(block.clone());
4115                        current_block = None;
4116                    }
4117                } else {
4118                    // Check for lazy continuation - non-indented line immediately after a list item
4119                    // But only if the line has sufficient indentation for the list type
4120                    let min_required_indent = if block.is_ordered {
4121                        current_indent_level + last_marker_width
4122                    } else {
4123                        current_indent_level + 2
4124                    };
4125
4126                    // For lazy continuation to apply, the line must either:
4127                    // 1. Have no indentation (true lazy continuation)
4128                    // 2. Have sufficient indentation for the list type
4129                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4130                    let line_content = line_info.content(content).trim();
4131
4132                    // Check for table-like patterns
4133                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4134
4135                    // Check if blockquote level changed (not just if line starts with ">")
4136                    // Lines within the same blockquote level are NOT structural separators
4137                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4138                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4139                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4140
4141                    let is_structural_separator = line_info.heading.is_some()
4142                        || line_content.starts_with("```")
4143                        || line_content.starts_with("~~~")
4144                        || line_content.starts_with("---")
4145                        || line_content.starts_with("***")
4146                        || line_content.starts_with("___")
4147                        || blockquote_level_changed
4148                        || looks_like_table;
4149
4150                    // Allow lazy continuation if we're still within the same list block
4151                    // (not just immediately after a list item)
4152                    // Also treat code span continuations as valid continuations regardless of indent
4153                    let is_lazy_continuation = !is_structural_separator
4154                        && !line_info.is_blank
4155                        && (line_info.indent == 0
4156                            || line_info.indent >= min_required_indent
4157                            || line_info.in_code_span_continuation);
4158
4159                    if is_lazy_continuation {
4160                        // Per CommonMark, lazy continuation continues until a blank line
4161                        // or structural element, regardless of uppercase at line start
4162                        block.end_line = line_num;
4163                    } else {
4164                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4165                        list_blocks.push(block.clone());
4166                        current_block = None;
4167                    }
4168                }
4169            }
4170        }
4171
4172        // Don't forget the last block
4173        if let Some(block) = current_block {
4174            list_blocks.push(block);
4175        }
4176
4177        // Merge adjacent blocks that should be one
4178        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4179
4180        list_blocks
4181    }
4182
4183    /// Compute character frequency for fast content analysis
4184    fn compute_char_frequency(content: &str) -> CharFrequency {
4185        let mut frequency = CharFrequency::default();
4186
4187        for ch in content.chars() {
4188            match ch {
4189                '#' => frequency.hash_count += 1,
4190                '*' => frequency.asterisk_count += 1,
4191                '_' => frequency.underscore_count += 1,
4192                '-' => frequency.hyphen_count += 1,
4193                '+' => frequency.plus_count += 1,
4194                '>' => frequency.gt_count += 1,
4195                '|' => frequency.pipe_count += 1,
4196                '[' => frequency.bracket_count += 1,
4197                '`' => frequency.backtick_count += 1,
4198                '<' => frequency.lt_count += 1,
4199                '!' => frequency.exclamation_count += 1,
4200                '\n' => frequency.newline_count += 1,
4201                _ => {}
4202            }
4203        }
4204
4205        frequency
4206    }
4207
4208    /// Parse HTML tags in the content
4209    fn parse_html_tags(
4210        content: &str,
4211        lines: &[LineInfo],
4212        code_blocks: &[(usize, usize)],
4213        flavor: MarkdownFlavor,
4214    ) -> Vec<HtmlTag> {
4215        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4216            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4217
4218        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4219
4220        for cap in HTML_TAG_REGEX.captures_iter(content) {
4221            let full_match = cap.get(0).unwrap();
4222            let match_start = full_match.start();
4223            let match_end = full_match.end();
4224
4225            // Skip if in code block
4226            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4227                continue;
4228            }
4229
4230            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4231            let tag_name_original = cap.get(2).unwrap().as_str();
4232            let tag_name = tag_name_original.to_lowercase();
4233            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4234
4235            // Skip JSX components in MDX files (tags starting with uppercase letter)
4236            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4237            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4238                continue;
4239            }
4240
4241            // Find which line this tag is on
4242            let mut line_num = 1;
4243            let mut col_start = match_start;
4244            let mut col_end = match_end;
4245            for (idx, line_info) in lines.iter().enumerate() {
4246                if match_start >= line_info.byte_offset {
4247                    line_num = idx + 1;
4248                    col_start = match_start - line_info.byte_offset;
4249                    col_end = match_end - line_info.byte_offset;
4250                } else {
4251                    break;
4252                }
4253            }
4254
4255            html_tags.push(HtmlTag {
4256                line: line_num,
4257                start_col: col_start,
4258                end_col: col_end,
4259                byte_offset: match_start,
4260                byte_end: match_end,
4261                tag_name,
4262                is_closing,
4263                is_self_closing,
4264                raw_content: full_match.as_str().to_string(),
4265            });
4266        }
4267
4268        html_tags
4269    }
4270
4271    /// Parse table rows in the content
4272    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4273        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4274
4275        for (line_idx, line_info) in lines.iter().enumerate() {
4276            // Skip lines in code blocks or blank lines
4277            if line_info.in_code_block || line_info.is_blank {
4278                continue;
4279            }
4280
4281            let line = line_info.content(content);
4282            let line_num = line_idx + 1;
4283
4284            // Check if this line contains pipes (potential table row)
4285            if !line.contains('|') {
4286                continue;
4287            }
4288
4289            // Count columns by splitting on pipes
4290            let parts: Vec<&str> = line.split('|').collect();
4291            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4292
4293            // Check if this is a separator row
4294            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4295            let mut column_alignments = Vec::new();
4296
4297            if is_separator {
4298                for part in &parts[1..parts.len() - 1] {
4299                    // Skip first and last empty parts
4300                    let trimmed = part.trim();
4301                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4302                        "center".to_string()
4303                    } else if trimmed.ends_with(':') {
4304                        "right".to_string()
4305                    } else if trimmed.starts_with(':') {
4306                        "left".to_string()
4307                    } else {
4308                        "none".to_string()
4309                    };
4310                    column_alignments.push(alignment);
4311                }
4312            }
4313
4314            table_rows.push(TableRow {
4315                line: line_num,
4316                is_separator,
4317                column_count,
4318                column_alignments,
4319            });
4320        }
4321
4322        table_rows
4323    }
4324
4325    /// Parse bare URLs and emails in the content
4326    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4327        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4328
4329        // Check for bare URLs (not in angle brackets or markdown links)
4330        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4331            let full_match = cap.get(0).unwrap();
4332            let match_start = full_match.start();
4333            let match_end = full_match.end();
4334
4335            // Skip if in code block
4336            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4337                continue;
4338            }
4339
4340            // Skip if already in angle brackets or markdown links
4341            let preceding_char = if match_start > 0 {
4342                content.chars().nth(match_start - 1)
4343            } else {
4344                None
4345            };
4346            let following_char = content.chars().nth(match_end);
4347
4348            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4349                continue;
4350            }
4351            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4352                continue;
4353            }
4354
4355            let url = full_match.as_str();
4356            let url_type = if url.starts_with("https://") {
4357                "https"
4358            } else if url.starts_with("http://") {
4359                "http"
4360            } else if url.starts_with("ftp://") {
4361                "ftp"
4362            } else {
4363                "other"
4364            };
4365
4366            // Find which line this URL is on
4367            let mut line_num = 1;
4368            let mut col_start = match_start;
4369            let mut col_end = match_end;
4370            for (idx, line_info) in lines.iter().enumerate() {
4371                if match_start >= line_info.byte_offset {
4372                    line_num = idx + 1;
4373                    col_start = match_start - line_info.byte_offset;
4374                    col_end = match_end - line_info.byte_offset;
4375                } else {
4376                    break;
4377                }
4378            }
4379
4380            bare_urls.push(BareUrl {
4381                line: line_num,
4382                start_col: col_start,
4383                end_col: col_end,
4384                byte_offset: match_start,
4385                byte_end: match_end,
4386                url: url.to_string(),
4387                url_type: url_type.to_string(),
4388            });
4389        }
4390
4391        // Check for bare email addresses
4392        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4393            let full_match = cap.get(0).unwrap();
4394            let match_start = full_match.start();
4395            let match_end = full_match.end();
4396
4397            // Skip if in code block
4398            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4399                continue;
4400            }
4401
4402            // Skip if already in angle brackets or markdown links
4403            let preceding_char = if match_start > 0 {
4404                content.chars().nth(match_start - 1)
4405            } else {
4406                None
4407            };
4408            let following_char = content.chars().nth(match_end);
4409
4410            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4411                continue;
4412            }
4413            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4414                continue;
4415            }
4416
4417            let email = full_match.as_str();
4418
4419            // Find which line this email is on
4420            let mut line_num = 1;
4421            let mut col_start = match_start;
4422            let mut col_end = match_end;
4423            for (idx, line_info) in lines.iter().enumerate() {
4424                if match_start >= line_info.byte_offset {
4425                    line_num = idx + 1;
4426                    col_start = match_start - line_info.byte_offset;
4427                    col_end = match_end - line_info.byte_offset;
4428                } else {
4429                    break;
4430                }
4431            }
4432
4433            bare_urls.push(BareUrl {
4434                line: line_num,
4435                start_col: col_start,
4436                end_col: col_end,
4437                byte_offset: match_start,
4438                byte_end: match_end,
4439                url: email.to_string(),
4440                url_type: "email".to_string(),
4441            });
4442        }
4443
4444        bare_urls
4445    }
4446
4447    /// Get an iterator over valid CommonMark headings
4448    ///
4449    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4450    /// that should be flagged by MD018 but should not be processed by other heading rules.
4451    ///
4452    /// # Examples
4453    ///
4454    /// ```rust
4455    /// use rumdl_lib::lint_context::LintContext;
4456    /// use rumdl_lib::config::MarkdownFlavor;
4457    ///
4458    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4459    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4460    ///
4461    /// for heading in ctx.valid_headings() {
4462    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4463    /// }
4464    /// // Only prints valid headings, skips `#NoSpace`
4465    /// ```
4466    #[must_use]
4467    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4468        ValidHeadingsIter::new(&self.lines)
4469    }
4470
4471    /// Check if the document contains any valid CommonMark headings
4472    ///
4473    /// Returns `true` if there is at least one heading with proper space after `#`.
4474    #[must_use]
4475    pub fn has_valid_headings(&self) -> bool {
4476        self.lines
4477            .iter()
4478            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4479    }
4480}
4481
4482/// Merge adjacent list blocks that should be treated as one
4483fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4484    if list_blocks.len() < 2 {
4485        return;
4486    }
4487
4488    let mut merger = ListBlockMerger::new(content, lines);
4489    *list_blocks = merger.merge(list_blocks);
4490}
4491
4492/// Helper struct to manage the complex logic of merging list blocks
4493struct ListBlockMerger<'a> {
4494    content: &'a str,
4495    lines: &'a [LineInfo],
4496}
4497
4498impl<'a> ListBlockMerger<'a> {
4499    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4500        Self { content, lines }
4501    }
4502
4503    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4504        let mut merged = Vec::with_capacity(list_blocks.len());
4505        let mut current = list_blocks[0].clone();
4506
4507        for next in list_blocks.iter().skip(1) {
4508            if self.should_merge_blocks(&current, next) {
4509                current = self.merge_two_blocks(current, next);
4510            } else {
4511                merged.push(current);
4512                current = next.clone();
4513            }
4514        }
4515
4516        merged.push(current);
4517        merged
4518    }
4519
4520    /// Determine if two adjacent list blocks should be merged
4521    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4522        // Basic compatibility checks
4523        if !self.blocks_are_compatible(current, next) {
4524            return false;
4525        }
4526
4527        // Check spacing and content between blocks
4528        let spacing = self.analyze_spacing_between(current, next);
4529        match spacing {
4530            BlockSpacing::Consecutive => true,
4531            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4532            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4533                self.can_merge_with_content_between(current, next)
4534            }
4535        }
4536    }
4537
4538    /// Check if blocks have compatible structure for merging
4539    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4540        current.is_ordered == next.is_ordered
4541            && current.blockquote_prefix == next.blockquote_prefix
4542            && current.nesting_level == next.nesting_level
4543    }
4544
4545    /// Analyze the spacing between two list blocks
4546    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4547        let gap = next.start_line - current.end_line;
4548
4549        match gap {
4550            1 => BlockSpacing::Consecutive,
4551            2 => BlockSpacing::SingleBlank,
4552            _ if gap > 2 => {
4553                if self.has_only_blank_lines_between(current, next) {
4554                    BlockSpacing::MultipleBlanks
4555                } else {
4556                    BlockSpacing::ContentBetween
4557                }
4558            }
4559            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4560        }
4561    }
4562
4563    /// Check if unordered lists can be merged with a single blank line between
4564    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4565        // Check if there are structural separators between the blocks
4566        // If has_meaningful_content_between returns true, it means there are structural separators
4567        if has_meaningful_content_between(self.content, current, next, self.lines) {
4568            return false; // Structural separators prevent merging
4569        }
4570
4571        // Only merge unordered lists with same marker across single blank
4572        !current.is_ordered && current.marker == next.marker
4573    }
4574
4575    /// Check if ordered lists can be merged when there's content between them
4576    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4577        // Do not merge lists if there are structural separators between them
4578        if has_meaningful_content_between(self.content, current, next, self.lines) {
4579            return false; // Structural separators prevent merging
4580        }
4581
4582        // Only consider merging ordered lists if there's no structural content between
4583        current.is_ordered && next.is_ordered
4584    }
4585
4586    /// Check if there are only blank lines between blocks
4587    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4588        for line_num in (current.end_line + 1)..next.start_line {
4589            if let Some(line_info) = self.lines.get(line_num - 1)
4590                && !line_info.content(self.content).trim().is_empty()
4591            {
4592                return false;
4593            }
4594        }
4595        true
4596    }
4597
4598    /// Merge two compatible list blocks into one
4599    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4600        current.end_line = next.end_line;
4601        current.item_lines.extend_from_slice(&next.item_lines);
4602
4603        // Update max marker width
4604        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4605
4606        // Handle marker consistency for unordered lists
4607        if !current.is_ordered && self.markers_differ(&current, next) {
4608            current.marker = None; // Mixed markers
4609        }
4610
4611        current
4612    }
4613
4614    /// Check if two blocks have different markers
4615    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4616        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4617    }
4618}
4619
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the difference between the
/// next block's `start_line` and the current block's `end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks (line difference of 1; a difference of 0 is mapped here too).
    Consecutive,
    /// One line between blocks (line difference of 2), expected to be a blank line.
    /// The classifier does not itself check that this line is blank.
    SingleBlank,
    /// More than one line between blocks, all of them empty after trimming.
    MultipleBlanks,
    /// More than one line between blocks, at least one of which has content.
    ContentBetween,
}
4628
4629/// Check if there's meaningful content (not just blank lines) between two list blocks
4630fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4631    // Check lines between current.end_line and next.start_line
4632    for line_num in (current.end_line + 1)..next.start_line {
4633        if let Some(line_info) = lines.get(line_num - 1) {
4634            // Convert to 0-indexed
4635            let trimmed = line_info.content(content).trim();
4636
4637            // Skip empty lines
4638            if trimmed.is_empty() {
4639                continue;
4640            }
4641
4642            // Check for structural separators that should separate lists (CommonMark compliant)
4643
4644            // Headings separate lists
4645            if line_info.heading.is_some() {
4646                return true; // Has meaningful content - headings separate lists
4647            }
4648
4649            // Horizontal rules separate lists (---, ***, ___)
4650            if is_horizontal_rule(trimmed) {
4651                return true; // Has meaningful content - horizontal rules separate lists
4652            }
4653
4654            // Tables separate lists
4655            if crate::utils::skip_context::is_table_line(trimmed) {
4656                return true; // Has meaningful content - tables separate lists
4657            }
4658
4659            // Blockquotes separate lists
4660            if trimmed.starts_with('>') {
4661                return true; // Has meaningful content - blockquotes separate lists
4662            }
4663
4664            // Code block fences separate lists (unless properly indented as list content)
4665            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4666                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4667
4668                // Check if this code block is properly indented as list continuation
4669                let min_continuation_indent = if current.is_ordered {
4670                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4671                } else {
4672                    current.nesting_level + 2
4673                };
4674
4675                if line_indent < min_continuation_indent {
4676                    // This is a standalone code block that separates lists
4677                    return true; // Has meaningful content - standalone code blocks separate lists
4678                }
4679            }
4680
4681            // Check if this line has proper indentation for list continuation
4682            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4683
4684            // Calculate minimum indentation needed to be list continuation
4685            let min_indent = if current.is_ordered {
4686                current.nesting_level + current.max_marker_width
4687            } else {
4688                current.nesting_level + 2
4689            };
4690
4691            // If the line is not indented enough to be list continuation, it's meaningful content
4692            if line_indent < min_indent {
4693                return true; // Has meaningful content - content not indented as list continuation
4694            }
4695
4696            // If we reach here, the line is properly indented as list continuation
4697            // Continue checking other lines
4698        }
4699    }
4700
4701    // Only blank lines or properly indented list continuation content between blocks
4702    false
4703}
4704
4705/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4706/// CommonMark rules for thematic breaks (horizontal rules):
4707/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4708/// - Must have 3+ of the same character (-, *, or _)
4709/// - May have spaces between characters
4710/// - No other characters allowed
4711pub fn is_horizontal_rule_line(line: &str) -> bool {
4712    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4713    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4714    if leading_spaces > 3 || line.starts_with('\t') {
4715        return false;
4716    }
4717
4718    is_horizontal_rule_content(line.trim())
4719}
4720
/// Decide whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at least three
/// times, and contain nothing else apart from spaces and tabs. Leading indentation is not
/// examined here; use `is_horizontal_rule_line` for the full CommonMark check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // The first character fixes which marker the whole rule has to be made of.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    let only_rule_chars = trimmed.chars().all(|c| {
        if c == marker {
            marker_count += 1;
            true
        } else {
            // Whitespace between markers is fine; anything else disqualifies the line.
            c == ' ' || c == '\t'
        }
    });

    only_rule_chars && marker_count >= 3
}
4745
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged and returns the helper's result as-is. No indentation
/// check happens here; `is_horizontal_rule_line` performs one before delegating to the
/// same helper.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4750
/// Unit tests for this module.
4752#[cfg(test)]
4753mod tests {
4754    use super::*;
4755
4756    #[test]
4757    fn test_empty_content() {
4758        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4759        assert_eq!(ctx.content, "");
4760        assert_eq!(ctx.line_offsets, vec![0]);
4761        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4762        assert_eq!(ctx.lines.len(), 0);
4763    }
4764
4765    #[test]
4766    fn test_single_line() {
4767        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4768        assert_eq!(ctx.content, "# Hello");
4769        assert_eq!(ctx.line_offsets, vec![0]);
4770        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4771        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4772    }
4773
4774    #[test]
4775    fn test_multi_line() {
4776        let content = "# Title\n\nSecond line\nThird line";
4777        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4778        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4779        // Test offset to line/col
4780        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4781        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4782        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4783        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4784        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4785    }
4786
4787    #[test]
4788    fn test_line_info() {
4789        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4790        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4791
4792        // Test line info
4793        assert_eq!(ctx.lines.len(), 7);
4794
4795        // Line 1: "# Title"
4796        let line1 = &ctx.lines[0];
4797        assert_eq!(line1.content(ctx.content), "# Title");
4798        assert_eq!(line1.byte_offset, 0);
4799        assert_eq!(line1.indent, 0);
4800        assert!(!line1.is_blank);
4801        assert!(!line1.in_code_block);
4802        assert!(line1.list_item.is_none());
4803
4804        // Line 2: "    indented"
4805        let line2 = &ctx.lines[1];
4806        assert_eq!(line2.content(ctx.content), "    indented");
4807        assert_eq!(line2.byte_offset, 8);
4808        assert_eq!(line2.indent, 4);
4809        assert!(!line2.is_blank);
4810
4811        // Line 3: "" (blank)
4812        let line3 = &ctx.lines[2];
4813        assert_eq!(line3.content(ctx.content), "");
4814        assert!(line3.is_blank);
4815
4816        // Test helper methods
4817        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4818        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4819        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4820        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4821    }
4822
4823    #[test]
4824    fn test_list_item_detection() {
4825        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4826        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4827
4828        // Line 1: "- Unordered item"
4829        let line1 = &ctx.lines[0];
4830        assert!(line1.list_item.is_some());
4831        let list1 = line1.list_item.as_ref().unwrap();
4832        assert_eq!(list1.marker, "-");
4833        assert!(!list1.is_ordered);
4834        assert_eq!(list1.marker_column, 0);
4835        assert_eq!(list1.content_column, 2);
4836
4837        // Line 2: "  * Nested item"
4838        let line2 = &ctx.lines[1];
4839        assert!(line2.list_item.is_some());
4840        let list2 = line2.list_item.as_ref().unwrap();
4841        assert_eq!(list2.marker, "*");
4842        assert_eq!(list2.marker_column, 2);
4843
4844        // Line 3: "1. Ordered item"
4845        let line3 = &ctx.lines[2];
4846        assert!(line3.list_item.is_some());
4847        let list3 = line3.list_item.as_ref().unwrap();
4848        assert_eq!(list3.marker, "1.");
4849        assert!(list3.is_ordered);
4850        assert_eq!(list3.number, Some(1));
4851
4852        // Line 6: "Not a list"
4853        let line6 = &ctx.lines[5];
4854        assert!(line6.list_item.is_none());
4855    }
4856
4857    #[test]
4858    fn test_offset_to_line_col_edge_cases() {
4859        let content = "a\nb\nc";
4860        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4861        // line_offsets: [0, 2, 4]
4862        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4863        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4864        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4865        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4866        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4867        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4868    }
4869
4870    #[test]
4871    fn test_mdx_esm_blocks() {
4872        let content = r##"import {Chart} from './snowfall.js'
4873export const year = 2023
4874
4875# Last year's snowfall
4876
4877In {year}, the snowfall was above average.
4878It was followed by a warm spring which caused
4879flood conditions in many of the nearby rivers.
4880
4881<Chart color="#fcb32c" year={year} />
4882"##;
4883
4884        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4885
4886        // Check that lines 1 and 2 are marked as ESM blocks
4887        assert_eq!(ctx.lines.len(), 10);
4888        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4889        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4890        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4891        assert!(
4892            !ctx.lines[3].in_esm_block,
4893            "Line 4 (heading) should NOT be in_esm_block"
4894        );
4895        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4896        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4897    }
4898
4899    #[test]
4900    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4901        let content = r#"import {Chart} from './snowfall.js'
4902export const year = 2023
4903
4904# Last year's snowfall
4905"#;
4906
4907        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4908
4909        // ESM blocks should NOT be detected in Standard flavor
4910        assert!(
4911            !ctx.lines[0].in_esm_block,
4912            "Line 1 should NOT be in_esm_block in Standard flavor"
4913        );
4914        assert!(
4915            !ctx.lines[1].in_esm_block,
4916            "Line 2 should NOT be in_esm_block in Standard flavor"
4917        );
4918    }
4919
4920    #[test]
4921    fn test_blockquote_with_indented_content() {
4922        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4923        // The content inside the blockquote may also be detected as a code block (which is correct),
4924        // but for MD046 purposes, we need to know the line is inside a blockquote.
4925        let content = r#"# Heading
4926
4927>      -S socket-path
4928>                    More text
4929"#;
4930        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4931
4932        // Line 3 (index 2) should be detected as blockquote
4933        assert!(
4934            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4935            "Line 3 should be a blockquote"
4936        );
4937        // Line 4 (index 3) should also be blockquote
4938        assert!(
4939            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4940            "Line 4 should be a blockquote"
4941        );
4942
4943        // Verify blockquote content is correctly parsed
4944        // Note: spaces_after includes the spaces between `>` and content
4945        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4946        assert_eq!(bq3.content, "-S socket-path");
4947        assert_eq!(bq3.nesting_level, 1);
4948        // 6 spaces after the `>` marker
4949        assert!(bq3.has_multiple_spaces_after_marker);
4950
4951        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4952        assert_eq!(bq4.content, "More text");
4953        assert_eq!(bq4.nesting_level, 1);
4954    }
4955
4956    #[test]
4957    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4958        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4959        let content = r#"# Title
4960
4961A footnote[^1].
4962
4963[^1]: This is the footnote content.
4964
4965[^note]: Another footnote with [link](https://example.com).
4966
4967[regular]: ./path.md "A real reference definition"
4968"#;
4969        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4970
4971        // Should only have one reference definition (the regular one)
4972        assert_eq!(
4973            ctx.reference_defs.len(),
4974            1,
4975            "Footnotes should not be parsed as reference definitions"
4976        );
4977
4978        // The only reference def should be the regular one
4979        assert_eq!(ctx.reference_defs[0].id, "regular");
4980        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4981        assert_eq!(
4982            ctx.reference_defs[0].title,
4983            Some("A real reference definition".to_string())
4984        );
4985    }
4986
4987    #[test]
4988    fn test_footnote_with_inline_link_not_misidentified() {
4989        // Regression test for issue #286: footnote containing an inline link
4990        // was incorrectly parsed as a reference definition with URL "[link](url)"
4991        let content = r#"# Title
4992
4993A footnote[^1].
4994
4995[^1]: [link](https://www.google.com).
4996"#;
4997        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4998
4999        // Should have no reference definitions
5000        assert!(
5001            ctx.reference_defs.is_empty(),
5002            "Footnote with inline link should not create a reference definition"
5003        );
5004    }
5005
5006    #[test]
5007    fn test_various_footnote_formats_excluded() {
5008        // Test various footnote ID formats are all excluded
5009        let content = r#"[^1]: Numeric footnote
5010[^note]: Named footnote
5011[^a]: Single char footnote
5012[^long-footnote-name]: Long named footnote
5013[^123abc]: Mixed alphanumeric
5014
5015[ref1]: ./file1.md
5016[ref2]: ./file2.md
5017"#;
5018        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5019
5020        // Should only have the two regular reference definitions
5021        assert_eq!(
5022            ctx.reference_defs.len(),
5023            2,
5024            "Only regular reference definitions should be parsed"
5025        );
5026
5027        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5028        assert!(ids.contains(&"ref1"));
5029        assert!(ids.contains(&"ref2"));
5030        assert!(!ids.iter().any(|id| id.starts_with('^')));
5031    }
5032
5033    // =========================================================================
5034    // Tests for has_char and char_count methods
5035    // =========================================================================
5036
5037    #[test]
5038    fn test_has_char_tracked_characters() {
5039        // Test all 12 tracked characters
5040        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5041        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5042
5043        // All tracked characters should be detected
5044        assert!(ctx.has_char('#'), "Should detect hash");
5045        assert!(ctx.has_char('*'), "Should detect asterisk");
5046        assert!(ctx.has_char('_'), "Should detect underscore");
5047        assert!(ctx.has_char('-'), "Should detect hyphen");
5048        assert!(ctx.has_char('+'), "Should detect plus");
5049        assert!(ctx.has_char('>'), "Should detect gt");
5050        assert!(ctx.has_char('|'), "Should detect pipe");
5051        assert!(ctx.has_char('['), "Should detect bracket");
5052        assert!(ctx.has_char('`'), "Should detect backtick");
5053        assert!(ctx.has_char('<'), "Should detect lt");
5054        assert!(ctx.has_char('!'), "Should detect exclamation");
5055        assert!(ctx.has_char('\n'), "Should detect newline");
5056    }
5057
5058    #[test]
5059    fn test_has_char_absent_characters() {
5060        let content = "Simple text without special chars";
5061        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5062
5063        // None of the tracked characters should be present
5064        assert!(!ctx.has_char('#'), "Should not detect hash");
5065        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5066        assert!(!ctx.has_char('_'), "Should not detect underscore");
5067        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5068        assert!(!ctx.has_char('+'), "Should not detect plus");
5069        assert!(!ctx.has_char('>'), "Should not detect gt");
5070        assert!(!ctx.has_char('|'), "Should not detect pipe");
5071        assert!(!ctx.has_char('['), "Should not detect bracket");
5072        assert!(!ctx.has_char('`'), "Should not detect backtick");
5073        assert!(!ctx.has_char('<'), "Should not detect lt");
5074        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5075        // Note: single line content has no newlines
5076        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5077    }
5078
5079    #[test]
5080    fn test_has_char_fallback_for_untracked() {
5081        let content = "Text with @mention and $dollar and %percent";
5082        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5083
5084        // Untracked characters should fall back to content.contains()
5085        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5086        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5087        assert!(ctx.has_char('%'), "Should detect % via fallback");
5088        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5089    }
5090
5091    #[test]
5092    fn test_char_count_tracked_characters() {
5093        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5094        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5095
5096        // Count each tracked character
5097        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5098        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5099        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5100        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5101        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5102        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5103        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5104        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5105        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5106        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5107        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5108        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5109    }
5110
5111    #[test]
5112    fn test_char_count_zero_for_absent() {
5113        let content = "Plain text";
5114        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5115
5116        assert_eq!(ctx.char_count('#'), 0);
5117        assert_eq!(ctx.char_count('*'), 0);
5118        assert_eq!(ctx.char_count('_'), 0);
5119        assert_eq!(ctx.char_count('\n'), 0);
5120    }
5121
5122    #[test]
5123    fn test_char_count_fallback_for_untracked() {
5124        let content = "@@@ $$ %%%";
5125        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5126
5127        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5128        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5129        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5130        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5131    }
5132
5133    #[test]
5134    fn test_char_count_empty_content() {
5135        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5136
5137        assert_eq!(ctx.char_count('#'), 0);
5138        assert_eq!(ctx.char_count('*'), 0);
5139        assert_eq!(ctx.char_count('@'), 0);
5140        assert!(!ctx.has_char('#'));
5141        assert!(!ctx.has_char('@'));
5142    }
5143
5144    // =========================================================================
5145    // Tests for is_in_html_tag method
5146    // =========================================================================
5147
5148    #[test]
5149    fn test_is_in_html_tag_simple() {
5150        let content = "<div>content</div>";
5151        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5152
5153        // Inside opening tag
5154        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5155        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5156        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5157
5158        // Outside tag (in content)
5159        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5160        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5161
5162        // Inside closing tag
5163        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5164        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5165    }
5166
5167    #[test]
5168    fn test_is_in_html_tag_self_closing() {
5169        let content = "Text <br/> more text";
5170        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5171
5172        // Before tag
5173        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5174        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5175
5176        // Inside self-closing tag
5177        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5178        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5179        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5180
5181        // After tag
5182        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5183    }
5184
5185    #[test]
5186    fn test_is_in_html_tag_with_attributes() {
5187        let content = r#"<a href="url" class="link">text</a>"#;
5188        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5189
5190        // All positions inside opening tag with attributes
5191        assert!(ctx.is_in_html_tag(0), "Start of tag");
5192        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5193        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5194        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5195
5196        // Content between tags
5197        assert!(!ctx.is_in_html_tag(27), "Start of content");
5198        assert!(!ctx.is_in_html_tag(30), "End of content");
5199
5200        // Closing tag
5201        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5202    }
5203
5204    #[test]
5205    fn test_is_in_html_tag_multiline() {
5206        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5207        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5208
5209        // Opening tag spans multiple lines
5210        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5211        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5212        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5213
5214        // After closing > of opening tag
5215        let closing_bracket_pos = content.find(">\n").unwrap();
5216        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5217    }
5218
5219    #[test]
5220    fn test_is_in_html_tag_no_tags() {
5221        let content = "Plain text without any HTML";
5222        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5223
5224        // No position should be in an HTML tag
5225        for i in 0..content.len() {
5226            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5227        }
5228    }
5229
5230    // =========================================================================
5231    // Tests for is_in_jinja_range method
5232    // =========================================================================
5233
5234    #[test]
5235    fn test_is_in_jinja_range_expression() {
5236        let content = "Hello {{ name }}!";
5237        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5238
5239        // Before Jinja
5240        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5241        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5242
5243        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5244        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5245        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5246        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5247        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5248        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5249
5250        // After Jinja
5251        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5252    }
5253
5254    #[test]
5255    fn test_is_in_jinja_range_statement() {
5256        let content = "{% if condition %}content{% endif %}";
5257        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5258
5259        // Inside opening statement
5260        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5261        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5262        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5263
5264        // Content between
5265        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5266
5267        // Inside closing statement
5268        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5269        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5270    }
5271
5272    #[test]
5273    fn test_is_in_jinja_range_multiple() {
5274        let content = "{{ a }} and {{ b }}";
5275        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5276
5277        // First Jinja expression
5278        assert!(ctx.is_in_jinja_range(0));
5279        assert!(ctx.is_in_jinja_range(3));
5280        assert!(ctx.is_in_jinja_range(6));
5281
5282        // Between expressions
5283        assert!(!ctx.is_in_jinja_range(8));
5284        assert!(!ctx.is_in_jinja_range(11));
5285
5286        // Second Jinja expression
5287        assert!(ctx.is_in_jinja_range(12));
5288        assert!(ctx.is_in_jinja_range(15));
5289        assert!(ctx.is_in_jinja_range(18));
5290    }
5291
5292    #[test]
5293    fn test_is_in_jinja_range_no_jinja() {
5294        let content = "Plain text with single braces but not Jinja";
5295        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5296
5297        // No position should be in Jinja
5298        for i in 0..content.len() {
5299            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5300        }
5301    }
5302
5303    // =========================================================================
5304    // Tests for is_in_link_title method
5305    // =========================================================================
5306
5307    #[test]
5308    fn test_is_in_link_title_with_title() {
5309        let content = r#"[ref]: https://example.com "Title text"
5310
5311Some content."#;
5312        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5313
5314        // Verify we have a reference def with title
5315        assert_eq!(ctx.reference_defs.len(), 1);
5316        let def = &ctx.reference_defs[0];
5317        assert!(def.title_byte_start.is_some());
5318        assert!(def.title_byte_end.is_some());
5319
5320        let title_start = def.title_byte_start.unwrap();
5321        let title_end = def.title_byte_end.unwrap();
5322
5323        // Before title (in URL)
5324        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5325
5326        // Inside title
5327        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5328        assert!(
5329            ctx.is_in_link_title(title_start + 5),
5330            "Middle of title should be in title"
5331        );
5332        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5333
5334        // After title
5335        assert!(
5336            !ctx.is_in_link_title(title_end),
5337            "After title end should not be in title"
5338        );
5339    }
5340
5341    #[test]
5342    fn test_is_in_link_title_without_title() {
5343        let content = "[ref]: https://example.com\n\nSome content.";
5344        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5345
5346        // Reference def without title
5347        assert_eq!(ctx.reference_defs.len(), 1);
5348        let def = &ctx.reference_defs[0];
5349        assert!(def.title_byte_start.is_none());
5350        assert!(def.title_byte_end.is_none());
5351
5352        // No position should be in a title
5353        for i in 0..content.len() {
5354            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5355        }
5356    }
5357
5358    #[test]
5359    fn test_is_in_link_title_multiple_refs() {
5360        let content = r#"[ref1]: /url1 "Title One"
5361[ref2]: /url2
5362[ref3]: /url3 "Title Three"
5363"#;
5364        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5365
5366        // Should have 3 reference defs
5367        assert_eq!(ctx.reference_defs.len(), 3);
5368
5369        // ref1 has title
5370        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5371        assert!(ref1.title_byte_start.is_some());
5372
5373        // ref2 has no title
5374        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5375        assert!(ref2.title_byte_start.is_none());
5376
5377        // ref3 has title
5378        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5379        assert!(ref3.title_byte_start.is_some());
5380
5381        // Check positions in ref1's title
5382        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5383            assert!(ctx.is_in_link_title(start + 1));
5384            assert!(!ctx.is_in_link_title(end + 5));
5385        }
5386
5387        // Check positions in ref3's title
5388        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5389            assert!(ctx.is_in_link_title(start + 1));
5390        }
5391    }
5392
5393    #[test]
5394    fn test_is_in_link_title_single_quotes() {
5395        let content = "[ref]: /url 'Single quoted title'\n";
5396        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5397
5398        assert_eq!(ctx.reference_defs.len(), 1);
5399        let def = &ctx.reference_defs[0];
5400
5401        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5402            assert!(ctx.is_in_link_title(start));
5403            assert!(ctx.is_in_link_title(start + 5));
5404            assert!(!ctx.is_in_link_title(end));
5405        }
5406    }
5407
5408    #[test]
5409    fn test_is_in_link_title_parentheses() {
5410        // Note: The reference def parser may not support parenthesized titles
5411        // This test verifies the is_in_link_title method works when titles exist
5412        let content = "[ref]: /url (Parenthesized title)\n";
5413        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5414
5415        // Parser behavior: may or may not parse parenthesized titles
5416        // We test that is_in_link_title correctly reflects whatever was parsed
5417        if ctx.reference_defs.is_empty() {
5418            // Parser didn't recognize this as a reference def
5419            for i in 0..content.len() {
5420                assert!(!ctx.is_in_link_title(i));
5421            }
5422        } else {
5423            let def = &ctx.reference_defs[0];
5424            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5425                assert!(ctx.is_in_link_title(start));
5426                assert!(ctx.is_in_link_title(start + 5));
5427                assert!(!ctx.is_in_link_title(end));
5428            } else {
5429                // Title wasn't parsed, so no position should be in title
5430                for i in 0..content.len() {
5431                    assert!(!ctx.is_in_link_title(i));
5432                }
5433            }
5434        }
5435    }
5436
5437    #[test]
5438    fn test_is_in_link_title_no_refs() {
5439        let content = "Just plain text without any reference definitions.";
5440        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5441
5442        assert!(ctx.reference_defs.is_empty());
5443
5444        for i in 0..content.len() {
5445            assert!(!ctx.is_in_link_title(i));
5446        }
5447    }
5448
5449    // =========================================================================
5450    // Math span tests (Issue #289)
5451    // =========================================================================
5452
5453    #[test]
5454    fn test_math_spans_inline() {
5455        let content = "Text with inline math $[f](x)$ in it.";
5456        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5457
5458        let math_spans = ctx.math_spans();
5459        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5460
5461        let span = &math_spans[0];
5462        assert!(!span.is_display, "Should be inline math, not display");
5463        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5464    }
5465
5466    #[test]
5467    fn test_math_spans_display_single_line() {
5468        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5469        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5470
5471        let math_spans = ctx.math_spans();
5472        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5473
5474        let span = &math_spans[0];
5475        assert!(span.is_display, "Should be display math");
5476        assert!(
5477            span.content.contains("[x](\\zeta)"),
5478            "Content should contain the link-like pattern"
5479        );
5480    }
5481
5482    #[test]
5483    fn test_math_spans_display_multiline() {
5484        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5485        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5486
5487        let math_spans = ctx.math_spans();
5488        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5489
5490        let span = &math_spans[0];
5491        assert!(span.is_display, "Should be display math");
5492    }
5493
5494    #[test]
5495    fn test_is_in_math_span() {
5496        let content = "Text $[f](x)$ more text";
5497        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5498
5499        // Position inside the math span
5500        let math_start = content.find('$').unwrap();
5501        let math_end = content.rfind('$').unwrap() + 1;
5502
5503        assert!(
5504            ctx.is_in_math_span(math_start + 1),
5505            "Position inside math span should return true"
5506        );
5507        assert!(
5508            ctx.is_in_math_span(math_start + 3),
5509            "Position inside math span should return true"
5510        );
5511
5512        // Position outside the math span
5513        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5514        assert!(
5515            !ctx.is_in_math_span(math_end + 1),
5516            "Position after math span should return false"
5517        );
5518    }
5519
5520    #[test]
5521    fn test_math_spans_mixed_with_code() {
5522        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5523        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5524
5525        let math_spans = ctx.math_spans();
5526        let code_spans = ctx.code_spans();
5527
5528        assert_eq!(math_spans.len(), 1, "Should have one math span");
5529        assert_eq!(code_spans.len(), 1, "Should have one code span");
5530
5531        // Verify math span content
5532        assert_eq!(math_spans[0].content, "[f](x)");
5533        // Verify code span content
5534        assert_eq!(code_spans[0].content, "[g](y)");
5535    }
5536
5537    #[test]
5538    fn test_math_spans_no_math() {
5539        let content = "Regular text without any math at all.";
5540        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5541
5542        let math_spans = ctx.math_spans();
5543        assert!(math_spans.is_empty(), "Should have no math spans");
5544    }
5545
5546    #[test]
5547    fn test_math_spans_multiple() {
5548        let content = "First $a$ and second $b$ and display $$c$$";
5549        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5550
5551        let math_spans = ctx.math_spans();
5552        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5553
5554        // Two inline, one display
5555        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5556        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5557
5558        assert_eq!(inline_count, 2, "Should have two inline math spans");
5559        assert_eq!(display_count, 1, "Should have one display math span");
5560    }
5561
5562    #[test]
5563    fn test_is_in_math_span_boundary_positions() {
5564        // Test exact boundary positions: $[f](x)$
5565        // Byte positions:                0123456789
5566        let content = "$[f](x)$";
5567        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5568
5569        let math_spans = ctx.math_spans();
5570        assert_eq!(math_spans.len(), 1, "Should have one math span");
5571
5572        let span = &math_spans[0];
5573
5574        // Position at opening $ should be in span (byte 0)
5575        assert!(
5576            ctx.is_in_math_span(span.byte_offset),
5577            "Start position should be in span"
5578        );
5579
5580        // Position just inside should be in span
5581        assert!(
5582            ctx.is_in_math_span(span.byte_offset + 1),
5583            "Position after start should be in span"
5584        );
5585
5586        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5587        assert!(
5588            ctx.is_in_math_span(span.byte_end - 1),
5589            "Position at end-1 should be in span"
5590        );
5591
5592        // Position at byte_end should NOT be in span (exclusive end)
5593        assert!(
5594            !ctx.is_in_math_span(span.byte_end),
5595            "Position at byte_end should NOT be in span (exclusive)"
5596        );
5597    }
5598
5599    #[test]
5600    fn test_math_spans_at_document_start() {
5601        let content = "$x$ text";
5602        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5603
5604        let math_spans = ctx.math_spans();
5605        assert_eq!(math_spans.len(), 1);
5606        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5607    }
5608
5609    #[test]
5610    fn test_math_spans_at_document_end() {
5611        let content = "text $x$";
5612        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5613
5614        let math_spans = ctx.math_spans();
5615        assert_eq!(math_spans.len(), 1);
5616        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5617    }
5618
5619    #[test]
5620    fn test_math_spans_consecutive() {
5621        let content = "$a$$b$";
5622        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5623
5624        let math_spans = ctx.math_spans();
5625        // pulldown-cmark should parse these as separate spans
5626        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5627
5628        // All positions should be in some math span
5629        for i in 0..content.len() {
5630            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5631        }
5632    }
5633
5634    #[test]
5635    fn test_math_spans_currency_not_math() {
5636        // Unbalanced $ should not create math spans
5637        let content = "Price is $100";
5638        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5639
5640        let math_spans = ctx.math_spans();
5641        // pulldown-cmark requires balanced delimiters for math
5642        // $100 alone is not math
5643        assert!(
5644            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5645            "Unbalanced $ should not create math span containing 100"
5646        );
5647    }
5648
5649    // =========================================================================
5650    // Tests for O(1) reference definition lookups via HashMap
5651    // =========================================================================
5652
5653    #[test]
5654    fn test_reference_lookup_o1_basic() {
5655        let content = r#"[ref1]: /url1
5656[REF2]: /url2 "Title"
5657[Ref3]: /url3
5658
5659Use [link][ref1] and [link][REF2]."#;
5660        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5661
5662        // Verify we have 3 reference defs
5663        assert_eq!(ctx.reference_defs.len(), 3);
5664
5665        // Test get_reference_url with various cases
5666        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5667        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5668        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5669        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5670        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5671        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5672        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5673    }
5674
5675    #[test]
5676    fn test_reference_lookup_o1_get_reference_def() {
5677        let content = r#"[myref]: https://example.com "My Title"
5678"#;
5679        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5680
5681        // Test get_reference_def
5682        let def = ctx.get_reference_def("myref").expect("Should find myref");
5683        assert_eq!(def.url, "https://example.com");
5684        assert_eq!(def.title.as_deref(), Some("My Title"));
5685
5686        // Case insensitive
5687        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5688        assert_eq!(def2.url, "https://example.com");
5689
5690        // Non-existent
5691        assert!(ctx.get_reference_def("nonexistent").is_none());
5692    }
5693
5694    #[test]
5695    fn test_reference_lookup_o1_has_reference_def() {
5696        let content = r#"[foo]: /foo
5697[BAR]: /bar
5698"#;
5699        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5700
5701        // Test has_reference_def
5702        assert!(ctx.has_reference_def("foo"));
5703        assert!(ctx.has_reference_def("FOO")); // case insensitive
5704        assert!(ctx.has_reference_def("bar"));
5705        assert!(ctx.has_reference_def("Bar")); // case insensitive
5706        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5707    }
5708
5709    #[test]
5710    fn test_reference_lookup_o1_empty_content() {
5711        let content = "No references here.";
5712        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5713
5714        assert!(ctx.reference_defs.is_empty());
5715        assert_eq!(ctx.get_reference_url("anything"), None);
5716        assert!(ctx.get_reference_def("anything").is_none());
5717        assert!(!ctx.has_reference_def("anything"));
5718    }
5719
5720    #[test]
5721    fn test_reference_lookup_o1_special_characters_in_id() {
5722        let content = r#"[ref-with-dash]: /url1
5723[ref_with_underscore]: /url2
5724[ref.with.dots]: /url3
5725"#;
5726        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5727
5728        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5729        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5730        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5731    }
5732
5733    #[test]
5734    fn test_reference_lookup_o1_unicode_id() {
5735        let content = r#"[日本語]: /japanese
5736[émoji]: /emoji
5737"#;
5738        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5739
5740        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5741        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5742        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5743    }
5744}