rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Times one phase of context construction (non-WASM builds).
///
/// Evaluates `$code` and yields its value. When `$profile` is true, the elapsed
/// wall-clock time is written to stderr, labelled with `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// WASM variant: no timing is performed, `$code` is simply evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Comprehensive link pattern that captures both inline and reference links.
//
// Flags: `s` makes `.` match newlines, `x` enables verbose mode so the pattern
// can be laid out over several lines with `#` comments inside the literal.
//
// Capture groups:
//   1 - link text
//   2 - inline URL written in angle brackets, 3 - inline URL written bare
//   4 - double-quoted title, 5 - single-quoted title
//   6 - reference ID of a `[text][ref]` link
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern (same shape as the link pattern, but requires a leading `!`).
//
// Flags: `s` makes `.` match newlines, `x` enables verbose mode so the pattern
// can be laid out over several lines with `#` comments inside the literal.
//
// Capture groups:
//   1 - alt text
//   2 - inline URL written in angle brackets, 3 - inline URL written bare
//   4 - double-quoted title, 5 - single-quoted title
//   6 - reference ID of a `![alt][ref]` image
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[label]: url "title"`.
//
// Multi-line mode (`m`) anchors `^`/`$` at line boundaries. Up to three leading
// spaces are accepted before the label.
// Groups: 1 = label, 2 = URL (a run of non-whitespace characters),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs have no local pattern: matching uses the shared `URL_SIMPLE_REGEX` imported from `regex_cache`.
65
// Pattern for email addresses: a local part, `@`, a domain, and a final
// dot-separated label of at least two ASCII letters.
// The pattern is unanchored, so it can match anywhere inside a longer string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures the whole prefix: optional leading whitespace, one or more
// `>` characters, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a line
///
/// The line text itself is not stored; `byte_offset` and `byte_len` locate it
/// inside the source document (see [`LineInfo::content`]). The remaining fields
/// are per-line facts and flags describing which constructs the line belongs to.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (set for every line after the first line of a code span that crosses lines)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132    /// Get the line content as a string slice from the source document
133    pub fn content<'a>(&self, source: &'a str) -> &'a str {
134        &source[self.byte_offset..self.byte_offset + self.byte_len]
135    }
136}
137
/// Information about a list item
///
/// Stored in [`LineInfo::list_item`] for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
152
/// Heading style type
///
/// Distinguishes `#`-prefixed headings from the two underline-based forms.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
163
/// Parsed link information
///
/// Text fields are `Cow`s tied to the lifetime `'a`, so they may borrow
/// instead of allocating.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
188
/// Information about a broken link reported by pulldown-cmark
///
/// Owns its data, so it carries no lifetime parameter.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
197
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Records where the reference occurs; it holds no information about the
/// footnote definition itself.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
210
/// Parsed image information
///
/// Mirrors [`ParsedLink`] field for field, with `alt_text` in place of `text`.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
235
/// Reference definition [ref]: url "title"
///
/// All text is owned. The title offsets are `Some` only when a title is present.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
256
/// Parsed code span information
///
/// A span may cross lines: `end_line` is greater than `line` when it does.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
277
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Positional fields follow the same layout as [`CodeSpan`].
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
298
/// Information about a heading
///
/// Stored in [`LineInfo::heading`]. Malformed headings are recorded too and
/// are distinguished by `is_valid`.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
326
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by [`ValidHeadingsIter`]; both references
/// borrow from the slice of lines being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
340
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order
    lines: &'a [LineInfo],
    /// 0-based index of the next line to examine
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351    fn new(lines: &'a [LineInfo]) -> Self {
352        Self {
353            lines,
354            current_index: 0,
355        }
356    }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360    type Item = ValidHeading<'a>;
361
362    fn next(&mut self) -> Option<Self::Item> {
363        while self.current_index < self.lines.len() {
364            let idx = self.current_index;
365            self.current_index += 1;
366
367            let line_info = &self.lines[idx];
368            if let Some(heading) = &line_info.heading
369                && heading.is_valid
370            {
371                return Some(ValidHeading {
372                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
373                    heading,
374                    line_info,
375                });
376            }
377        }
378        None
379    }
380}
381
/// Information about a blockquote line
///
/// Stored in [`LineInfo::blockquote`]. All text fields are owned copies.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
402
/// Information about a list block
///
/// Describes one list as a range of lines plus the lines within it that are
/// list items.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// List item data keyed by the byte offset of the line it belongs to.
///
/// Value layout: `(is_ordered, marker, marker_column, content_column, number)`.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// List of `(usize, usize)` byte ranges, used in JSX expression and MDX comment detection.
type ByteRanges = Vec<(usize, usize)>;
431
/// Character frequency data for fast content analysis
///
/// Each field counts occurrences of one character; the parenthesised notes
/// list the Markdown constructs that character can introduce. `Default`
/// yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
460
/// Pre-parsed HTML tag information
///
/// One value describes a single tag occurrence (opening, closing, or
/// self-closing), not a matched element pair.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
483
/// Pre-parsed emphasis span information
///
/// Carries a single line number, unlike [`CodeSpan`] which records both a
/// start and an end line.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
504
/// Pre-parsed table row information
///
/// Describes the shape of one table line; cell contents are not stored.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Values are the strings "left", "center", "right", or "none".
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
517
/// Pre-parsed bare URL information (not in links)
///
/// Also used for bare email addresses, which are reported with
/// `url_type == "email"`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
536
/// Shared, pre-computed view of one Markdown document.
///
/// Built once by `LintContext::new`. The public fields are filled during
/// construction. The `OnceLock` caches are either filled during construction
/// (code spans, emphasis spans) or created empty.
pub struct LintContext<'a> {
    /// The Markdown source this context describes
    pub content: &'a str,
    /// Byte offset of the start of each line (the first entry is 0)
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// O(1) lookup by lowercase ID -> index in reference_defs
    reference_defs_map: HashMap<String, usize>,
    /// Inline code spans (filled during construction)
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Math spans ($...$ and $$...$$); created empty by the constructor
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// HTML tags; created empty by the constructor
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans (filled during construction)
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Table rows; created empty by the constructor
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Bare URLs; created empty by the constructor
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges ({{ }}, {% %})
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
    /// Pre-computed JSX expression ranges (MDX: {expression})
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Pre-computed MDX comment ranges ({/* ... */})
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    shortcode_ranges: Vec<(usize, usize)>,
    /// Parsed inline configuration comments for rule disabling
    inline_config: InlineConfig,
}
569
/// The four pieces of a blockquote line, each borrowed from the input.
///
/// Concatenating the fields in declaration order reproduces the original line.
struct BlockquoteComponents<'a> {
    /// Spaces and tabs before the first `>`
    indent: &'a str,
    /// The contiguous run of `>` characters
    markers: &'a str,
    /// Spaces and tabs directly after the markers
    spaces_after: &'a str,
    /// Everything that remains on the line
    content: &'a str,
}

/// Split a line into blockquote indent, markers, following spaces, and content.
///
/// Returns `None` when the first character after any leading spaces/tabs is
/// not `>` (this includes empty and whitespace-only lines). Only an unbroken
/// run of `>` counts as markers, so for `"> > x"` the second `>` is part of
/// `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_gap(byte: &u8) -> bool {
        matches!(*byte, b' ' | b'\t')
    }

    // Every delimiter is ASCII, so each byte count below is a valid char boundary.
    let indent_len = line.bytes().take_while(is_gap).count();
    let (indent, after_indent) = line.split_at(indent_len);

    let marker_len = after_indent.bytes().take_while(|&b| b == b'>').count();
    if marker_len == 0 {
        return None;
    }
    let (markers, after_markers) = after_indent.split_at(marker_len);

    let gap_len = after_markers.bytes().take_while(is_gap).count();
    let (spaces_after, content) = after_markers.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
614
615impl<'a> LintContext<'a> {
616    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617        #[cfg(not(target_arch = "wasm32"))]
618        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619        #[cfg(target_arch = "wasm32")]
620        let profile = false;
621
622        let line_offsets = profile_section!("Line offsets", profile, {
623            let mut offsets = vec![0];
624            for (i, c) in content.char_indices() {
625                if c == '\n' {
626                    offsets.push(i + 1);
627                }
628            }
629            offsets
630        });
631
632        // Detect code blocks and code spans once and cache them
633        let (code_blocks, code_span_ranges) = profile_section!(
634            "Code blocks",
635            profile,
636            CodeBlockUtils::detect_code_blocks_and_spans(content)
637        );
638
639        // Pre-compute HTML comment ranges ONCE for all operations
640        let html_comment_ranges = profile_section!(
641            "HTML comment ranges",
642            profile,
643            crate::utils::skip_context::compute_html_comment_ranges(content)
644        );
645
646        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
647        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
648            if flavor == MarkdownFlavor::MkDocs {
649                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
650            } else {
651                Vec::new()
652            }
653        });
654
655        // Pre-compute Quarto div block ranges for Quarto flavor
656        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
657            if flavor == MarkdownFlavor::Quarto {
658                crate::utils::quarto_divs::detect_div_block_ranges(content)
659            } else {
660                Vec::new()
661            }
662        });
663
664        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
665        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
666        let (mut lines, emphasis_spans) = profile_section!(
667            "Basic line info",
668            profile,
669            Self::compute_basic_line_info(
670                content,
671                &line_offsets,
672                &code_blocks,
673                flavor,
674                &html_comment_ranges,
675                &autodoc_ranges,
676                &quarto_div_ranges,
677            )
678        );
679
680        // Detect HTML blocks BEFORE heading detection
681        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
682
683        // Detect ESM import/export blocks in MDX files BEFORE heading detection
684        profile_section!(
685            "ESM blocks",
686            profile,
687            Self::detect_esm_blocks(content, &mut lines, flavor)
688        );
689
690        // Detect JSX expressions and MDX comments in MDX files
691        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
692            "JSX/MDX detection",
693            profile,
694            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
695        );
696
697        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
698        profile_section!(
699            "MkDocs constructs",
700            profile,
701            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
702        );
703
704        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
705        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
706
707        // Now detect headings and blockquotes
708        profile_section!(
709            "Headings & blockquotes",
710            profile,
711            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
712        );
713
714        // Parse code spans early so we can exclude them from link/image parsing
715        let code_spans = profile_section!(
716            "Code spans",
717            profile,
718            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
719        );
720
721        // Mark lines that are continuations of multi-line code spans
722        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
723        for span in &code_spans {
724            if span.end_line > span.line {
725                // Mark lines after the first line as continuations
726                for line_num in (span.line + 1)..=span.end_line {
727                    if let Some(line_info) = lines.get_mut(line_num - 1) {
728                        line_info.in_code_span_continuation = true;
729                    }
730                }
731            }
732        }
733
734        // Parse links, images, references, and list blocks
735        let (links, broken_links, footnote_refs) = profile_section!(
736            "Links",
737            profile,
738            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
739        );
740
741        let images = profile_section!(
742            "Images",
743            profile,
744            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
745        );
746
747        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
748
749        // Build O(1) lookup map for reference definitions by lowercase ID
750        let reference_defs_map: HashMap<String, usize> = reference_defs
751            .iter()
752            .enumerate()
753            .map(|(idx, def)| (def.id.to_lowercase(), idx))
754            .collect();
755
756        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
757
758        // Compute character frequency for fast content analysis
759        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
760
761        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
762        let table_blocks = profile_section!(
763            "Table blocks",
764            profile,
765            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
766                content,
767                &code_blocks,
768                &code_spans,
769                &html_comment_ranges,
770            )
771        );
772
773        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
774        let line_index = profile_section!(
775            "Line index",
776            profile,
777            crate::utils::range_utils::LineIndex::new(content)
778        );
779
780        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
781        let jinja_ranges = profile_section!(
782            "Jinja ranges",
783            profile,
784            crate::utils::jinja_utils::find_jinja_ranges(content)
785        );
786
787        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
788        let citation_ranges = profile_section!("Citation ranges", profile, {
789            if flavor == MarkdownFlavor::Quarto {
790                crate::utils::quarto_divs::find_citation_ranges(content)
791            } else {
792                Vec::new()
793            }
794        });
795
796        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
797        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
798            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
799            let mut ranges = Vec::new();
800            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
801                ranges.push((mat.start(), mat.end()));
802            }
803            ranges
804        });
805
806        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
807
808        Self {
809            content,
810            line_offsets,
811            code_blocks,
812            lines,
813            links,
814            images,
815            broken_links,
816            footnote_refs,
817            reference_defs,
818            reference_defs_map,
819            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
820            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
821            list_blocks,
822            char_frequency,
823            html_tags_cache: OnceLock::new(),
824            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
825            table_rows_cache: OnceLock::new(),
826            bare_urls_cache: OnceLock::new(),
827            has_mixed_list_nesting_cache: OnceLock::new(),
828            html_comment_ranges,
829            table_blocks,
830            line_index,
831            jinja_ranges,
832            flavor,
833            source_file,
834            jsx_expression_ranges,
835            mdx_comment_ranges,
836            citation_ranges,
837            shortcode_ranges,
838            inline_config,
839        }
840    }
841
842    /// Check if a rule is disabled at a specific line number (1-indexed)
843    ///
844    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
845    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
846    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847        self.inline_config.is_rule_disabled(rule_name, line_number)
848    }
849
850    /// Get code spans - computed lazily on first access
851    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852        Arc::clone(
853            self.code_spans_cache
854                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855        )
856    }
857
858    /// Get math spans - computed lazily on first access
859    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860        Arc::clone(
861            self.math_spans_cache
862                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863        )
864    }
865
866    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
867    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868        let math_spans = self.math_spans();
869        math_spans
870            .iter()
871            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872    }
873
874    /// Get HTML comment ranges - pre-computed during LintContext construction
875    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876        &self.html_comment_ranges
877    }
878
879    /// Get HTML tags - computed lazily on first access
880    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881        Arc::clone(self.html_tags_cache.get_or_init(|| {
882            Arc::new(Self::parse_html_tags(
883                self.content,
884                &self.lines,
885                &self.code_blocks,
886                self.flavor,
887            ))
888        }))
889    }
890
891    /// Get emphasis spans - pre-computed during construction
892    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893        Arc::clone(
894            self.emphasis_spans_cache
895                .get()
896                .expect("emphasis_spans_cache initialized during construction"),
897        )
898    }
899
900    /// Get table rows - computed lazily on first access
901    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902        Arc::clone(
903            self.table_rows_cache
904                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905        )
906    }
907
908    /// Get bare URLs - computed lazily on first access
909    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910        Arc::clone(
911            self.bare_urls_cache
912                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913        )
914    }
915
916    /// Check if document has mixed ordered/unordered list nesting.
917    /// Result is cached after first computation (document-level invariant).
918    /// This is used by MD007 for smart style auto-detection.
919    pub fn has_mixed_list_nesting(&self) -> bool {
920        *self
921            .has_mixed_list_nesting_cache
922            .get_or_init(|| self.compute_mixed_list_nesting())
923    }
924
925    /// Internal computation for mixed list nesting (only called once per LintContext).
926    fn compute_mixed_list_nesting(&self) -> bool {
927        // Track parent list items by their marker position and type
928        // Using marker_column instead of indent because it works correctly
929        // for blockquoted content where indent doesn't account for the prefix
930        // Stack stores: (marker_column, is_ordered)
931        let mut stack: Vec<(usize, bool)> = Vec::new();
932        let mut last_was_blank = false;
933
934        for line_info in &self.lines {
935            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
936            if line_info.in_code_block
937                || line_info.in_front_matter
938                || line_info.in_mkdocstrings
939                || line_info.in_html_comment
940                || line_info.in_esm_block
941            {
942                continue;
943            }
944
945            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
946            if line_info.is_blank {
947                last_was_blank = true;
948                continue;
949            }
950
951            if let Some(list_item) = &line_info.list_item {
952                // Normalize column 1 to column 0 (consistent with MD007 check function)
953                let current_pos = if list_item.marker_column == 1 {
954                    0
955                } else {
956                    list_item.marker_column
957                };
958
959                // If there was a blank line and this item is at root level, reset stack
960                if last_was_blank && current_pos == 0 {
961                    stack.clear();
962                }
963                last_was_blank = false;
964
965                // Pop items at same or greater position (they're siblings or deeper, not parents)
966                while let Some(&(pos, _)) = stack.last() {
967                    if pos >= current_pos {
968                        stack.pop();
969                    } else {
970                        break;
971                    }
972                }
973
974                // Check if immediate parent has different type - this is mixed nesting
975                if let Some(&(_, parent_is_ordered)) = stack.last()
976                    && parent_is_ordered != list_item.is_ordered
977                {
978                    return true; // Found mixed nesting - early exit
979                }
980
981                stack.push((current_pos, list_item.is_ordered));
982            } else {
983                // Non-list line (but not blank) - could be paragraph or other content
984                last_was_blank = false;
985            }
986        }
987
988        false
989    }
990
991    /// Map a byte offset to (line, column)
992    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993        match self.line_offsets.binary_search(&offset) {
994            Ok(line) => (line + 1, 1),
995            Err(line) => {
996                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997                (line, offset - line_start + 1)
998            }
999        }
1000    }
1001
1002    /// Check if a position is within a code block or code span
1003    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004        // Check code blocks first
1005        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006            return true;
1007        }
1008
1009        // Check inline code spans (lazy load if needed)
1010        self.code_spans()
1011            .iter()
1012            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013    }
1014
1015    /// Get line information by line number (1-indexed)
1016    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017        if line_num > 0 {
1018            self.lines.get(line_num - 1)
1019        } else {
1020            None
1021        }
1022    }
1023
1024    /// Get byte offset for a line number (1-indexed)
1025    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026        self.line_info(line_num).map(|info| info.byte_offset)
1027    }
1028
1029    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1030    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031        let normalized_id = ref_id.to_lowercase();
1032        self.reference_defs_map
1033            .get(&normalized_id)
1034            .map(|&idx| self.reference_defs[idx].url.as_str())
1035    }
1036
1037    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1038    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039        let normalized_id = ref_id.to_lowercase();
1040        self.reference_defs_map
1041            .get(&normalized_id)
1042            .map(|&idx| &self.reference_defs[idx])
1043    }
1044
1045    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1046    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047        let normalized_id = ref_id.to_lowercase();
1048        self.reference_defs_map.contains_key(&normalized_id)
1049    }
1050
1051    /// Check if a line is part of a list block
1052    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053        self.list_blocks
1054            .iter()
1055            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056    }
1057
1058    /// Get the list block containing a specific line
1059    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060        self.list_blocks
1061            .iter()
1062            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063    }
1064
1065    // Compatibility methods for DocumentStructure migration
1066
1067    /// Check if a line is within a code block
1068    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069        if line_num == 0 || line_num > self.lines.len() {
1070            return false;
1071        }
1072        self.lines[line_num - 1].in_code_block
1073    }
1074
1075    /// Check if a line is within front matter
1076    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077        if line_num == 0 || line_num > self.lines.len() {
1078            return false;
1079        }
1080        self.lines[line_num - 1].in_front_matter
1081    }
1082
1083    /// Check if a line is within an HTML block
1084    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085        if line_num == 0 || line_num > self.lines.len() {
1086            return false;
1087        }
1088        self.lines[line_num - 1].in_html_block
1089    }
1090
1091    /// Check if a line and column is within a code span
1092    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093        if line_num == 0 || line_num > self.lines.len() {
1094            return false;
1095        }
1096
1097        // Use the code spans cache to check
1098        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1099        // Convert col to 0-indexed for comparison
1100        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101        let code_spans = self.code_spans();
1102        code_spans.iter().any(|span| {
1103            // Check if line is within the span's line range
1104            if line_num < span.line || line_num > span.end_line {
1105                return false;
1106            }
1107
1108            if span.line == span.end_line {
1109                // Single-line span: check column bounds
1110                col_0indexed >= span.start_col && col_0indexed < span.end_col
1111            } else if line_num == span.line {
1112                // First line of multi-line span: anything after start_col is in span
1113                col_0indexed >= span.start_col
1114            } else if line_num == span.end_line {
1115                // Last line of multi-line span: anything before end_col is in span
1116                col_0indexed < span.end_col
1117            } else {
1118                // Middle line of multi-line span: entire line is in span
1119                true
1120            }
1121        })
1122    }
1123
1124    /// Check if a byte offset is within a code span
1125    #[inline]
1126    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127        let code_spans = self.code_spans();
1128        code_spans
1129            .iter()
1130            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131    }
1132
1133    /// Check if a byte position is within a reference definition
1134    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1135    #[inline]
1136    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137        self.reference_defs
1138            .iter()
1139            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140    }
1141
1142    /// Check if a byte position is within an HTML comment
1143    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1144    /// where k is the number of HTML comments (typically very small)
1145    #[inline]
1146    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147        self.html_comment_ranges
1148            .iter()
1149            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150    }
1151
1152    /// Check if a byte position is within an HTML tag (including multiline tags)
1153    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1154    #[inline]
1155    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156        self.html_tags()
1157            .iter()
1158            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159    }
1160
1161    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1162    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163        self.jinja_ranges
1164            .iter()
1165            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166    }
1167
1168    /// Check if a byte position is within a JSX expression (MDX: {expression})
1169    #[inline]
1170    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171        self.jsx_expression_ranges
1172            .iter()
1173            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174    }
1175
1176    /// Check if a byte position is within an MDX comment ({/* ... */})
1177    #[inline]
1178    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179        self.mdx_comment_ranges
1180            .iter()
1181            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182    }
1183
1184    /// Get all JSX expression byte ranges
1185    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186        &self.jsx_expression_ranges
1187    }
1188
1189    /// Get all MDX comment byte ranges
1190    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191        &self.mdx_comment_ranges
1192    }
1193
1194    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1195    /// Only active in Quarto flavor
1196    #[inline]
1197    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198        self.citation_ranges
1199            .iter()
1200            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201    }
1202
1203    /// Get all citation byte ranges (Quarto flavor only)
1204    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205        &self.citation_ranges
1206    }
1207
1208    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1209    #[inline]
1210    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211        self.shortcode_ranges
1212            .iter()
1213            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214    }
1215
1216    /// Get all shortcode byte ranges
1217    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218        &self.shortcode_ranges
1219    }
1220
1221    /// Check if a byte position is within a link reference definition title
1222    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223        self.reference_defs.iter().any(|def| {
1224            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225                byte_pos >= start && byte_pos < end
1226            } else {
1227                false
1228            }
1229        })
1230    }
1231
1232    /// Check if content has any instances of a specific character (fast)
1233    pub fn has_char(&self, ch: char) -> bool {
1234        match ch {
1235            '#' => self.char_frequency.hash_count > 0,
1236            '*' => self.char_frequency.asterisk_count > 0,
1237            '_' => self.char_frequency.underscore_count > 0,
1238            '-' => self.char_frequency.hyphen_count > 0,
1239            '+' => self.char_frequency.plus_count > 0,
1240            '>' => self.char_frequency.gt_count > 0,
1241            '|' => self.char_frequency.pipe_count > 0,
1242            '[' => self.char_frequency.bracket_count > 0,
1243            '`' => self.char_frequency.backtick_count > 0,
1244            '<' => self.char_frequency.lt_count > 0,
1245            '!' => self.char_frequency.exclamation_count > 0,
1246            '\n' => self.char_frequency.newline_count > 0,
1247            _ => self.content.contains(ch), // Fallback for other characters
1248        }
1249    }
1250
1251    /// Get count of a specific character (fast)
1252    pub fn char_count(&self, ch: char) -> usize {
1253        match ch {
1254            '#' => self.char_frequency.hash_count,
1255            '*' => self.char_frequency.asterisk_count,
1256            '_' => self.char_frequency.underscore_count,
1257            '-' => self.char_frequency.hyphen_count,
1258            '+' => self.char_frequency.plus_count,
1259            '>' => self.char_frequency.gt_count,
1260            '|' => self.char_frequency.pipe_count,
1261            '[' => self.char_frequency.bracket_count,
1262            '`' => self.char_frequency.backtick_count,
1263            '<' => self.char_frequency.lt_count,
1264            '!' => self.char_frequency.exclamation_count,
1265            '\n' => self.char_frequency.newline_count,
1266            _ => self.content.matches(ch).count(), // Fallback for other characters
1267        }
1268    }
1269
1270    /// Check if content likely contains headings (fast)
1271    pub fn likely_has_headings(&self) -> bool {
1272        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1273    }
1274
1275    /// Check if content likely contains lists (fast)
1276    pub fn likely_has_lists(&self) -> bool {
1277        self.char_frequency.asterisk_count > 0
1278            || self.char_frequency.hyphen_count > 0
1279            || self.char_frequency.plus_count > 0
1280    }
1281
1282    /// Check if content likely contains emphasis (fast)
1283    pub fn likely_has_emphasis(&self) -> bool {
1284        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285    }
1286
1287    /// Check if content likely contains tables (fast)
1288    pub fn likely_has_tables(&self) -> bool {
1289        self.char_frequency.pipe_count > 2
1290    }
1291
1292    /// Check if content likely contains blockquotes (fast)
1293    pub fn likely_has_blockquotes(&self) -> bool {
1294        self.char_frequency.gt_count > 0
1295    }
1296
1297    /// Check if content likely contains code (fast)
1298    pub fn likely_has_code(&self) -> bool {
1299        self.char_frequency.backtick_count > 0
1300    }
1301
1302    /// Check if content likely contains links or images (fast)
1303    pub fn likely_has_links_or_images(&self) -> bool {
1304        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305    }
1306
1307    /// Check if content likely contains HTML (fast)
1308    pub fn likely_has_html(&self) -> bool {
1309        self.char_frequency.lt_count > 0
1310    }
1311
1312    /// Get the blockquote prefix for inserting a blank line at the given line index.
1313    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1314    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1315    /// Returns an empty string if the line is not inside a blockquote.
1316    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317        if let Some(line_info) = self.lines.get(line_idx)
1318            && let Some(ref bq) = line_info.blockquote
1319        {
1320            bq.prefix.trim_end().to_string()
1321        } else {
1322            String::new()
1323        }
1324    }
1325
1326    /// Get HTML tags on a specific line
1327    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328        self.html_tags()
1329            .iter()
1330            .filter(|tag| tag.line == line_num)
1331            .cloned()
1332            .collect()
1333    }
1334
1335    /// Get emphasis spans on a specific line
1336    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337        self.emphasis_spans()
1338            .iter()
1339            .filter(|span| span.line == line_num)
1340            .cloned()
1341            .collect()
1342    }
1343
1344    /// Get table rows on a specific line
1345    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346        self.table_rows()
1347            .iter()
1348            .filter(|row| row.line == line_num)
1349            .cloned()
1350            .collect()
1351    }
1352
1353    /// Get bare URLs on a specific line
1354    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355        self.bare_urls()
1356            .iter()
1357            .filter(|url| url.line == line_num)
1358            .cloned()
1359            .collect()
1360    }
1361
1362    /// Find the line index for a given byte offset using binary search.
1363    /// Returns (line_index, line_number, column) where:
1364    /// - line_index is the 0-based index in the lines array
1365    /// - line_number is the 1-based line number
1366    /// - column is the byte offset within that line
1367    #[inline]
1368    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369        // Binary search to find the line containing this byte offset
1370        let idx = match lines.binary_search_by(|line| {
1371            if byte_offset < line.byte_offset {
1372                std::cmp::Ordering::Greater
1373            } else if byte_offset > line.byte_offset + line.byte_len {
1374                std::cmp::Ordering::Less
1375            } else {
1376                std::cmp::Ordering::Equal
1377            }
1378        }) {
1379            Ok(idx) => idx,
1380            Err(idx) => idx.saturating_sub(1),
1381        };
1382
1383        let line = &lines[idx];
1384        let line_num = idx + 1;
1385        let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387        (idx, line_num, col)
1388    }
1389
1390    /// Check if a byte offset is within a code span using binary search
1391    #[inline]
1392    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393        // Since spans are sorted by byte_offset, use partition_point for binary search
1394        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396        // Check the span that starts at or before our offset
1397        if idx > 0 {
1398            let span = &code_spans[idx - 1];
1399            if offset >= span.byte_offset && offset < span.byte_end {
1400                return true;
1401            }
1402        }
1403
1404        false
1405    }
1406
1407    /// Collect byte ranges of all links using pulldown-cmark
1408    /// This is used to skip heading detection for lines that fall within link syntax
1409    /// (e.g., multiline links like `[text](url\n#fragment)`)
1410    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413        let mut link_ranges = Vec::new();
1414        let mut options = Options::empty();
1415        options.insert(Options::ENABLE_WIKILINKS);
1416        options.insert(Options::ENABLE_FOOTNOTES);
1417
1418        let parser = Parser::new_ext(content, options).into_offset_iter();
1419        let mut link_stack: Vec<usize> = Vec::new();
1420
1421        for (event, range) in parser {
1422            match event {
1423                Event::Start(Tag::Link { .. }) => {
1424                    link_stack.push(range.start);
1425                }
1426                Event::End(TagEnd::Link) => {
1427                    if let Some(start_pos) = link_stack.pop() {
1428                        link_ranges.push((start_pos, range.end));
1429                    }
1430                }
1431                _ => {}
1432            }
1433        }
1434
1435        link_ranges
1436    }
1437
1438    /// Parse all links in the content
1439    fn parse_links(
1440        content: &'a str,
1441        lines: &[LineInfo],
1442        code_blocks: &[(usize, usize)],
1443        code_spans: &[CodeSpan],
1444        flavor: MarkdownFlavor,
1445        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448        use std::collections::HashSet;
1449
1450        let mut links = Vec::with_capacity(content.len() / 500);
1451        let mut broken_links = Vec::new();
1452        let mut footnote_refs = Vec::new();
1453
1454        // Track byte positions of links found by pulldown-cmark
1455        let mut found_positions = HashSet::new();
1456
1457        // Use pulldown-cmark's streaming parser with BrokenLink callback
1458        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1459        // This automatically handles:
1460        // - Escaped links (won't generate events)
1461        // - Links in code blocks/spans (won't generate Link events)
1462        // - Images (generates Tag::Image instead)
1463        // - Reference resolution (dest_url is already resolved!)
1464        // - Broken references (callback is invoked)
1465        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1466        let mut options = Options::empty();
1467        options.insert(Options::ENABLE_WIKILINKS);
1468        options.insert(Options::ENABLE_FOOTNOTES);
1469
1470        let parser = Parser::new_with_broken_link_callback(
1471            content,
1472            options,
1473            Some(|link: BrokenLink<'_>| {
1474                broken_links.push(BrokenLinkInfo {
1475                    reference: link.reference.to_string(),
1476                    span: link.span.clone(),
1477                });
1478                None
1479            }),
1480        )
1481        .into_offset_iter();
1482
1483        let mut link_stack: Vec<(
1484            usize,
1485            usize,
1486            pulldown_cmark::CowStr<'a>,
1487            LinkType,
1488            pulldown_cmark::CowStr<'a>,
1489        )> = Vec::new();
1490        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1491
1492        for (event, range) in parser {
1493            match event {
1494                Event::Start(Tag::Link {
1495                    link_type,
1496                    dest_url,
1497                    id,
1498                    ..
1499                }) => {
1500                    // Link start - record position, URL, and reference ID
1501                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1502                    text_chunks.clear();
1503                }
1504                Event::Text(text) if !link_stack.is_empty() => {
1505                    // Track text content with its byte range
1506                    text_chunks.push((text.to_string(), range.start, range.end));
1507                }
1508                Event::Code(code) if !link_stack.is_empty() => {
1509                    // Include inline code in link text (with backticks)
1510                    let code_text = format!("`{code}`");
1511                    text_chunks.push((code_text, range.start, range.end));
1512                }
1513                Event::End(TagEnd::Link) => {
1514                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515                        // Skip if in HTML comment
1516                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517                            text_chunks.clear();
1518                            continue;
1519                        }
1520
1521                        // Find line and column information
1522                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524                        // Skip if this link is on a MkDocs snippet line
1525                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526                            text_chunks.clear();
1527                            continue;
1528                        }
1529
1530                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532                        let is_reference = matches!(
1533                            link_type,
1534                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535                        );
1536
1537                        // Extract link text directly from source bytes to preserve escaping
1538                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1539                        let link_text = if start_pos < content.len() {
1540                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542                            // Find MATCHING ] by tracking bracket depth for nested brackets
1543                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1544                            // Brackets inside code spans (between backticks) should be ignored
1545                            let mut close_pos = None;
1546                            let mut depth = 0;
1547                            let mut in_code_span = false;
1548
1549                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550                                // Count preceding backslashes
1551                                let mut backslash_count = 0;
1552                                let mut j = i;
1553                                while j > 0 && link_bytes[j - 1] == b'\\' {
1554                                    backslash_count += 1;
1555                                    j -= 1;
1556                                }
1557                                let is_escaped = backslash_count % 2 != 0;
1558
1559                                // Track code spans - backticks toggle in/out of code
1560                                if byte == b'`' && !is_escaped {
1561                                    in_code_span = !in_code_span;
1562                                }
1563
1564                                // Only count brackets when NOT in a code span
1565                                if !is_escaped && !in_code_span {
1566                                    if byte == b'[' {
1567                                        depth += 1;
1568                                    } else if byte == b']' {
1569                                        if depth == 0 {
1570                                            // Found the matching closing bracket
1571                                            close_pos = Some(i);
1572                                            break;
1573                                        } else {
1574                                            depth -= 1;
1575                                        }
1576                                    }
1577                                }
1578                            }
1579
1580                            if let Some(pos) = close_pos {
1581                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582                            } else {
1583                                Cow::Borrowed("")
1584                            }
1585                        } else {
1586                            Cow::Borrowed("")
1587                        };
1588
1589                        // For reference links, use the actual reference ID from pulldown-cmark
1590                        let reference_id = if is_reference && !ref_id.is_empty() {
1591                            Some(Cow::Owned(ref_id.to_lowercase()))
1592                        } else if is_reference {
1593                            // For collapsed/shortcut references without explicit ID, use the link text
1594                            Some(Cow::Owned(link_text.to_lowercase()))
1595                        } else {
1596                            None
1597                        };
1598
1599                        // Track this position as found
1600                        found_positions.insert(start_pos);
1601
1602                        links.push(ParsedLink {
1603                            line: line_num,
1604                            start_col: col_start,
1605                            end_col: col_end,
1606                            byte_offset: start_pos,
1607                            byte_end: range.end,
1608                            text: link_text,
1609                            url: Cow::Owned(url.to_string()),
1610                            is_reference,
1611                            reference_id,
1612                            link_type,
1613                        });
1614
1615                        text_chunks.clear();
1616                    }
1617                }
1618                Event::FootnoteReference(footnote_id) => {
1619                    // Capture footnote references like [^1], [^note]
1620                    // Skip if in HTML comment
1621                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622                        continue;
1623                    }
1624
1625                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626                    footnote_refs.push(FootnoteRef {
1627                        id: footnote_id.to_string(),
1628                        line: line_num,
1629                        byte_offset: range.start,
1630                        byte_end: range.end,
1631                    });
1632                }
1633                _ => {}
1634            }
1635        }
1636
1637        // Also find undefined references using regex
1638        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1639        // because the reference is undefined
1640        for cap in LINK_PATTERN.captures_iter(content) {
1641            let full_match = cap.get(0).unwrap();
1642            let match_start = full_match.start();
1643            let match_end = full_match.end();
1644
1645            // Skip if this was already found by pulldown-cmark (it's a valid link)
1646            if found_positions.contains(&match_start) {
1647                continue;
1648            }
1649
1650            // Skip if escaped
1651            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652                continue;
1653            }
1654
1655            // Skip if it's an image
1656            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657                continue;
1658            }
1659
1660            // Skip if in code block
1661            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662                continue;
1663            }
1664
1665            // Skip if in code span
1666            if Self::is_offset_in_code_span(code_spans, match_start) {
1667                continue;
1668            }
1669
1670            // Skip if in HTML comment
1671            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672                continue;
1673            }
1674
1675            // Find line and column information
1676            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678            // Skip if this link is on a MkDocs snippet line
1679            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680                continue;
1681            }
1682
1683            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685            let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687            // Only process reference links (group 6)
1688            if let Some(ref_id) = cap.get(6) {
1689                let ref_id_str = ref_id.as_str();
1690                let normalized_ref = if ref_id_str.is_empty() {
1691                    Cow::Owned(text.to_lowercase()) // Implicit reference
1692                } else {
1693                    Cow::Owned(ref_id_str.to_lowercase())
1694                };
1695
1696                // This is an undefined reference (pulldown-cmark didn't parse it)
1697                links.push(ParsedLink {
1698                    line: line_num,
1699                    start_col: col_start,
1700                    end_col: col_end,
1701                    byte_offset: match_start,
1702                    byte_end: match_end,
1703                    text: Cow::Borrowed(text),
1704                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1705                    is_reference: true,
1706                    reference_id: Some(normalized_ref),
1707                    link_type: LinkType::Reference, // Undefined references are reference-style
1708                });
1709            }
1710        }
1711
1712        (links, broken_links, footnote_refs)
1713    }
1714
1715    /// Parse all images in the content
1716    fn parse_images(
1717        content: &'a str,
1718        lines: &[LineInfo],
1719        code_blocks: &[(usize, usize)],
1720        code_spans: &[CodeSpan],
1721        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722    ) -> Vec<ParsedImage<'a>> {
1723        use crate::utils::skip_context::is_in_html_comment_ranges;
1724        use std::collections::HashSet;
1725
1726        // Pre-size based on a heuristic: images are less common than links
1727        let mut images = Vec::with_capacity(content.len() / 1000);
1728        let mut found_positions = HashSet::new();
1729
1730        // Use pulldown-cmark for parsing - more accurate and faster
1731        let parser = Parser::new(content).into_offset_iter();
1732        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733            Vec::new();
1734        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1735
1736        for (event, range) in parser {
1737            match event {
1738                Event::Start(Tag::Image {
1739                    link_type,
1740                    dest_url,
1741                    id,
1742                    ..
1743                }) => {
1744                    image_stack.push((range.start, dest_url, link_type, id));
1745                    text_chunks.clear();
1746                }
1747                Event::Text(text) if !image_stack.is_empty() => {
1748                    text_chunks.push((text.to_string(), range.start, range.end));
1749                }
1750                Event::Code(code) if !image_stack.is_empty() => {
1751                    let code_text = format!("`{code}`");
1752                    text_chunks.push((code_text, range.start, range.end));
1753                }
1754                Event::End(TagEnd::Image) => {
1755                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756                        // Skip if in code block
1757                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758                            continue;
1759                        }
1760
1761                        // Skip if in code span
1762                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1763                            continue;
1764                        }
1765
1766                        // Skip if in HTML comment
1767                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768                            continue;
1769                        }
1770
1771                        // Find line and column using binary search
1772                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775                        let is_reference = matches!(
1776                            link_type,
1777                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778                        );
1779
1780                        // Extract alt text directly from source bytes to preserve escaping
1781                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1782                        let alt_text = if start_pos < content.len() {
1783                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785                            // Find MATCHING ] by tracking bracket depth for nested brackets
1786                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1787                            let mut close_pos = None;
1788                            let mut depth = 0;
1789
1790                            if image_bytes.len() > 2 {
1791                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792                                    // Count preceding backslashes
1793                                    let mut backslash_count = 0;
1794                                    let mut j = i;
1795                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1796                                        backslash_count += 1;
1797                                        j -= 1;
1798                                    }
1799                                    let is_escaped = backslash_count % 2 != 0;
1800
1801                                    if !is_escaped {
1802                                        if byte == b'[' {
1803                                            depth += 1;
1804                                        } else if byte == b']' {
1805                                            if depth == 0 {
1806                                                // Found the matching closing bracket
1807                                                close_pos = Some(i);
1808                                                break;
1809                                            } else {
1810                                                depth -= 1;
1811                                            }
1812                                        }
1813                                    }
1814                                }
1815                            }
1816
1817                            if let Some(pos) = close_pos {
1818                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819                            } else {
1820                                Cow::Borrowed("")
1821                            }
1822                        } else {
1823                            Cow::Borrowed("")
1824                        };
1825
1826                        let reference_id = if is_reference && !ref_id.is_empty() {
1827                            Some(Cow::Owned(ref_id.to_lowercase()))
1828                        } else if is_reference {
1829                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1830                        } else {
1831                            None
1832                        };
1833
1834                        found_positions.insert(start_pos);
1835                        images.push(ParsedImage {
1836                            line: line_num,
1837                            start_col: col_start,
1838                            end_col: col_end,
1839                            byte_offset: start_pos,
1840                            byte_end: range.end,
1841                            alt_text,
1842                            url: Cow::Owned(url.to_string()),
1843                            is_reference,
1844                            reference_id,
1845                            link_type,
1846                        });
1847                    }
1848                }
1849                _ => {}
1850            }
1851        }
1852
1853        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1854        for cap in IMAGE_PATTERN.captures_iter(content) {
1855            let full_match = cap.get(0).unwrap();
1856            let match_start = full_match.start();
1857            let match_end = full_match.end();
1858
1859            // Skip if already found by pulldown-cmark
1860            if found_positions.contains(&match_start) {
1861                continue;
1862            }
1863
1864            // Skip if the ! is escaped
1865            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866                continue;
1867            }
1868
1869            // Skip if in code block, code span, or HTML comment
1870            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871                || Self::is_offset_in_code_span(code_spans, match_start)
1872                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873            {
1874                continue;
1875            }
1876
1877            // Only process reference images (undefined references not found by pulldown-cmark)
1878            if let Some(ref_id) = cap.get(6) {
1879                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882                let ref_id_str = ref_id.as_str();
1883                let normalized_ref = if ref_id_str.is_empty() {
1884                    Cow::Owned(alt_text.to_lowercase())
1885                } else {
1886                    Cow::Owned(ref_id_str.to_lowercase())
1887                };
1888
1889                images.push(ParsedImage {
1890                    line: line_num,
1891                    start_col: col_start,
1892                    end_col: col_end,
1893                    byte_offset: match_start,
1894                    byte_end: match_end,
1895                    alt_text: Cow::Borrowed(alt_text),
1896                    url: Cow::Borrowed(""),
1897                    is_reference: true,
1898                    reference_id: Some(normalized_ref),
1899                    link_type: LinkType::Reference, // Undefined references are reference-style
1900                });
1901            }
1902        }
1903
1904        images
1905    }
1906
1907    /// Parse reference definitions
1908    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909        // Pre-size based on lines count as reference definitions are line-based
1910        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1911
1912        for (line_idx, line_info) in lines.iter().enumerate() {
1913            // Skip lines in code blocks
1914            if line_info.in_code_block {
1915                continue;
1916            }
1917
1918            let line = line_info.content(content);
1919            let line_num = line_idx + 1;
1920
1921            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922                let id_raw = cap.get(1).unwrap().as_str();
1923
1924                // Skip footnote definitions - they use [^id]: syntax and are semantically
1925                // different from reference link definitions
1926                if id_raw.starts_with('^') {
1927                    continue;
1928                }
1929
1930                let id = id_raw.to_lowercase();
1931                let url = cap.get(2).unwrap().as_str().to_string();
1932                let title_match = cap.get(3).or_else(|| cap.get(4));
1933                let title = title_match.map(|m| m.as_str().to_string());
1934
1935                // Calculate byte positions
1936                // The match starts at the beginning of the line (0) and extends to the end
1937                let match_obj = cap.get(0).unwrap();
1938                let byte_offset = line_info.byte_offset + match_obj.start();
1939                let byte_end = line_info.byte_offset + match_obj.end();
1940
1941                // Calculate title byte positions (includes the quote character before content)
1942                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943                    // The match is the content inside quotes, so we include the quote before
1944                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1945                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1946                    (Some(start), Some(end))
1947                } else {
1948                    (None, None)
1949                };
1950
1951                refs.push(ReferenceDef {
1952                    line: line_num,
1953                    id,
1954                    url,
1955                    title,
1956                    byte_offset,
1957                    byte_end,
1958                    title_byte_start,
1959                    title_byte_end,
1960                });
1961            }
1962        }
1963
1964        refs
1965    }
1966
1967    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1968    /// Handles nested blockquotes like `> > > content`
1969    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1970    #[inline]
1971    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1972        let trimmed_start = line.trim_start();
1973        if !trimmed_start.starts_with('>') {
1974            return None;
1975        }
1976
1977        // Track total prefix length to handle nested blockquotes
1978        let mut remaining = line;
1979        let mut total_prefix_len = 0;
1980
1981        loop {
1982            let trimmed = remaining.trim_start();
1983            if !trimmed.starts_with('>') {
1984                break;
1985            }
1986
1987            // Add leading whitespace + '>' to prefix
1988            let leading_ws_len = remaining.len() - trimmed.len();
1989            total_prefix_len += leading_ws_len + 1;
1990
1991            let after_gt = &trimmed[1..];
1992
1993            // Handle optional whitespace after '>' (space or tab)
1994            if let Some(stripped) = after_gt.strip_prefix(' ') {
1995                total_prefix_len += 1;
1996                remaining = stripped;
1997            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1998                total_prefix_len += 1;
1999                remaining = stripped;
2000            } else {
2001                remaining = after_gt;
2002            }
2003        }
2004
2005        Some((&line[..total_prefix_len], remaining))
2006    }
2007
2008    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2009    ///
2010    /// Returns a HashMap keyed by line byte offset, containing:
2011    /// `(is_ordered, marker, marker_column, content_column, number)`
2012    ///
2013    /// ## Why pulldown-cmark?
2014    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2015    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2016    /// This fixes issue #253 where continuation lines were falsely detected.
2017    ///
2018    /// ## Tab indentation quirk
2019    /// Pulldown-cmark reports nested list items at the newline character position
2020    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2021    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2022    /// We detect this and advance to the correct line.
2023    ///
2024    /// ## HashMap key strategy
2025    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2026    /// that resolve to the same line (after newline adjustment). The first event
2027    /// for each line is authoritative.
2028    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2029    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2030    /// This avoids a separate parse for emphasis detection.
2031    fn detect_list_items_and_emphasis_with_pulldown(
2032        content: &str,
2033        line_offsets: &[usize],
2034        flavor: MarkdownFlavor,
2035        front_matter_end: usize,
2036        code_blocks: &[(usize, usize)],
2037    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038        use std::collections::HashMap;
2039
2040        let mut list_items = HashMap::new();
2041        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043        let mut options = Options::empty();
2044        options.insert(Options::ENABLE_TABLES);
2045        options.insert(Options::ENABLE_FOOTNOTES);
2046        options.insert(Options::ENABLE_STRIKETHROUGH);
2047        options.insert(Options::ENABLE_TASKLISTS);
2048        // Always enable GFM features for consistency with existing behavior
2049        options.insert(Options::ENABLE_GFM);
2050
2051        // Suppress unused variable warning
2052        let _ = flavor;
2053
2054        let parser = Parser::new_ext(content, options).into_offset_iter();
2055        let mut list_depth: usize = 0;
2056        let mut list_stack: Vec<bool> = Vec::new();
2057
2058        for (event, range) in parser {
2059            match event {
2060                // Capture emphasis spans (for MD030's emphasis detection)
2061                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063                        2
2064                    } else {
2065                        1
2066                    };
2067                    let match_start = range.start;
2068                    let match_end = range.end;
2069
2070                    // Skip if in code block
2071                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072                        // Determine marker character by looking at the content at the start
2073                        let marker = content[match_start..].chars().next().unwrap_or('*');
2074                        if marker == '*' || marker == '_' {
2075                            // Extract content between markers
2076                            let content_start = match_start + marker_count;
2077                            let content_end = if match_end >= marker_count {
2078                                match_end - marker_count
2079                            } else {
2080                                match_end
2081                            };
2082                            let content_part = if content_start < content_end && content_end <= content.len() {
2083                                &content[content_start..content_end]
2084                            } else {
2085                                ""
2086                            };
2087
2088                            // Find which line this emphasis is on using line_offsets
2089                            let line_idx = match line_offsets.binary_search(&match_start) {
2090                                Ok(idx) => idx,
2091                                Err(idx) => idx.saturating_sub(1),
2092                            };
2093                            let line_num = line_idx + 1;
2094                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095                            let col_start = match_start - line_start;
2096                            let col_end = match_end - line_start;
2097
2098                            emphasis_spans.push(EmphasisSpan {
2099                                line: line_num,
2100                                start_col: col_start,
2101                                end_col: col_end,
2102                                byte_offset: match_start,
2103                                byte_end: match_end,
2104                                marker,
2105                                marker_count,
2106                                content: content_part.to_string(),
2107                            });
2108                        }
2109                    }
2110                }
2111                Event::Start(Tag::List(start_number)) => {
2112                    list_depth += 1;
2113                    list_stack.push(start_number.is_some());
2114                }
2115                Event::End(TagEnd::List(_)) => {
2116                    list_depth = list_depth.saturating_sub(1);
2117                    list_stack.pop();
2118                }
2119                Event::Start(Tag::Item) if list_depth > 0 => {
2120                    // Get the ordered state for the CURRENT (innermost) list
2121                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122                    // Find which line this byte offset corresponds to
2123                    let item_start = range.start;
2124
2125                    // Binary search to find the line number
2126                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2127                        Ok(idx) => idx,
2128                        Err(idx) => idx.saturating_sub(1),
2129                    };
2130
2131                    // Pulldown-cmark reports nested list items at the newline before the item
2132                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2133                    // Advance to the actual content line in this case.
2134                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135                        line_idx += 1;
2136                    }
2137
2138                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2139                    if front_matter_end > 0 && line_idx < front_matter_end {
2140                        continue;
2141                    }
2142
2143                    if line_idx < line_offsets.len() {
2144                        let line_start_byte = line_offsets[line_idx];
2145                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146                        let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148                        // Strip trailing newline
2149                        let line = line
2150                            .strip_suffix('\n')
2151                            .or_else(|| line.strip_suffix("\r\n"))
2152                            .unwrap_or(line);
2153
2154                        // Strip blockquote prefix if present
2155                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2156                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157                            (prefix.len(), content)
2158                        } else {
2159                            (0, line)
2160                        };
2161
2162                        // Parse the list marker from the actual line
2163                        if current_list_is_ordered {
2164                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165                                Self::parse_ordered_list(line_to_parse)
2166                            {
2167                                let marker = format!("{number_str}{delimiter}");
2168                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2169                                let content_column = marker_column + marker.len() + spacing.len();
2170                                let number = number_str.parse().ok();
2171
2172                                list_items.entry(line_start_byte).or_insert((
2173                                    true,
2174                                    marker,
2175                                    marker_column,
2176                                    content_column,
2177                                    number,
2178                                ));
2179                            }
2180                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2181                            Self::parse_unordered_list(line_to_parse)
2182                        {
2183                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2184                            let content_column = marker_column + 1 + spacing.len();
2185
2186                            list_items.entry(line_start_byte).or_insert((
2187                                false,
2188                                marker.to_string(),
2189                                marker_column,
2190                                content_column,
2191                                None,
2192                            ));
2193                        }
2194                    }
2195                }
2196                _ => {}
2197            }
2198        }
2199
2200        (list_items, emphasis_spans)
2201    }
2202
2203    /// Fast unordered list parser - replaces regex for 5-10x speedup
2204    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2205    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2206    #[inline]
2207    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2208        let bytes = line.as_bytes();
2209        let mut i = 0;
2210
2211        // Skip leading whitespace
2212        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2213            i += 1;
2214        }
2215
2216        // Check for marker
2217        if i >= bytes.len() {
2218            return None;
2219        }
2220        let marker = bytes[i] as char;
2221        if marker != '-' && marker != '*' && marker != '+' {
2222            return None;
2223        }
2224        let marker_pos = i;
2225        i += 1;
2226
2227        // Collect spacing after marker (space or tab only)
2228        let spacing_start = i;
2229        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2230            i += 1;
2231        }
2232
2233        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2234    }
2235
2236    /// Fast ordered list parser - replaces regex for 5-10x speedup
2237    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2238    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2239    #[inline]
2240    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2241        let bytes = line.as_bytes();
2242        let mut i = 0;
2243
2244        // Skip leading whitespace
2245        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2246            i += 1;
2247        }
2248
2249        // Collect digits
2250        let number_start = i;
2251        while i < bytes.len() && bytes[i].is_ascii_digit() {
2252            i += 1;
2253        }
2254        if i == number_start {
2255            return None; // No digits found
2256        }
2257
2258        // Check for delimiter
2259        if i >= bytes.len() {
2260            return None;
2261        }
2262        let delimiter = bytes[i] as char;
2263        if delimiter != '.' && delimiter != ')' {
2264            return None;
2265        }
2266        let delimiter_pos = i;
2267        i += 1;
2268
2269        // Collect spacing after delimiter (space or tab only)
2270        let spacing_start = i;
2271        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2272            i += 1;
2273        }
2274
2275        Some((
2276            &line[..number_start],
2277            &line[number_start..delimiter_pos],
2278            delimiter,
2279            &line[spacing_start..i],
2280            &line[i..],
2281        ))
2282    }
2283
2284    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2285    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2286    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2287        let num_lines = line_offsets.len();
2288        let mut in_code_block = vec![false; num_lines];
2289
2290        // For each code block, mark all lines within it
2291        for &(start, end) in code_blocks {
2292            // Ensure we're at valid UTF-8 boundaries
2293            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2294                let mut boundary = start;
2295                while boundary > 0 && !content.is_char_boundary(boundary) {
2296                    boundary -= 1;
2297                }
2298                boundary
2299            } else {
2300                start
2301            };
2302
2303            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2304                let mut boundary = end;
2305                while boundary < content.len() && !content.is_char_boundary(boundary) {
2306                    boundary += 1;
2307                }
2308                boundary
2309            } else {
2310                end.min(content.len())
2311            };
2312
2313            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2314            // That function now has proper list context awareness (see code_block_utils.rs)
2315            // and correctly distinguishes between:
2316            // - Fenced code blocks (``` or ~~~)
2317            // - Indented code blocks at document level (4 spaces + blank line before)
2318            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2319            //
2320            // We no longer need to re-validate here. The original validation logic
2321            // was causing false positives by marking list continuation paragraphs as
2322            // code blocks when they have 4 spaces of indentation.
2323
2324            // Use binary search to find the first and last line indices
2325            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2326            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2327            //
2328            // Find the line that CONTAINS safe_start: the line with the largest
2329            // start offset that is <= safe_start. partition_point gives us the
2330            // first line that starts AFTER safe_start, so we subtract 1.
2331            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2332            let first_line = first_line_after.saturating_sub(1);
2333            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2334
2335            // Mark all lines in the range at once
2336            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2337                *flag = true;
2338            }
2339        }
2340
2341        in_code_block
2342    }
2343
2344    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2345    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2346    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2347        let content_lines: Vec<&str> = content.lines().collect();
2348        let num_lines = content_lines.len();
2349        let mut in_math_block = vec![false; num_lines];
2350
2351        let mut inside_math = false;
2352
2353        for (i, line) in content_lines.iter().enumerate() {
2354            // Skip lines that are in code blocks - math delimiters inside code are literal
2355            if code_block_map.get(i).copied().unwrap_or(false) {
2356                continue;
2357            }
2358
2359            let trimmed = line.trim();
2360
2361            // Check for math block delimiter ($$)
2362            // A line with just $$ toggles the math block state
2363            if trimmed == "$$" {
2364                if inside_math {
2365                    // Closing delimiter - this line is still part of the math block
2366                    in_math_block[i] = true;
2367                    inside_math = false;
2368                } else {
2369                    // Opening delimiter - this line starts the math block
2370                    in_math_block[i] = true;
2371                    inside_math = true;
2372                }
2373            } else if inside_math {
2374                // Content inside math block
2375                in_math_block[i] = true;
2376            }
2377        }
2378
2379        in_math_block
2380    }
2381
2382    /// Pre-compute basic line information (without headings/blockquotes)
2383    /// Also returns emphasis spans detected during the pulldown-cmark parse
2384    fn compute_basic_line_info(
2385        content: &str,
2386        line_offsets: &[usize],
2387        code_blocks: &[(usize, usize)],
2388        flavor: MarkdownFlavor,
2389        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393        let content_lines: Vec<&str> = content.lines().collect();
2394        let mut lines = Vec::with_capacity(content_lines.len());
2395
2396        // Pre-compute which lines are in code blocks
2397        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399        // Pre-compute which lines are in math blocks ($$ ... $$)
2400        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402        // Detect front matter boundaries FIRST, before any other parsing
2403        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2404        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2407        // (context-aware, eliminates false positives)
2408        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409            content,
2410            line_offsets,
2411            flavor,
2412            front_matter_end,
2413            code_blocks,
2414        );
2415
2416        for (i, line) in content_lines.iter().enumerate() {
2417            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418            let indent = line.len() - line.trim_start().len();
2419            // Compute visual indent with proper CommonMark tab expansion
2420            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2423            let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425            // For blank detection, consider blockquote context
2426            let is_blank = if let Some((_, content)) = blockquote_parse {
2427                // In blockquote context, check if content after prefix is blank
2428                content.trim().is_empty()
2429            } else {
2430                line.trim().is_empty()
2431            };
2432
2433            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2434            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2437            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2440            // This ensures content after `-->` on the same line is not incorrectly skipped
2441            let line_end_offset = byte_offset + line.len();
2442            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443                html_comment_ranges,
2444                byte_offset,
2445                line_end_offset,
2446            );
2447            // Use pulldown-cmark's list detection for context-aware parsing
2448            // This eliminates false positives on continuation lines (issue #253)
2449            let list_item =
2450                list_item_map
2451                    .get(&byte_offset)
2452                    .map(
2453                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454                            marker: marker.clone(),
2455                            is_ordered: *is_ordered,
2456                            number: *number,
2457                            marker_column: *marker_column,
2458                            content_column: *content_column,
2459                        },
2460                    );
2461
2462            // Detect horizontal rules (only outside code blocks and frontmatter)
2463            // Uses CommonMark-compliant check including leading indentation validation
2464            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467            // Get math block status for this line
2468            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470            // Check if line is inside a Quarto div block
2471            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474            lines.push(LineInfo {
2475                byte_offset,
2476                byte_len: line.len(),
2477                indent,
2478                visual_indent,
2479                is_blank,
2480                in_code_block,
2481                in_front_matter,
2482                in_html_block: false, // Will be populated after line creation
2483                in_html_comment,
2484                list_item,
2485                heading: None,    // Will be populated in second pass for Setext headings
2486                blockquote: None, // Will be populated after line creation
2487                in_mkdocstrings,
2488                in_esm_block: false, // Will be populated after line creation for MDX files
2489                in_code_span_continuation: false, // Will be populated after code spans are parsed
2490                is_horizontal_rule: is_hr,
2491                in_math_block,
2492                in_quarto_div,
2493                in_jsx_expression: false,  // Will be populated for MDX files
2494                in_mdx_comment: false,     // Will be populated for MDX files
2495                in_jsx_component: false,   // Will be populated for MDX files
2496                in_jsx_fragment: false,    // Will be populated for MDX files
2497                in_admonition: false,      // Will be populated for MkDocs files
2498                in_content_tab: false,     // Will be populated for MkDocs files
2499                in_definition_list: false, // Will be populated for MkDocs files
2500            });
2501        }
2502
2503        (lines, emphasis_spans)
2504    }
2505
2506    /// Detect headings and blockquotes (called after HTML block detection)
2507    fn detect_headings_and_blockquotes(
2508        content: &str,
2509        lines: &mut [LineInfo],
2510        flavor: MarkdownFlavor,
2511        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2512        link_byte_ranges: &[(usize, usize)],
2513    ) {
2514        // Regex for heading detection
2515        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2516            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2517        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2518            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2519
2520        let content_lines: Vec<&str> = content.lines().collect();
2521
2522        // Detect front matter boundaries to skip those lines
2523        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2524
2525        // Detect headings (including Setext which needs look-ahead) and blockquotes
2526        for i in 0..lines.len() {
2527            let line = content_lines[i];
2528
2529            // Detect blockquotes FIRST, before any skip conditions.
2530            // A line can be both a blockquote AND contain a code block inside it.
2531            // We need to know about the blockquote marker regardless of code block status.
2532            // Skip only frontmatter lines - those are never blockquotes.
2533            if !(front_matter_end > 0 && i < front_matter_end)
2534                && let Some(bq) = parse_blockquote_detailed(line)
2535            {
2536                let nesting_level = bq.markers.len();
2537                let marker_column = bq.indent.len();
2538                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2539                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2540                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2541                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2542
2543                lines[i].blockquote = Some(BlockquoteInfo {
2544                    nesting_level,
2545                    indent: bq.indent.to_string(),
2546                    marker_column,
2547                    prefix,
2548                    content: bq.content.to_string(),
2549                    has_no_space_after_marker: has_no_space,
2550                    has_multiple_spaces_after_marker: has_multiple_spaces,
2551                    needs_md028_fix,
2552                });
2553
2554                // Update is_horizontal_rule for blockquote content
2555                // The original detection doesn't strip blockquote prefix, so we need to check here
2556                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2557                    lines[i].is_horizontal_rule = true;
2558                }
2559            }
2560
2561            // Now apply skip conditions for heading detection
2562            if lines[i].in_code_block {
2563                continue;
2564            }
2565
2566            // Skip lines in front matter
2567            if front_matter_end > 0 && i < front_matter_end {
2568                continue;
2569            }
2570
2571            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2572            if lines[i].in_html_block {
2573                continue;
2574            }
2575
2576            // Skip heading detection for blank lines
2577            if lines[i].is_blank {
2578                continue;
2579            }
2580
2581            // Check for ATX headings (but skip MkDocs snippet lines)
2582            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2583            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2584                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2585                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2586            } else {
2587                false
2588            };
2589
2590            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2591                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2592                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2593                    continue;
2594                }
2595                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2596                // This prevents false positives where `#fragment` is detected as a heading
2597                let line_offset = lines[i].byte_offset;
2598                if link_byte_ranges
2599                    .iter()
2600                    .any(|&(start, end)| line_offset > start && line_offset < end)
2601                {
2602                    continue;
2603                }
2604                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2605                let hashes = caps.get(2).map_or("", |m| m.as_str());
2606                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2607                let rest = caps.get(4).map_or("", |m| m.as_str());
2608
2609                let level = hashes.len() as u8;
2610                let marker_column = leading_spaces.len();
2611
2612                // Check for closing sequence, but handle custom IDs that might come after
2613                let (text, has_closing, closing_seq) = {
2614                    // First check if there's a custom ID at the end
2615                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2616                        // Check if this looks like a valid custom ID (ends with })
2617                        if rest[id_start..].trim_end().ends_with('}') {
2618                            // Split off the custom ID
2619                            (&rest[..id_start], &rest[id_start..])
2620                        } else {
2621                            (rest, "")
2622                        }
2623                    } else {
2624                        (rest, "")
2625                    };
2626
2627                    // Now look for closing hashes in the part before the custom ID
2628                    let trimmed_rest = rest_without_id.trim_end();
2629                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2630                        // Find the start of the hash sequence by walking backwards
2631                        // Use char_indices to get byte positions at char boundaries
2632                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2633
2634                        // Find which char index corresponds to last_hash_byte_pos
2635                        let last_hash_char_idx = char_positions
2636                            .iter()
2637                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2638
2639                        if let Some(mut char_idx) = last_hash_char_idx {
2640                            // Walk backwards to find start of hash sequence
2641                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2642                                char_idx -= 1;
2643                            }
2644
2645                            // Get the byte position of the start of hashes
2646                            let start_of_hashes = char_positions[char_idx].0;
2647
2648                            // Check if there's at least one space before the closing hashes
2649                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2650
2651                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2652                            let potential_closing = &trimmed_rest[start_of_hashes..];
2653                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2654
2655                            if is_all_hashes && has_space_before {
2656                                // This is a closing sequence
2657                                let closing_hashes = potential_closing.to_string();
2658                                // The text is everything before the closing hashes
2659                                // Don't include the custom ID here - it will be extracted later
2660                                let text_part = if !custom_id_part.is_empty() {
2661                                    // If we have a custom ID, append it back to get the full rest
2662                                    // This allows the extract_header_id function to handle it properly
2663                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2664                                } else {
2665                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2666                                };
2667                                (text_part, true, closing_hashes)
2668                            } else {
2669                                // Not a valid closing sequence, return the full content
2670                                (rest.to_string(), false, String::new())
2671                            }
2672                        } else {
2673                            // Couldn't find char boundary, return the full content
2674                            (rest.to_string(), false, String::new())
2675                        }
2676                    } else {
2677                        // No hashes found, return the full content
2678                        (rest.to_string(), false, String::new())
2679                    }
2680                };
2681
2682                let content_column = marker_column + hashes.len() + spaces_after.len();
2683
2684                // Extract custom header ID if present
2685                let raw_text = text.trim().to_string();
2686                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2687
2688                // If no custom ID was found on the header line, check the next line for standalone attr-list
2689                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2690                    let next_line = content_lines[i + 1];
2691                    if !lines[i + 1].in_code_block
2692                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2693                        && let Some(next_line_id) =
2694                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2695                    {
2696                        custom_id = Some(next_line_id);
2697                    }
2698                }
2699
2700                // ATX heading is "valid" for processing by heading rules if:
2701                // 1. Has space after # (CommonMark compliant): `# Heading`
2702                // 2. Is empty (just hashes): `#`
2703                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2704                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2705                //
2706                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2707                // - `#tag` - single # with lowercase (social hashtag)
2708                // - `#123` - single # with number (GitHub issue ref)
2709                let is_valid = !spaces_after.is_empty()
2710                    || rest.is_empty()
2711                    || level > 1
2712                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2713
2714                lines[i].heading = Some(HeadingInfo {
2715                    level,
2716                    style: HeadingStyle::ATX,
2717                    marker: hashes.to_string(),
2718                    marker_column,
2719                    content_column,
2720                    text: clean_text,
2721                    custom_id,
2722                    raw_text,
2723                    has_closing_sequence: has_closing,
2724                    closing_sequence: closing_seq,
2725                    is_valid,
2726                });
2727            }
2728            // Check for Setext headings (need to look at next line)
2729            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2730                let next_line = content_lines[i + 1];
2731                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2732                    // Skip if next line is front matter delimiter
2733                    if front_matter_end > 0 && i < front_matter_end {
2734                        continue;
2735                    }
2736
2737                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2738                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2739                    {
2740                        continue;
2741                    }
2742
2743                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2744                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2745                    let content_line = line.trim();
2746
2747                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2748                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2749                        continue;
2750                    }
2751
2752                    // Skip underscore thematic breaks (___)
2753                    if content_line.starts_with('_') {
2754                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2755                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2756                            continue;
2757                        }
2758                    }
2759
2760                    // Skip numbered lists (1. Item, 2. Item, etc.)
2761                    if let Some(first_char) = content_line.chars().next()
2762                        && first_char.is_ascii_digit()
2763                    {
2764                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2765                        if num_end < content_line.len() {
2766                            let next = content_line.chars().nth(num_end);
2767                            if next == Some('.') || next == Some(')') {
2768                                continue;
2769                            }
2770                        }
2771                    }
2772
2773                    // Skip ATX headings
2774                    if ATX_HEADING_REGEX.is_match(line) {
2775                        continue;
2776                    }
2777
2778                    // Skip blockquotes
2779                    if content_line.starts_with('>') {
2780                        continue;
2781                    }
2782
2783                    // Skip code fences
2784                    let trimmed_start = line.trim_start();
2785                    if trimmed_start.len() >= 3 {
2786                        let first_three: String = trimmed_start.chars().take(3).collect();
2787                        if first_three == "```" || first_three == "~~~" {
2788                            continue;
2789                        }
2790                    }
2791
2792                    // Skip HTML blocks
2793                    if content_line.starts_with('<') {
2794                        continue;
2795                    }
2796
2797                    let underline = next_line.trim();
2798
2799                    let level = if underline.starts_with('=') { 1 } else { 2 };
2800                    let style = if level == 1 {
2801                        HeadingStyle::Setext1
2802                    } else {
2803                        HeadingStyle::Setext2
2804                    };
2805
2806                    // Extract custom header ID if present
2807                    let raw_text = line.trim().to_string();
2808                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2809
2810                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2811                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2812                        let attr_line = content_lines[i + 2];
2813                        if !lines[i + 2].in_code_block
2814                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2815                            && let Some(attr_line_id) =
2816                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2817                        {
2818                            custom_id = Some(attr_line_id);
2819                        }
2820                    }
2821
2822                    lines[i].heading = Some(HeadingInfo {
2823                        level,
2824                        style,
2825                        marker: underline.to_string(),
2826                        marker_column: next_line.len() - next_line.trim_start().len(),
2827                        content_column: lines[i].indent,
2828                        text: clean_text,
2829                        custom_id,
2830                        raw_text,
2831                        has_closing_sequence: false,
2832                        closing_sequence: String::new(),
2833                        is_valid: true, // Setext headings are always valid
2834                    });
2835                }
2836            }
2837        }
2838    }
2839
2840    /// Detect HTML blocks in the content
2841    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842        // HTML block elements that trigger block context
2843        // Includes HTML5 media, embedded content, and interactive elements
2844        const BLOCK_ELEMENTS: &[&str] = &[
2845            "address",
2846            "article",
2847            "aside",
2848            "audio",
2849            "blockquote",
2850            "canvas",
2851            "details",
2852            "dialog",
2853            "dd",
2854            "div",
2855            "dl",
2856            "dt",
2857            "embed",
2858            "fieldset",
2859            "figcaption",
2860            "figure",
2861            "footer",
2862            "form",
2863            "h1",
2864            "h2",
2865            "h3",
2866            "h4",
2867            "h5",
2868            "h6",
2869            "header",
2870            "hr",
2871            "iframe",
2872            "li",
2873            "main",
2874            "menu",
2875            "nav",
2876            "noscript",
2877            "object",
2878            "ol",
2879            "p",
2880            "picture",
2881            "pre",
2882            "script",
2883            "search",
2884            "section",
2885            "source",
2886            "style",
2887            "summary",
2888            "svg",
2889            "table",
2890            "tbody",
2891            "td",
2892            "template",
2893            "textarea",
2894            "tfoot",
2895            "th",
2896            "thead",
2897            "tr",
2898            "track",
2899            "ul",
2900            "video",
2901        ];
2902
2903        let mut i = 0;
2904        while i < lines.len() {
2905            // Skip if already in code block or front matter
2906            if lines[i].in_code_block || lines[i].in_front_matter {
2907                i += 1;
2908                continue;
2909            }
2910
2911            let trimmed = lines[i].content(content).trim_start();
2912
2913            // Check if line starts with an HTML tag
2914            if trimmed.starts_with('<') && trimmed.len() > 1 {
2915                // Extract tag name safely
2916                let after_bracket = &trimmed[1..];
2917                let is_closing = after_bracket.starts_with('/');
2918                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920                // Extract tag name (stop at space, >, /, or end of string)
2921                let tag_name = tag_start
2922                    .chars()
2923                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924                    .collect::<String>()
2925                    .to_lowercase();
2926
2927                // Check if it's a block element
2928                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929                    // Mark this line as in HTML block
2930                    lines[i].in_html_block = true;
2931
2932                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2933                    // This avoids complex nesting logic that might cause infinite loops
2934                    if !is_closing {
2935                        let closing_tag = format!("</{tag_name}>");
2936                        // style and script tags can contain blank lines (CSS/JS formatting)
2937                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2938                        let mut j = i + 1;
2939                        let mut found_closing_tag = false;
2940                        while j < lines.len() && j < i + 100 {
2941                            // Limit search to 100 lines
2942                            // Stop at blank lines (except for style/script tags)
2943                            if !allow_blank_lines && lines[j].is_blank {
2944                                break;
2945                            }
2946
2947                            lines[j].in_html_block = true;
2948
2949                            // Check if this line contains the closing tag
2950                            if lines[j].content(content).contains(&closing_tag) {
2951                                found_closing_tag = true;
2952                            }
2953
2954                            // After finding closing tag, continue marking lines as
2955                            // in_html_block until blank line (per CommonMark spec)
2956                            if found_closing_tag {
2957                                j += 1;
2958                                // Continue marking subsequent lines until blank
2959                                while j < lines.len() && j < i + 100 {
2960                                    if lines[j].is_blank {
2961                                        break;
2962                                    }
2963                                    lines[j].in_html_block = true;
2964                                    j += 1;
2965                                }
2966                                break;
2967                            }
2968                            j += 1;
2969                        }
2970                    }
2971                }
2972            }
2973
2974            i += 1;
2975        }
2976    }
2977
2978    /// Detect ESM import/export blocks anywhere in MDX files
2979    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2980    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2981        // Only process MDX files
2982        if !flavor.supports_esm_blocks() {
2983            return;
2984        }
2985
2986        let mut in_multiline_import = false;
2987
2988        for line in lines.iter_mut() {
2989            // Skip code blocks, front matter, and HTML comments
2990            if line.in_code_block || line.in_front_matter || line.in_html_comment {
2991                in_multiline_import = false;
2992                continue;
2993            }
2994
2995            let line_content = line.content(content);
2996            let trimmed = line_content.trim();
2997
2998            // Handle continuation of multi-line import/export
2999            if in_multiline_import {
3000                line.in_esm_block = true;
3001                // Check if this line completes the statement
3002                // Multi-line import ends when we see the closing quote + optional semicolon
3003                if trimmed.ends_with('\'')
3004                    || trimmed.ends_with('"')
3005                    || trimmed.ends_with("';")
3006                    || trimmed.ends_with("\";")
3007                    || line_content.contains(';')
3008                {
3009                    in_multiline_import = false;
3010                }
3011                continue;
3012            }
3013
3014            // Skip blank lines
3015            if line.is_blank {
3016                continue;
3017            }
3018
3019            // Check if line starts with import or export
3020            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3021                line.in_esm_block = true;
3022
3023                // Determine if this is a complete single-line statement or starts a multi-line one
3024                // Multi-line imports look like:
3025                //   import {
3026                //     Foo,
3027                //     Bar
3028                //   } from 'module'
3029                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3030                let is_import = trimmed.starts_with("import ");
3031
3032                // Check for simple complete statements
3033                let is_complete =
3034                    // Ends with semicolon
3035                    trimmed.ends_with(';')
3036                    // import/export with from clause that ends with quote
3037                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3038                    // Simple export (export const/let/var/function/class without from)
3039                    || (!is_import && !trimmed.contains(" from ") && (
3040                        trimmed.starts_with("export const ")
3041                        || trimmed.starts_with("export let ")
3042                        || trimmed.starts_with("export var ")
3043                        || trimmed.starts_with("export function ")
3044                        || trimmed.starts_with("export class ")
3045                        || trimmed.starts_with("export default ")
3046                    ));
3047
3048                if !is_complete && is_import {
3049                    // Only imports can span multiple lines in the typical case
3050                    // Check if it looks like the start of a multi-line import
3051                    // e.g., "import {" or "import type {"
3052                    if trimmed.contains('{') && !trimmed.contains('}') {
3053                        in_multiline_import = true;
3054                    }
3055                }
3056            }
3057        }
3058    }
3059
3060    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3061    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3062    fn detect_jsx_and_mdx_comments(
3063        content: &str,
3064        lines: &mut [LineInfo],
3065        flavor: MarkdownFlavor,
3066        code_blocks: &[(usize, usize)],
3067    ) -> (ByteRanges, ByteRanges) {
3068        // Only process MDX files
3069        if !flavor.supports_jsx() {
3070            return (Vec::new(), Vec::new());
3071        }
3072
3073        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3074        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3075
3076        // Quick check - if no braces, no JSX expressions or MDX comments
3077        if !content.contains('{') {
3078            return (jsx_expression_ranges, mdx_comment_ranges);
3079        }
3080
3081        let bytes = content.as_bytes();
3082        let mut i = 0;
3083
3084        while i < bytes.len() {
3085            if bytes[i] == b'{' {
3086                // Check if we're in a code block
3087                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3088                    i += 1;
3089                    continue;
3090                }
3091
3092                let start = i;
3093
3094                // Check if it's an MDX comment: {/* ... */}
3095                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3096                    // Find the closing */}
3097                    let mut j = i + 3;
3098                    while j + 2 < bytes.len() {
3099                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3100                            let end = j + 3;
3101                            mdx_comment_ranges.push((start, end));
3102
3103                            // Mark lines as in MDX comment
3104                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3105                                line.in_mdx_comment = true;
3106                            });
3107
3108                            i = end;
3109                            break;
3110                        }
3111                        j += 1;
3112                    }
3113                    if j + 2 >= bytes.len() {
3114                        // Unclosed MDX comment - mark rest as comment
3115                        mdx_comment_ranges.push((start, bytes.len()));
3116                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3117                            line.in_mdx_comment = true;
3118                        });
3119                        break;
3120                    }
3121                } else {
3122                    // Regular JSX expression: { ... }
3123                    // Need to handle nested braces
3124                    let mut brace_depth = 1;
3125                    let mut j = i + 1;
3126                    let mut in_string = false;
3127                    let mut string_char = b'"';
3128
3129                    while j < bytes.len() && brace_depth > 0 {
3130                        let c = bytes[j];
3131
3132                        // Handle strings to avoid counting braces inside them
3133                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3134                            in_string = true;
3135                            string_char = c;
3136                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3137                            in_string = false;
3138                        } else if !in_string {
3139                            if c == b'{' {
3140                                brace_depth += 1;
3141                            } else if c == b'}' {
3142                                brace_depth -= 1;
3143                            }
3144                        }
3145                        j += 1;
3146                    }
3147
3148                    if brace_depth == 0 {
3149                        let end = j;
3150                        jsx_expression_ranges.push((start, end));
3151
3152                        // Mark lines as in JSX expression
3153                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3154                            line.in_jsx_expression = true;
3155                        });
3156
3157                        i = end;
3158                    } else {
3159                        i += 1;
3160                    }
3161                }
3162            } else {
3163                i += 1;
3164            }
3165        }
3166
3167        (jsx_expression_ranges, mdx_comment_ranges)
3168    }
3169
3170    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3171    /// and populate the corresponding fields in LineInfo
3172    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3173        if flavor != MarkdownFlavor::MkDocs {
3174            return;
3175        }
3176
3177        use crate::utils::mkdocs_admonitions;
3178        use crate::utils::mkdocs_definition_lists;
3179        use crate::utils::mkdocs_tabs;
3180
3181        let content_lines: Vec<&str> = content.lines().collect();
3182
3183        // Track admonition context
3184        let mut in_admonition = false;
3185        let mut admonition_indent = 0;
3186
3187        // Track tab context
3188        let mut in_tab = false;
3189        let mut tab_indent = 0;
3190
3191        // Track definition list context
3192        let mut in_definition = false;
3193
3194        for (i, line) in content_lines.iter().enumerate() {
3195            if i >= lines.len() {
3196                break;
3197            }
3198
3199            // Skip lines in code blocks
3200            if lines[i].in_code_block {
3201                continue;
3202            }
3203
3204            // Check for admonition markers
3205            if mkdocs_admonitions::is_admonition_start(line) {
3206                in_admonition = true;
3207                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3208                lines[i].in_admonition = true;
3209            } else if in_admonition {
3210                // Check if still in admonition content
3211                if line.trim().is_empty() {
3212                    // Blank lines are part of admonitions
3213                    lines[i].in_admonition = true;
3214                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3215                    lines[i].in_admonition = true;
3216                } else {
3217                    // End of admonition
3218                    in_admonition = false;
3219                    // Check if this line starts a new admonition
3220                    if mkdocs_admonitions::is_admonition_start(line) {
3221                        in_admonition = true;
3222                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223                        lines[i].in_admonition = true;
3224                    }
3225                }
3226            }
3227
3228            // Check for tab markers
3229            if mkdocs_tabs::is_tab_marker(line) {
3230                in_tab = true;
3231                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3232                lines[i].in_content_tab = true;
3233            } else if in_tab {
3234                // Check if still in tab content
3235                if line.trim().is_empty() {
3236                    // Blank lines are part of tabs
3237                    lines[i].in_content_tab = true;
3238                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3239                    lines[i].in_content_tab = true;
3240                } else {
3241                    // End of tab content
3242                    in_tab = false;
3243                    // Check if this line starts a new tab
3244                    if mkdocs_tabs::is_tab_marker(line) {
3245                        in_tab = true;
3246                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3247                        lines[i].in_content_tab = true;
3248                    }
3249                }
3250            }
3251
3252            // Check for definition list items
3253            if mkdocs_definition_lists::is_definition_line(line) {
3254                in_definition = true;
3255                lines[i].in_definition_list = true;
3256            } else if in_definition {
3257                // Check if continuation
3258                if mkdocs_definition_lists::is_definition_continuation(line) {
3259                    lines[i].in_definition_list = true;
3260                } else if line.trim().is_empty() {
3261                    // Blank line might continue definition
3262                    lines[i].in_definition_list = true;
3263                } else if mkdocs_definition_lists::could_be_term_line(line) {
3264                    // This could be a new term - check if followed by definition
3265                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3266                    {
3267                        lines[i].in_definition_list = true;
3268                    } else {
3269                        in_definition = false;
3270                    }
3271                } else {
3272                    in_definition = false;
3273                }
3274            } else if mkdocs_definition_lists::could_be_term_line(line) {
3275                // Check if this is a term followed by a definition
3276                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3277                    lines[i].in_definition_list = true;
3278                    in_definition = true;
3279                }
3280            }
3281        }
3282    }
3283
3284    /// Helper to mark lines within a byte range
3285    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3286    where
3287        F: FnMut(&mut LineInfo),
3288    {
3289        // Find lines that overlap with the range
3290        for line in lines.iter_mut() {
3291            let line_start = line.byte_offset;
3292            let line_end = line.byte_offset + line.byte_len;
3293
3294            // Check if this line overlaps with the range
3295            if line_start < end && line_end > start {
3296                f(line);
3297            }
3298        }
3299
3300        // Silence unused warning for content (needed for signature consistency)
3301        let _ = content;
3302    }
3303
3304    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3305    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3306        // Quick check - if no backticks, no code spans
3307        if !content.contains('`') {
3308            return Vec::new();
3309        }
3310
3311        // Use pulldown-cmark's streaming parser with byte offsets
3312        let parser = Parser::new(content).into_offset_iter();
3313        let mut ranges = Vec::new();
3314
3315        for (event, range) in parser {
3316            if let Event::Code(_) = event {
3317                ranges.push((range.start, range.end));
3318            }
3319        }
3320
3321        Self::build_code_spans_from_ranges(content, lines, &ranges)
3322    }
3323
3324    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3325        let mut code_spans = Vec::new();
3326        if ranges.is_empty() {
3327            return code_spans;
3328        }
3329
3330        for &(start_pos, end_pos) in ranges {
3331            // The range includes the backticks, extract the actual content
3332            let full_span = &content[start_pos..end_pos];
3333            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3334
3335            // Extract content between backticks, preserving spaces
3336            let content_start = start_pos + backtick_count;
3337            let content_end = end_pos - backtick_count;
3338            let span_content = if content_start < content_end {
3339                content[content_start..content_end].to_string()
3340            } else {
3341                String::new()
3342            };
3343
3344            // Use binary search to find line number - O(log n) instead of O(n)
3345            // Find the rightmost line whose byte_offset <= start_pos
3346            let line_idx = lines
3347                .partition_point(|line| line.byte_offset <= start_pos)
3348                .saturating_sub(1);
3349            let line_num = line_idx + 1;
3350            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3351
3352            // Find end column using binary search
3353            let end_line_idx = lines
3354                .partition_point(|line| line.byte_offset <= end_pos)
3355                .saturating_sub(1);
3356            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3357
3358            // Convert byte offsets to character positions for correct Unicode handling
3359            // This ensures consistency with warning.column which uses character positions
3360            let line_content = lines[line_idx].content(content);
3361            let col_start = if byte_col_start <= line_content.len() {
3362                line_content[..byte_col_start].chars().count()
3363            } else {
3364                line_content.chars().count()
3365            };
3366
3367            let end_line_content = lines[end_line_idx].content(content);
3368            let col_end = if byte_col_end <= end_line_content.len() {
3369                end_line_content[..byte_col_end].chars().count()
3370            } else {
3371                end_line_content.chars().count()
3372            };
3373
3374            code_spans.push(CodeSpan {
3375                line: line_num,
3376                end_line: end_line_idx + 1,
3377                start_col: col_start,
3378                end_col: col_end,
3379                byte_offset: start_pos,
3380                byte_end: end_pos,
3381                backtick_count,
3382                content: span_content,
3383            });
3384        }
3385
3386        // Sort by position to ensure consistent ordering
3387        code_spans.sort_by_key(|span| span.byte_offset);
3388
3389        code_spans
3390    }
3391
3392    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3393    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3394        let mut math_spans = Vec::new();
3395
3396        // Quick check - if no $ signs, no math spans
3397        if !content.contains('$') {
3398            return math_spans;
3399        }
3400
3401        // Use pulldown-cmark with ENABLE_MATH option
3402        let mut options = Options::empty();
3403        options.insert(Options::ENABLE_MATH);
3404        let parser = Parser::new_ext(content, options).into_offset_iter();
3405
3406        for (event, range) in parser {
3407            let (is_display, math_content) = match &event {
3408                Event::InlineMath(text) => (false, text.as_ref()),
3409                Event::DisplayMath(text) => (true, text.as_ref()),
3410                _ => continue,
3411            };
3412
3413            let start_pos = range.start;
3414            let end_pos = range.end;
3415
3416            // Use binary search to find line number - O(log n) instead of O(n)
3417            let line_idx = lines
3418                .partition_point(|line| line.byte_offset <= start_pos)
3419                .saturating_sub(1);
3420            let line_num = line_idx + 1;
3421            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3422
3423            // Find end column using binary search
3424            let end_line_idx = lines
3425                .partition_point(|line| line.byte_offset <= end_pos)
3426                .saturating_sub(1);
3427            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3428
3429            // Convert byte offsets to character positions for correct Unicode handling
3430            let line_content = lines[line_idx].content(content);
3431            let col_start = if byte_col_start <= line_content.len() {
3432                line_content[..byte_col_start].chars().count()
3433            } else {
3434                line_content.chars().count()
3435            };
3436
3437            let end_line_content = lines[end_line_idx].content(content);
3438            let col_end = if byte_col_end <= end_line_content.len() {
3439                end_line_content[..byte_col_end].chars().count()
3440            } else {
3441                end_line_content.chars().count()
3442            };
3443
3444            math_spans.push(MathSpan {
3445                line: line_num,
3446                end_line: end_line_idx + 1,
3447                start_col: col_start,
3448                end_col: col_end,
3449                byte_offset: start_pos,
3450                byte_end: end_pos,
3451                is_display,
3452                content: math_content.to_string(),
3453            });
3454        }
3455
3456        // Sort by position to ensure consistent ordering
3457        math_spans.sort_by_key(|span| span.byte_offset);
3458
3459        math_spans
3460    }
3461
3462    /// Parse all list blocks in the content (legacy line-by-line approach)
3463    ///
3464    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3465    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3466    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3467    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3468    ///   treated as list continuation (based on the list marker width)
3469    ///
3470    /// When a new list item is encountered, we check if list-breaking content was seen
3471    /// since the last item. If so, we start a new list block.
3472    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3473        // Minimum indentation for unordered list continuation per CommonMark spec
3474        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3475
3476        /// Initialize or reset the forward-scanning tracking state.
3477        /// This helper eliminates code duplication across three initialization sites.
3478        #[inline]
3479        fn reset_tracking_state(
3480            list_item: &ListItemInfo,
3481            has_list_breaking_content: &mut bool,
3482            min_continuation: &mut usize,
3483        ) {
3484            *has_list_breaking_content = false;
3485            let marker_width = if list_item.is_ordered {
3486                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3487            } else {
3488                list_item.marker.len()
3489            };
3490            *min_continuation = if list_item.is_ordered {
3491                marker_width
3492            } else {
3493                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3494            };
3495        }
3496
3497        // Pre-size based on lines that could be list items
3498        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3499        let mut current_block: Option<ListBlock> = None;
3500        let mut last_list_item_line = 0;
3501        let mut current_indent_level = 0;
3502        let mut last_marker_width = 0;
3503
3504        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3505        let mut has_list_breaking_content_since_last_item = false;
3506        let mut min_continuation_for_tracking = 0;
3507
3508        for (line_idx, line_info) in lines.iter().enumerate() {
3509            let line_num = line_idx + 1;
3510
3511            // Enhanced code block handling using Design #3's context analysis
3512            if line_info.in_code_block {
3513                if let Some(ref mut block) = current_block {
3514                    // Calculate minimum indentation for list continuation
3515                    let min_continuation_indent =
3516                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3517
3518                    // Analyze code block context using the three-tier classification
3519                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3520
3521                    match context {
3522                        CodeBlockContext::Indented => {
3523                            // Code block is properly indented - continues the list
3524                            block.end_line = line_num;
3525                            continue;
3526                        }
3527                        CodeBlockContext::Standalone => {
3528                            // Code block separates lists - end current block
3529                            let completed_block = current_block.take().unwrap();
3530                            list_blocks.push(completed_block);
3531                            continue;
3532                        }
3533                        CodeBlockContext::Adjacent => {
3534                            // Edge case - use conservative behavior (continue list)
3535                            block.end_line = line_num;
3536                            continue;
3537                        }
3538                    }
3539                } else {
3540                    // No current list block - skip code block lines
3541                    continue;
3542                }
3543            }
3544
3545            // Extract blockquote prefix if any
3546            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3547                caps.get(0).unwrap().as_str().to_string()
3548            } else {
3549                String::new()
3550            };
3551
3552            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3553            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3554            if let Some(ref block) = current_block
3555                && line_info.list_item.is_none()
3556                && !line_info.is_blank
3557                && !line_info.in_code_span_continuation
3558            {
3559                let line_content = line_info.content(content).trim();
3560
3561                // Check for structural separators that break lists
3562                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3563                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3564                // as they indicate improper indentation rather than lazy continuation.
3565                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3566
3567                // Check if blockquote context changes (different prefix than current block)
3568                // Lines within the SAME blockquote context don't break lists
3569                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3570
3571                let breaks_list = line_info.heading.is_some()
3572                    || line_content.starts_with("---")
3573                    || line_content.starts_with("***")
3574                    || line_content.starts_with("___")
3575                    || crate::utils::skip_context::is_table_line(line_content)
3576                    || blockquote_prefix_changes
3577                    || (line_info.indent > 0
3578                        && line_info.indent < min_continuation_for_tracking
3579                        && !is_lazy_continuation);
3580
3581                if breaks_list {
3582                    has_list_breaking_content_since_last_item = true;
3583                }
3584            }
3585
3586            // If this line is a code span continuation within an active list block,
3587            // extend the block's end_line to include this line (maintains list continuity)
3588            if line_info.in_code_span_continuation
3589                && line_info.list_item.is_none()
3590                && let Some(ref mut block) = current_block
3591            {
3592                block.end_line = line_num;
3593            }
3594
3595            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3596            // properly indented lines within the list). This keeps the consecutive-items WORKAROUND below
3597            // (which requires block.end_line == line_num - 1) working when several continuation lines precede a nested list item.
3598            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3599            // For blockquote lines, compute effective indent after stripping the prefix
3600            let effective_continuation_indent = if let Some(ref block) = current_block {
3601                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3602                let line_content = line_info.content(content);
3603                let line_bq_level = line_content
3604                    .chars()
3605                    .take_while(|c| *c == '>' || c.is_whitespace())
3606                    .filter(|&c| c == '>')
3607                    .count();
3608                if line_bq_level > 0 && line_bq_level == block_bq_level {
3609                    // Compute indent after blockquote markers
3610                    let mut pos = 0;
3611                    let mut found_markers = 0;
3612                    for c in line_content.chars() {
3613                        pos += c.len_utf8();
3614                        if c == '>' {
3615                            found_markers += 1;
3616                            if found_markers == line_bq_level {
3617                                if line_content.get(pos..pos + 1) == Some(" ") {
3618                                    pos += 1;
3619                                }
3620                                break;
3621                            }
3622                        }
3623                    }
3624                    let after_bq = &line_content[pos..];
3625                    after_bq.len() - after_bq.trim_start().len()
3626                } else {
3627                    line_info.indent
3628                }
3629            } else {
3630                line_info.indent
3631            };
3632            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3633                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3634                if block_bq_level > 0 {
3635                    if block.is_ordered { last_marker_width } else { 2 }
3636                } else {
3637                    min_continuation_for_tracking
3638                }
3639            } else {
3640                min_continuation_for_tracking
3641            };
3642            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3643                || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
3644
3645            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3646                eprintln!(
3647                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3648                    line_num,
3649                    effective_continuation_indent,
3650                    adjusted_min_continuation_for_tracking,
3651                    is_valid_continuation,
3652                    line_info.in_code_span_continuation,
3653                    line_info.in_code_block,
3654                    current_block.is_some()
3655                );
3656            }
3657
3658            if !line_info.in_code_span_continuation
3659                && line_info.list_item.is_none()
3660                && !line_info.is_blank
3661                && !line_info.in_code_block
3662                && is_valid_continuation
3663                && let Some(ref mut block) = current_block
3664            {
3665                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3666                    eprintln!(
3667                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3668                        line_num, block.end_line, line_num
3669                    );
3670                }
3671                block.end_line = line_num;
3672            }
3673
3674            // Check if this line is a list item
3675            if let Some(list_item) = &line_info.list_item {
3676                // Calculate nesting level based on indentation
3677                let item_indent = list_item.marker_column;
3678                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3679
3680                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3681                    eprintln!(
3682                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3683                        line_num, list_item.marker, item_indent
3684                    );
3685                }
3686
3687                if let Some(ref mut block) = current_block {
3688                    // Check if this continues the current block
3689                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3690                    // or a continuation at the same or lower level
3691                    let is_nested = nesting > block.nesting_level;
3692                    let same_type =
3693                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3694                    let same_context = block.blockquote_prefix == blockquote_prefix;
3695                    // Allow one blank line after last item, or lines immediately after block content
3696                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3697
3698                    // For unordered lists, also check marker consistency
3699                    let marker_compatible =
3700                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3701
3702                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3703                    // This eliminates the quadratic bottleneck from issue #148
3704                    let has_non_list_content = has_list_breaking_content_since_last_item;
3705
3706                    // A list continues if:
3707                    // 1. It's a nested item (indented more than the parent), OR
3708                    // 2. It's the same type at the same level with reasonable distance
3709                    let mut continues_list = if is_nested {
3710                        // Nested items always continue the list if they're in the same context
3711                        same_context && reasonable_distance && !has_non_list_content
3712                    } else {
3713                        // Same-level items need to match type and markers
3714                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3715                    };
3716
3717                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3718                        eprintln!(
3719                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3720                            line_num,
3721                            continues_list,
3722                            is_nested,
3723                            same_type,
3724                            same_context,
3725                            reasonable_distance,
3726                            marker_compatible,
3727                            has_non_list_content,
3728                            last_list_item_line,
3729                            block.end_line
3730                        );
3731                    }
3732
3733                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3734                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3735                    // Apply for: nested items (different types OK), OR same-level same-type items
3736                    if !continues_list
3737                        && (is_nested || same_type)
3738                        && reasonable_distance
3739                        && line_num > 0
3740                        && block.end_line == line_num - 1
3741                    {
3742                        // Check if the previous line was a list item or a continuation of a list item
3743                        // (including lazy continuation lines)
3744                        if block.item_lines.contains(&(line_num - 1)) {
3745                            // They're consecutive list items - force them to be in the same list
3746                            continues_list = true;
3747                        } else {
3748                            // Previous line is a continuation line within this block
3749                            // (e.g., lazy continuation with indent=0)
3750                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3751                            continues_list = true;
3752                        }
3753                    }
3754
3755                    if continues_list {
3756                        // Extend current block
3757                        block.end_line = line_num;
3758                        block.item_lines.push(line_num);
3759
3760                        // Update max marker width
3761                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3762                            list_item.marker.len() + 1
3763                        } else {
3764                            list_item.marker.len()
3765                        });
3766
3767                        // Update marker consistency for unordered lists
3768                        if !block.is_ordered
3769                            && block.marker.is_some()
3770                            && block.marker.as_ref() != Some(&list_item.marker)
3771                        {
3772                            // Mixed markers, clear the marker field
3773                            block.marker = None;
3774                        }
3775
3776                        // Reset tracked state for issue #148 optimization
3777                        reset_tracking_state(
3778                            list_item,
3779                            &mut has_list_breaking_content_since_last_item,
3780                            &mut min_continuation_for_tracking,
3781                        );
3782                    } else {
3783                        // End current block and start a new one
3784                        // When a different list type starts AT THE SAME LEVEL (not nested),
3785                        // trim back lazy continuation lines (they become part of the gap, not the list)
3786                        // For nested items, different types are fine - they're sub-lists
3787                        if !same_type
3788                            && !is_nested
3789                            && let Some(&last_item) = block.item_lines.last()
3790                        {
3791                            block.end_line = last_item;
3792                        }
3793
3794                        list_blocks.push(block.clone());
3795
3796                        *block = ListBlock {
3797                            start_line: line_num,
3798                            end_line: line_num,
3799                            is_ordered: list_item.is_ordered,
3800                            marker: if list_item.is_ordered {
3801                                None
3802                            } else {
3803                                Some(list_item.marker.clone())
3804                            },
3805                            blockquote_prefix: blockquote_prefix.clone(),
3806                            item_lines: vec![line_num],
3807                            nesting_level: nesting,
3808                            max_marker_width: if list_item.is_ordered {
3809                                list_item.marker.len() + 1
3810                            } else {
3811                                list_item.marker.len()
3812                            },
3813                        };
3814
3815                        // Initialize tracked state for new block (issue #148 optimization)
3816                        reset_tracking_state(
3817                            list_item,
3818                            &mut has_list_breaking_content_since_last_item,
3819                            &mut min_continuation_for_tracking,
3820                        );
3821                    }
3822                } else {
3823                    // Start a new block
3824                    current_block = Some(ListBlock {
3825                        start_line: line_num,
3826                        end_line: line_num,
3827                        is_ordered: list_item.is_ordered,
3828                        marker: if list_item.is_ordered {
3829                            None
3830                        } else {
3831                            Some(list_item.marker.clone())
3832                        },
3833                        blockquote_prefix,
3834                        item_lines: vec![line_num],
3835                        nesting_level: nesting,
3836                        max_marker_width: list_item.marker.len(),
3837                    });
3838
3839                    // Initialize tracked state for new block (issue #148 optimization)
3840                    reset_tracking_state(
3841                        list_item,
3842                        &mut has_list_breaking_content_since_last_item,
3843                        &mut min_continuation_for_tracking,
3844                    );
3845                }
3846
3847                last_list_item_line = line_num;
3848                current_indent_level = item_indent;
3849                last_marker_width = if list_item.is_ordered {
3850                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3851                } else {
3852                    list_item.marker.len()
3853                };
3854            } else if let Some(ref mut block) = current_block {
3855                // Not a list item - check if it continues the current block
3856                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3857                    eprintln!(
3858                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3859                        line_num, line_info.is_blank
3860                    );
3861                }
3862
3863                // For MD032 compatibility, we use a simple approach:
3864                // - Indented lines continue the list
3865                // - Blank lines followed by indented content continue the list
3866                // - Everything else ends the list
3867
3868                // Check if the last line in the list block ended with a backslash (hard line break)
3869                // This handles cases where list items use backslash for hard line breaks
3870                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3871                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3872                } else {
3873                    false
3874                };
3875
3876                // Calculate minimum indentation for list continuation
3877                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3878                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3879                let min_continuation_indent = if block.is_ordered {
3880                    current_indent_level + last_marker_width
3881                } else {
3882                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3883                };
3884
3885                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3886                    // Indented line or backslash continuation continues the list
3887                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3888                        eprintln!(
3889                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3890                            line_num, line_info.indent, min_continuation_indent
3891                        );
3892                    }
3893                    block.end_line = line_num;
3894                } else if line_info.is_blank {
3895                    // Blank line - check if it's internal to the list or ending it
3896                    // We only include blank lines that are followed by more list content
3897                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3898                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3899                    }
3900                    let mut check_idx = line_idx + 1;
3901                    let mut found_continuation = false;
3902
3903                    // Skip additional blank lines
3904                    while check_idx < lines.len() && lines[check_idx].is_blank {
3905                        check_idx += 1;
3906                    }
3907
3908                    if check_idx < lines.len() {
3909                        let next_line = &lines[check_idx];
3910                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3911                        let next_content = next_line.content(content);
3912                        // Use blockquote level (count of >) to compare, not the full prefix
3913                        // This avoids issues where the regex captures extra whitespace
3914                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3915                        let next_bq_level_for_indent = next_content
3916                            .chars()
3917                            .take_while(|c| *c == '>' || c.is_whitespace())
3918                            .filter(|&c| c == '>')
3919                            .count();
3920                        let effective_indent =
3921                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3922                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3923                                // Find position after ">" and one space
3924                                let mut pos = 0;
3925                                let mut found_markers = 0;
3926                                for c in next_content.chars() {
3927                                    pos += c.len_utf8();
3928                                    if c == '>' {
3929                                        found_markers += 1;
3930                                        if found_markers == next_bq_level_for_indent {
3931                                            // Skip optional space after last >
3932                                            if next_content.get(pos..pos + 1) == Some(" ") {
3933                                                pos += 1;
3934                                            }
3935                                            break;
3936                                        }
3937                                    }
3938                                }
3939                                let after_blockquote_marker = &next_content[pos..];
3940                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3941                            } else {
3942                                next_line.indent
3943                            };
3944                        // Also adjust min_continuation_indent for blockquote lists
3945                        // The marker_column includes blockquote prefix, so subtract it
3946                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3947                            // For blockquote lists, the continuation is relative to blockquote content
3948                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3949                            if block.is_ordered { last_marker_width } else { 2 }
3950                        } else {
3951                            min_continuation_indent
3952                        };
3953                        // Check if followed by indented content (list continuation)
3954                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3955                            eprintln!(
3956                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3957                                line_num,
3958                                check_idx + 1,
3959                                effective_indent,
3960                                adjusted_min_continuation,
3961                                next_line.list_item.is_some(),
3962                                next_line.in_code_block
3963                            );
3964                        }
3965                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3966                            found_continuation = true;
3967                        }
3968                        // Check if followed by another list item at the same level
3969                        else if !next_line.in_code_block
3970                            && next_line.list_item.is_some()
3971                            && let Some(item) = &next_line.list_item
3972                        {
3973                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3974                                .find(next_line.content(content))
3975                                .map_or(String::new(), |m| m.as_str().to_string());
3976                            if item.marker_column == current_indent_level
3977                                && item.is_ordered == block.is_ordered
3978                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3979                            {
3980                                // Pre-compute the block's blockquote level for use in the closures below.
3981                                // `_has_meaningful_content` (meaningful content between the list items) is unused now; it is kept
3982                                // for potential future use, but the decision is currently made by `has_structural_separators`.
3983                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3984                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3985                                    if let Some(between_line) = lines.get(idx) {
3986                                        let between_content = between_line.content(content);
3987                                        let trimmed = between_content.trim();
3988                                        // Skip empty lines
3989                                        if trimmed.is_empty() {
3990                                            return false;
3991                                        }
3992                                        // Check for meaningful content
3993                                        let line_indent = between_content.len() - between_content.trim_start().len();
3994
3995                                        // Check if blockquote level changed (not just if line starts with ">")
3996                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3997                                            .find(between_content)
3998                                            .map_or(String::new(), |m| m.as_str().to_string());
3999                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4000                                        let blockquote_level_changed =
4001                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4002
4003                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4004                                        if trimmed.starts_with("```")
4005                                            || trimmed.starts_with("~~~")
4006                                            || trimmed.starts_with("---")
4007                                            || trimmed.starts_with("***")
4008                                            || trimmed.starts_with("___")
4009                                            || blockquote_level_changed
4010                                            || crate::utils::skip_context::is_table_line(trimmed)
4011                                            || between_line.heading.is_some()
4012                                        {
4013                                            return true; // These are structural separators - meaningful content that breaks lists
4014                                        }
4015
4016                                        // Only properly indented content continues the list
4017                                        line_indent >= min_continuation_indent
4018                                    } else {
4019                                        false
4020                                    }
4021                                });
4022
4023                                if block.is_ordered {
4024                                    // For ordered lists: don't continue if there are structural separators
4025                                    // Check if there are structural separators between the list items
4026                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4027                                        if let Some(between_line) = lines.get(idx) {
4028                                            let between_content = between_line.content(content);
4029                                            let trimmed = between_content.trim();
4030                                            if trimmed.is_empty() {
4031                                                return false;
4032                                            }
4033                                            // Check if blockquote level changed (not just if line starts with ">")
4034                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4035                                                .find(between_content)
4036                                                .map_or(String::new(), |m| m.as_str().to_string());
4037                                            let between_bq_level =
4038                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4039                                            let blockquote_level_changed =
4040                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4041                                            // Check for structural separators that break lists
4042                                            trimmed.starts_with("```")
4043                                                || trimmed.starts_with("~~~")
4044                                                || trimmed.starts_with("---")
4045                                                || trimmed.starts_with("***")
4046                                                || trimmed.starts_with("___")
4047                                                || blockquote_level_changed
4048                                                || crate::utils::skip_context::is_table_line(trimmed)
4049                                                || between_line.heading.is_some()
4050                                        } else {
4051                                            false
4052                                        }
4053                                    });
4054                                    found_continuation = !has_structural_separators;
4055                                } else {
4056                                    // For unordered lists: also check for structural separators
4057                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4058                                        if let Some(between_line) = lines.get(idx) {
4059                                            let between_content = between_line.content(content);
4060                                            let trimmed = between_content.trim();
4061                                            if trimmed.is_empty() {
4062                                                return false;
4063                                            }
4064                                            // Check if blockquote level changed (not just if line starts with ">")
4065                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4066                                                .find(between_content)
4067                                                .map_or(String::new(), |m| m.as_str().to_string());
4068                                            let between_bq_level =
4069                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4070                                            let blockquote_level_changed =
4071                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4072                                            // Check for structural separators that break lists
4073                                            trimmed.starts_with("```")
4074                                                || trimmed.starts_with("~~~")
4075                                                || trimmed.starts_with("---")
4076                                                || trimmed.starts_with("***")
4077                                                || trimmed.starts_with("___")
4078                                                || blockquote_level_changed
4079                                                || crate::utils::skip_context::is_table_line(trimmed)
4080                                                || between_line.heading.is_some()
4081                                        } else {
4082                                            false
4083                                        }
4084                                    });
4085                                    found_continuation = !has_structural_separators;
4086                                }
4087                            }
4088                        }
4089                    }
4090
4091                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4092                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4093                    }
4094                    if found_continuation {
4095                        // Include the blank line in the block
4096                        block.end_line = line_num;
4097                    } else {
4098                        // Blank line ends the list - don't include it
4099                        list_blocks.push(block.clone());
4100                        current_block = None;
4101                    }
4102                } else {
4103                    // Check for lazy continuation - non-indented line immediately after a list item
4104                    // But only if the line has sufficient indentation for the list type
4105                    let min_required_indent = if block.is_ordered {
4106                        current_indent_level + last_marker_width
4107                    } else {
4108                        current_indent_level + 2
4109                    };
4110
4111                    // For lazy continuation to apply, the line must either:
4112                    // 1. Have no indentation (true lazy continuation)
4113                    // 2. Have sufficient indentation for the list type
4114                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4115                    let line_content = line_info.content(content).trim();
4116
4117                    // Check for table-like patterns
4118                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4119
4120                    // Check if blockquote level changed (not just if line starts with ">")
4121                    // Lines within the same blockquote level are NOT structural separators
4122                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4123                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4124                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4125
4126                    let is_structural_separator = line_info.heading.is_some()
4127                        || line_content.starts_with("```")
4128                        || line_content.starts_with("~~~")
4129                        || line_content.starts_with("---")
4130                        || line_content.starts_with("***")
4131                        || line_content.starts_with("___")
4132                        || blockquote_level_changed
4133                        || looks_like_table;
4134
4135                    // Allow lazy continuation if we're still within the same list block
4136                    // (not just immediately after a list item)
4137                    // Also treat code span continuations as valid continuations regardless of indent
4138                    let is_lazy_continuation = !is_structural_separator
4139                        && !line_info.is_blank
4140                        && (line_info.indent == 0
4141                            || line_info.indent >= min_required_indent
4142                            || line_info.in_code_span_continuation);
4143
4144                    if is_lazy_continuation {
4145                        // Per CommonMark, lazy continuation continues until a blank line
4146                        // or structural element, regardless of uppercase at line start
4147                        block.end_line = line_num;
4148                    } else {
4149                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4150                        list_blocks.push(block.clone());
4151                        current_block = None;
4152                    }
4153                }
4154            }
4155        }
4156
4157        // Don't forget the last block
4158        if let Some(block) = current_block {
4159            list_blocks.push(block);
4160        }
4161
4162        // Merge adjacent blocks that should be one
4163        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4164
4165        list_blocks
4166    }
4167
4168    /// Compute character frequency for fast content analysis
4169    fn compute_char_frequency(content: &str) -> CharFrequency {
4170        let mut frequency = CharFrequency::default();
4171
4172        for ch in content.chars() {
4173            match ch {
4174                '#' => frequency.hash_count += 1,
4175                '*' => frequency.asterisk_count += 1,
4176                '_' => frequency.underscore_count += 1,
4177                '-' => frequency.hyphen_count += 1,
4178                '+' => frequency.plus_count += 1,
4179                '>' => frequency.gt_count += 1,
4180                '|' => frequency.pipe_count += 1,
4181                '[' => frequency.bracket_count += 1,
4182                '`' => frequency.backtick_count += 1,
4183                '<' => frequency.lt_count += 1,
4184                '!' => frequency.exclamation_count += 1,
4185                '\n' => frequency.newline_count += 1,
4186                _ => {}
4187            }
4188        }
4189
4190        frequency
4191    }
4192
4193    /// Parse HTML tags in the content
4194    fn parse_html_tags(
4195        content: &str,
4196        lines: &[LineInfo],
4197        code_blocks: &[(usize, usize)],
4198        flavor: MarkdownFlavor,
4199    ) -> Vec<HtmlTag> {
4200        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4201            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4202
4203        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4204
4205        for cap in HTML_TAG_REGEX.captures_iter(content) {
4206            let full_match = cap.get(0).unwrap();
4207            let match_start = full_match.start();
4208            let match_end = full_match.end();
4209
4210            // Skip if in code block
4211            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4212                continue;
4213            }
4214
4215            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4216            let tag_name_original = cap.get(2).unwrap().as_str();
4217            let tag_name = tag_name_original.to_lowercase();
4218            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4219
4220            // Skip JSX components in MDX files (tags starting with uppercase letter)
4221            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4222            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4223                continue;
4224            }
4225
4226            // Find which line this tag is on
4227            let mut line_num = 1;
4228            let mut col_start = match_start;
4229            let mut col_end = match_end;
4230            for (idx, line_info) in lines.iter().enumerate() {
4231                if match_start >= line_info.byte_offset {
4232                    line_num = idx + 1;
4233                    col_start = match_start - line_info.byte_offset;
4234                    col_end = match_end - line_info.byte_offset;
4235                } else {
4236                    break;
4237                }
4238            }
4239
4240            html_tags.push(HtmlTag {
4241                line: line_num,
4242                start_col: col_start,
4243                end_col: col_end,
4244                byte_offset: match_start,
4245                byte_end: match_end,
4246                tag_name,
4247                is_closing,
4248                is_self_closing,
4249                raw_content: full_match.as_str().to_string(),
4250            });
4251        }
4252
4253        html_tags
4254    }
4255
4256    /// Parse table rows in the content
4257    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4258        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4259
4260        for (line_idx, line_info) in lines.iter().enumerate() {
4261            // Skip lines in code blocks or blank lines
4262            if line_info.in_code_block || line_info.is_blank {
4263                continue;
4264            }
4265
4266            let line = line_info.content(content);
4267            let line_num = line_idx + 1;
4268
4269            // Check if this line contains pipes (potential table row)
4270            if !line.contains('|') {
4271                continue;
4272            }
4273
4274            // Count columns by splitting on pipes
4275            let parts: Vec<&str> = line.split('|').collect();
4276            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4277
4278            // Check if this is a separator row
4279            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4280            let mut column_alignments = Vec::new();
4281
4282            if is_separator {
4283                for part in &parts[1..parts.len() - 1] {
4284                    // Skip first and last empty parts
4285                    let trimmed = part.trim();
4286                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4287                        "center".to_string()
4288                    } else if trimmed.ends_with(':') {
4289                        "right".to_string()
4290                    } else if trimmed.starts_with(':') {
4291                        "left".to_string()
4292                    } else {
4293                        "none".to_string()
4294                    };
4295                    column_alignments.push(alignment);
4296                }
4297            }
4298
4299            table_rows.push(TableRow {
4300                line: line_num,
4301                is_separator,
4302                column_count,
4303                column_alignments,
4304            });
4305        }
4306
4307        table_rows
4308    }
4309
4310    /// Parse bare URLs and emails in the content
4311    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4312        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4313
4314        // Check for bare URLs (not in angle brackets or markdown links)
4315        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4316            let full_match = cap.get(0).unwrap();
4317            let match_start = full_match.start();
4318            let match_end = full_match.end();
4319
4320            // Skip if in code block
4321            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4322                continue;
4323            }
4324
4325            // Skip if already in angle brackets or markdown links
4326            let preceding_char = if match_start > 0 {
4327                content.chars().nth(match_start - 1)
4328            } else {
4329                None
4330            };
4331            let following_char = content.chars().nth(match_end);
4332
4333            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4334                continue;
4335            }
4336            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4337                continue;
4338            }
4339
4340            let url = full_match.as_str();
4341            let url_type = if url.starts_with("https://") {
4342                "https"
4343            } else if url.starts_with("http://") {
4344                "http"
4345            } else if url.starts_with("ftp://") {
4346                "ftp"
4347            } else {
4348                "other"
4349            };
4350
4351            // Find which line this URL is on
4352            let mut line_num = 1;
4353            let mut col_start = match_start;
4354            let mut col_end = match_end;
4355            for (idx, line_info) in lines.iter().enumerate() {
4356                if match_start >= line_info.byte_offset {
4357                    line_num = idx + 1;
4358                    col_start = match_start - line_info.byte_offset;
4359                    col_end = match_end - line_info.byte_offset;
4360                } else {
4361                    break;
4362                }
4363            }
4364
4365            bare_urls.push(BareUrl {
4366                line: line_num,
4367                start_col: col_start,
4368                end_col: col_end,
4369                byte_offset: match_start,
4370                byte_end: match_end,
4371                url: url.to_string(),
4372                url_type: url_type.to_string(),
4373            });
4374        }
4375
4376        // Check for bare email addresses
4377        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4378            let full_match = cap.get(0).unwrap();
4379            let match_start = full_match.start();
4380            let match_end = full_match.end();
4381
4382            // Skip if in code block
4383            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4384                continue;
4385            }
4386
4387            // Skip if already in angle brackets or markdown links
4388            let preceding_char = if match_start > 0 {
4389                content.chars().nth(match_start - 1)
4390            } else {
4391                None
4392            };
4393            let following_char = content.chars().nth(match_end);
4394
4395            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4396                continue;
4397            }
4398            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4399                continue;
4400            }
4401
4402            let email = full_match.as_str();
4403
4404            // Find which line this email is on
4405            let mut line_num = 1;
4406            let mut col_start = match_start;
4407            let mut col_end = match_end;
4408            for (idx, line_info) in lines.iter().enumerate() {
4409                if match_start >= line_info.byte_offset {
4410                    line_num = idx + 1;
4411                    col_start = match_start - line_info.byte_offset;
4412                    col_end = match_end - line_info.byte_offset;
4413                } else {
4414                    break;
4415                }
4416            }
4417
4418            bare_urls.push(BareUrl {
4419                line: line_num,
4420                start_col: col_start,
4421                end_col: col_end,
4422                byte_offset: match_start,
4423                byte_end: match_end,
4424                url: email.to_string(),
4425                url_type: "email".to_string(),
4426            });
4427        }
4428
4429        bare_urls
4430    }
4431
4432    /// Get an iterator over valid CommonMark headings
4433    ///
4434    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4435    /// that should be flagged by MD018 but should not be processed by other heading rules.
4436    ///
4437    /// # Examples
4438    ///
4439    /// ```rust
4440    /// use rumdl_lib::lint_context::LintContext;
4441    /// use rumdl_lib::config::MarkdownFlavor;
4442    ///
4443    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4444    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4445    ///
4446    /// for heading in ctx.valid_headings() {
4447    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4448    /// }
4449    /// // Only prints valid headings, skips `#NoSpace`
4450    /// ```
4451    #[must_use]
4452    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4453        ValidHeadingsIter::new(&self.lines)
4454    }
4455
4456    /// Check if the document contains any valid CommonMark headings
4457    ///
4458    /// Returns `true` if there is at least one heading with proper space after `#`.
4459    #[must_use]
4460    pub fn has_valid_headings(&self) -> bool {
4461        self.lines
4462            .iter()
4463            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4464    }
4465}
4466
4467/// Merge adjacent list blocks that should be treated as one
4468fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4469    if list_blocks.len() < 2 {
4470        return;
4471    }
4472
4473    let mut merger = ListBlockMerger::new(content, lines);
4474    *list_blocks = merger.merge(list_blocks);
4475}
4476
/// Helper struct to manage the complex logic of merging list blocks
///
/// Borrows the document text and its line table for the duration of a merge pass.
struct ListBlockMerger<'a> {
    /// Full document text; handed to `LineInfo::content` to recover a line's text.
    content: &'a str,
    /// Per-line metadata, looked up by zero-based index (`line_num - 1`).
    lines: &'a [LineInfo],
}
4482
4483impl<'a> ListBlockMerger<'a> {
4484    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4485        Self { content, lines }
4486    }
4487
4488    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4489        let mut merged = Vec::with_capacity(list_blocks.len());
4490        let mut current = list_blocks[0].clone();
4491
4492        for next in list_blocks.iter().skip(1) {
4493            if self.should_merge_blocks(&current, next) {
4494                current = self.merge_two_blocks(current, next);
4495            } else {
4496                merged.push(current);
4497                current = next.clone();
4498            }
4499        }
4500
4501        merged.push(current);
4502        merged
4503    }
4504
4505    /// Determine if two adjacent list blocks should be merged
4506    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4507        // Basic compatibility checks
4508        if !self.blocks_are_compatible(current, next) {
4509            return false;
4510        }
4511
4512        // Check spacing and content between blocks
4513        let spacing = self.analyze_spacing_between(current, next);
4514        match spacing {
4515            BlockSpacing::Consecutive => true,
4516            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4517            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4518                self.can_merge_with_content_between(current, next)
4519            }
4520        }
4521    }
4522
4523    /// Check if blocks have compatible structure for merging
4524    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4525        current.is_ordered == next.is_ordered
4526            && current.blockquote_prefix == next.blockquote_prefix
4527            && current.nesting_level == next.nesting_level
4528    }
4529
4530    /// Analyze the spacing between two list blocks
4531    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4532        let gap = next.start_line - current.end_line;
4533
4534        match gap {
4535            1 => BlockSpacing::Consecutive,
4536            2 => BlockSpacing::SingleBlank,
4537            _ if gap > 2 => {
4538                if self.has_only_blank_lines_between(current, next) {
4539                    BlockSpacing::MultipleBlanks
4540                } else {
4541                    BlockSpacing::ContentBetween
4542                }
4543            }
4544            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4545        }
4546    }
4547
4548    /// Check if unordered lists can be merged with a single blank line between
4549    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4550        // Check if there are structural separators between the blocks
4551        // If has_meaningful_content_between returns true, it means there are structural separators
4552        if has_meaningful_content_between(self.content, current, next, self.lines) {
4553            return false; // Structural separators prevent merging
4554        }
4555
4556        // Only merge unordered lists with same marker across single blank
4557        !current.is_ordered && current.marker == next.marker
4558    }
4559
4560    /// Check if ordered lists can be merged when there's content between them
4561    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4562        // Do not merge lists if there are structural separators between them
4563        if has_meaningful_content_between(self.content, current, next, self.lines) {
4564            return false; // Structural separators prevent merging
4565        }
4566
4567        // Only consider merging ordered lists if there's no structural content between
4568        current.is_ordered && next.is_ordered
4569    }
4570
4571    /// Check if there are only blank lines between blocks
4572    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4573        for line_num in (current.end_line + 1)..next.start_line {
4574            if let Some(line_info) = self.lines.get(line_num - 1)
4575                && !line_info.content(self.content).trim().is_empty()
4576            {
4577                return false;
4578            }
4579        }
4580        true
4581    }
4582
4583    /// Merge two compatible list blocks into one
4584    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4585        current.end_line = next.end_line;
4586        current.item_lines.extend_from_slice(&next.item_lines);
4587
4588        // Update max marker width
4589        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4590
4591        // Handle marker consistency for unordered lists
4592        if !current.is_ordered && self.markers_differ(&current, next) {
4593            current.marker = None; // Mixed markers
4594        }
4595
4596        current
4597    }
4598
4599    /// Check if two blocks have different markers
4600    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4601        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4602    }
4603}
4604
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the line gap between the
/// end of one block and the start of the next.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks: the next block starts on the line right after the current one ends.
    Consecutive,
    /// One blank line between blocks
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks
    ContentBetween,
}
4613
4614/// Check if there's meaningful content (not just blank lines) between two list blocks
4615fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4616    // Check lines between current.end_line and next.start_line
4617    for line_num in (current.end_line + 1)..next.start_line {
4618        if let Some(line_info) = lines.get(line_num - 1) {
4619            // Convert to 0-indexed
4620            let trimmed = line_info.content(content).trim();
4621
4622            // Skip empty lines
4623            if trimmed.is_empty() {
4624                continue;
4625            }
4626
4627            // Check for structural separators that should separate lists (CommonMark compliant)
4628
4629            // Headings separate lists
4630            if line_info.heading.is_some() {
4631                return true; // Has meaningful content - headings separate lists
4632            }
4633
4634            // Horizontal rules separate lists (---, ***, ___)
4635            if is_horizontal_rule(trimmed) {
4636                return true; // Has meaningful content - horizontal rules separate lists
4637            }
4638
4639            // Tables separate lists
4640            if crate::utils::skip_context::is_table_line(trimmed) {
4641                return true; // Has meaningful content - tables separate lists
4642            }
4643
4644            // Blockquotes separate lists
4645            if trimmed.starts_with('>') {
4646                return true; // Has meaningful content - blockquotes separate lists
4647            }
4648
4649            // Code block fences separate lists (unless properly indented as list content)
4650            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4651                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4652
4653                // Check if this code block is properly indented as list continuation
4654                let min_continuation_indent = if current.is_ordered {
4655                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4656                } else {
4657                    current.nesting_level + 2
4658                };
4659
4660                if line_indent < min_continuation_indent {
4661                    // This is a standalone code block that separates lists
4662                    return true; // Has meaningful content - standalone code blocks separate lists
4663                }
4664            }
4665
4666            // Check if this line has proper indentation for list continuation
4667            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4668
4669            // Calculate minimum indentation needed to be list continuation
4670            let min_indent = if current.is_ordered {
4671                current.nesting_level + current.max_marker_width
4672            } else {
4673                current.nesting_level + 2
4674            };
4675
4676            // If the line is not indented enough to be list continuation, it's meaningful content
4677            if line_indent < min_indent {
4678                return true; // Has meaningful content - content not indented as list continuation
4679            }
4680
4681            // If we reach here, the line is properly indented as list continuation
4682            // Continue checking other lines
4683        }
4684    }
4685
4686    // Only blank lines or properly indented list continuation content between blocks
4687    false
4688}
4689
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces between characters
/// - No other characters allowed
///
/// A tab anywhere in the leading indentation disqualifies the line, not only a tab in
/// the very first column: a tab following 0-3 spaces still advances to the next tab stop,
/// i.e. to at least four columns of indentation.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(after_spaces.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must start with `-`, `*` or `_`, contain at least three of that character,
/// and otherwise consist only of spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three one-byte marker characters are the minimum, so anything shorter cannot match.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Count the markers (they need not be adjacent); whitespace between them is allowed,
    // any other character rules the line out.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}

/// Backwards-compatible alias for `is_horizontal_rule_content`
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4735
/// Unit tests for `LintContext` and the helpers defined in this module.
4737#[cfg(test)]
4738mod tests {
4739    use super::*;
4740
4741    #[test]
4742    fn test_empty_content() {
4743        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4744        assert_eq!(ctx.content, "");
4745        assert_eq!(ctx.line_offsets, vec![0]);
4746        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4747        assert_eq!(ctx.lines.len(), 0);
4748    }
4749
4750    #[test]
4751    fn test_single_line() {
4752        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4753        assert_eq!(ctx.content, "# Hello");
4754        assert_eq!(ctx.line_offsets, vec![0]);
4755        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4756        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4757    }
4758
4759    #[test]
4760    fn test_multi_line() {
4761        let content = "# Title\n\nSecond line\nThird line";
4762        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4763        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4764        // Test offset to line/col
4765        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4766        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4767        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4768        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4769        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4770    }
4771
4772    #[test]
4773    fn test_line_info() {
4774        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4775        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4776
4777        // Test line info
4778        assert_eq!(ctx.lines.len(), 7);
4779
4780        // Line 1: "# Title"
4781        let line1 = &ctx.lines[0];
4782        assert_eq!(line1.content(ctx.content), "# Title");
4783        assert_eq!(line1.byte_offset, 0);
4784        assert_eq!(line1.indent, 0);
4785        assert!(!line1.is_blank);
4786        assert!(!line1.in_code_block);
4787        assert!(line1.list_item.is_none());
4788
4789        // Line 2: "    indented"
4790        let line2 = &ctx.lines[1];
4791        assert_eq!(line2.content(ctx.content), "    indented");
4792        assert_eq!(line2.byte_offset, 8);
4793        assert_eq!(line2.indent, 4);
4794        assert!(!line2.is_blank);
4795
4796        // Line 3: "" (blank)
4797        let line3 = &ctx.lines[2];
4798        assert_eq!(line3.content(ctx.content), "");
4799        assert!(line3.is_blank);
4800
4801        // Test helper methods
4802        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4803        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4804        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4805        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4806    }
4807
4808    #[test]
4809    fn test_list_item_detection() {
4810        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4811        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4812
4813        // Line 1: "- Unordered item"
4814        let line1 = &ctx.lines[0];
4815        assert!(line1.list_item.is_some());
4816        let list1 = line1.list_item.as_ref().unwrap();
4817        assert_eq!(list1.marker, "-");
4818        assert!(!list1.is_ordered);
4819        assert_eq!(list1.marker_column, 0);
4820        assert_eq!(list1.content_column, 2);
4821
4822        // Line 2: "  * Nested item"
4823        let line2 = &ctx.lines[1];
4824        assert!(line2.list_item.is_some());
4825        let list2 = line2.list_item.as_ref().unwrap();
4826        assert_eq!(list2.marker, "*");
4827        assert_eq!(list2.marker_column, 2);
4828
4829        // Line 3: "1. Ordered item"
4830        let line3 = &ctx.lines[2];
4831        assert!(line3.list_item.is_some());
4832        let list3 = line3.list_item.as_ref().unwrap();
4833        assert_eq!(list3.marker, "1.");
4834        assert!(list3.is_ordered);
4835        assert_eq!(list3.number, Some(1));
4836
4837        // Line 6: "Not a list"
4838        let line6 = &ctx.lines[5];
4839        assert!(line6.list_item.is_none());
4840    }
4841
4842    #[test]
4843    fn test_offset_to_line_col_edge_cases() {
4844        let content = "a\nb\nc";
4845        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4846        // line_offsets: [0, 2, 4]
4847        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4848        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4849        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4850        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4851        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4852        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4853    }
4854
4855    #[test]
4856    fn test_mdx_esm_blocks() {
4857        let content = r##"import {Chart} from './snowfall.js'
4858export const year = 2023
4859
4860# Last year's snowfall
4861
4862In {year}, the snowfall was above average.
4863It was followed by a warm spring which caused
4864flood conditions in many of the nearby rivers.
4865
4866<Chart color="#fcb32c" year={year} />
4867"##;
4868
4869        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4870
4871        // Check that lines 1 and 2 are marked as ESM blocks
4872        assert_eq!(ctx.lines.len(), 10);
4873        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4874        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4875        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4876        assert!(
4877            !ctx.lines[3].in_esm_block,
4878            "Line 4 (heading) should NOT be in_esm_block"
4879        );
4880        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4881        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4882    }
4883
4884    #[test]
4885    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4886        let content = r#"import {Chart} from './snowfall.js'
4887export const year = 2023
4888
4889# Last year's snowfall
4890"#;
4891
4892        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4893
4894        // ESM blocks should NOT be detected in Standard flavor
4895        assert!(
4896            !ctx.lines[0].in_esm_block,
4897            "Line 1 should NOT be in_esm_block in Standard flavor"
4898        );
4899        assert!(
4900            !ctx.lines[1].in_esm_block,
4901            "Line 2 should NOT be in_esm_block in Standard flavor"
4902        );
4903    }
4904
4905    #[test]
4906    fn test_blockquote_with_indented_content() {
4907        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4908        // The content inside the blockquote may also be detected as a code block (which is correct),
4909        // but for MD046 purposes, we need to know the line is inside a blockquote.
4910        let content = r#"# Heading
4911
4912>      -S socket-path
4913>                    More text
4914"#;
4915        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4916
4917        // Line 3 (index 2) should be detected as blockquote
4918        assert!(
4919            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4920            "Line 3 should be a blockquote"
4921        );
4922        // Line 4 (index 3) should also be blockquote
4923        assert!(
4924            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4925            "Line 4 should be a blockquote"
4926        );
4927
4928        // Verify blockquote content is correctly parsed
4929        // Note: spaces_after includes the spaces between `>` and content
4930        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4931        assert_eq!(bq3.content, "-S socket-path");
4932        assert_eq!(bq3.nesting_level, 1);
4933        // 6 spaces after the `>` marker
4934        assert!(bq3.has_multiple_spaces_after_marker);
4935
4936        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4937        assert_eq!(bq4.content, "More text");
4938        assert_eq!(bq4.nesting_level, 1);
4939    }
4940
4941    #[test]
4942    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4943        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4944        let content = r#"# Title
4945
4946A footnote[^1].
4947
4948[^1]: This is the footnote content.
4949
4950[^note]: Another footnote with [link](https://example.com).
4951
4952[regular]: ./path.md "A real reference definition"
4953"#;
4954        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4955
4956        // Should only have one reference definition (the regular one)
4957        assert_eq!(
4958            ctx.reference_defs.len(),
4959            1,
4960            "Footnotes should not be parsed as reference definitions"
4961        );
4962
4963        // The only reference def should be the regular one
4964        assert_eq!(ctx.reference_defs[0].id, "regular");
4965        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4966        assert_eq!(
4967            ctx.reference_defs[0].title,
4968            Some("A real reference definition".to_string())
4969        );
4970    }
4971
4972    #[test]
4973    fn test_footnote_with_inline_link_not_misidentified() {
4974        // Regression test for issue #286: footnote containing an inline link
4975        // was incorrectly parsed as a reference definition with URL "[link](url)"
4976        let content = r#"# Title
4977
4978A footnote[^1].
4979
4980[^1]: [link](https://www.google.com).
4981"#;
4982        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4983
4984        // Should have no reference definitions
4985        assert!(
4986            ctx.reference_defs.is_empty(),
4987            "Footnote with inline link should not create a reference definition"
4988        );
4989    }
4990
4991    #[test]
4992    fn test_various_footnote_formats_excluded() {
4993        // Test various footnote ID formats are all excluded
4994        let content = r#"[^1]: Numeric footnote
4995[^note]: Named footnote
4996[^a]: Single char footnote
4997[^long-footnote-name]: Long named footnote
4998[^123abc]: Mixed alphanumeric
4999
5000[ref1]: ./file1.md
5001[ref2]: ./file2.md
5002"#;
5003        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5004
5005        // Should only have the two regular reference definitions
5006        assert_eq!(
5007            ctx.reference_defs.len(),
5008            2,
5009            "Only regular reference definitions should be parsed"
5010        );
5011
5012        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5013        assert!(ids.contains(&"ref1"));
5014        assert!(ids.contains(&"ref2"));
5015        assert!(!ids.iter().any(|id| id.starts_with('^')));
5016    }
5017
5018    // =========================================================================
5019    // Tests for has_char and char_count methods
5020    // =========================================================================
5021
5022    #[test]
5023    fn test_has_char_tracked_characters() {
5024        // Test all 12 tracked characters
5025        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5026        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5027
5028        // All tracked characters should be detected
5029        assert!(ctx.has_char('#'), "Should detect hash");
5030        assert!(ctx.has_char('*'), "Should detect asterisk");
5031        assert!(ctx.has_char('_'), "Should detect underscore");
5032        assert!(ctx.has_char('-'), "Should detect hyphen");
5033        assert!(ctx.has_char('+'), "Should detect plus");
5034        assert!(ctx.has_char('>'), "Should detect gt");
5035        assert!(ctx.has_char('|'), "Should detect pipe");
5036        assert!(ctx.has_char('['), "Should detect bracket");
5037        assert!(ctx.has_char('`'), "Should detect backtick");
5038        assert!(ctx.has_char('<'), "Should detect lt");
5039        assert!(ctx.has_char('!'), "Should detect exclamation");
5040        assert!(ctx.has_char('\n'), "Should detect newline");
5041    }
5042
5043    #[test]
5044    fn test_has_char_absent_characters() {
5045        let content = "Simple text without special chars";
5046        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5047
5048        // None of the tracked characters should be present
5049        assert!(!ctx.has_char('#'), "Should not detect hash");
5050        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5051        assert!(!ctx.has_char('_'), "Should not detect underscore");
5052        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5053        assert!(!ctx.has_char('+'), "Should not detect plus");
5054        assert!(!ctx.has_char('>'), "Should not detect gt");
5055        assert!(!ctx.has_char('|'), "Should not detect pipe");
5056        assert!(!ctx.has_char('['), "Should not detect bracket");
5057        assert!(!ctx.has_char('`'), "Should not detect backtick");
5058        assert!(!ctx.has_char('<'), "Should not detect lt");
5059        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5060        // Note: single line content has no newlines
5061        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5062    }
5063
5064    #[test]
5065    fn test_has_char_fallback_for_untracked() {
5066        let content = "Text with @mention and $dollar and %percent";
5067        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5068
5069        // Untracked characters should fall back to content.contains()
5070        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5071        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5072        assert!(ctx.has_char('%'), "Should detect % via fallback");
5073        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5074    }
5075
5076    #[test]
5077    fn test_char_count_tracked_characters() {
5078        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5079        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5080
5081        // Count each tracked character
5082        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5083        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5084        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5085        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5086        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5087        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5088        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5089        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5090        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5091        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5092        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5093        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5094    }
5095
5096    #[test]
5097    fn test_char_count_zero_for_absent() {
5098        let content = "Plain text";
5099        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5100
5101        assert_eq!(ctx.char_count('#'), 0);
5102        assert_eq!(ctx.char_count('*'), 0);
5103        assert_eq!(ctx.char_count('_'), 0);
5104        assert_eq!(ctx.char_count('\n'), 0);
5105    }
5106
5107    #[test]
5108    fn test_char_count_fallback_for_untracked() {
5109        let content = "@@@ $$ %%%";
5110        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5111
5112        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5113        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5114        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5115        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5116    }
5117
5118    #[test]
5119    fn test_char_count_empty_content() {
5120        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5121
5122        assert_eq!(ctx.char_count('#'), 0);
5123        assert_eq!(ctx.char_count('*'), 0);
5124        assert_eq!(ctx.char_count('@'), 0);
5125        assert!(!ctx.has_char('#'));
5126        assert!(!ctx.has_char('@'));
5127    }
5128
5129    // =========================================================================
5130    // Tests for is_in_html_tag method
5131    // =========================================================================
5132
5133    #[test]
5134    fn test_is_in_html_tag_simple() {
5135        let content = "<div>content</div>";
5136        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5137
5138        // Inside opening tag
5139        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5140        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5141        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5142
5143        // Outside tag (in content)
5144        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5145        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5146
5147        // Inside closing tag
5148        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5149        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5150    }
5151
5152    #[test]
5153    fn test_is_in_html_tag_self_closing() {
5154        let content = "Text <br/> more text";
5155        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5156
5157        // Before tag
5158        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5159        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5160
5161        // Inside self-closing tag
5162        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5163        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5164        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5165
5166        // After tag
5167        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5168    }
5169
5170    #[test]
5171    fn test_is_in_html_tag_with_attributes() {
5172        let content = r#"<a href="url" class="link">text</a>"#;
5173        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5174
5175        // All positions inside opening tag with attributes
5176        assert!(ctx.is_in_html_tag(0), "Start of tag");
5177        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5178        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5179        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5180
5181        // Content between tags
5182        assert!(!ctx.is_in_html_tag(27), "Start of content");
5183        assert!(!ctx.is_in_html_tag(30), "End of content");
5184
5185        // Closing tag
5186        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5187    }
5188
5189    #[test]
5190    fn test_is_in_html_tag_multiline() {
5191        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5192        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5193
5194        // Opening tag spans multiple lines
5195        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5196        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5197        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5198
5199        // After closing > of opening tag
5200        let closing_bracket_pos = content.find(">\n").unwrap();
5201        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5202    }
5203
5204    #[test]
5205    fn test_is_in_html_tag_no_tags() {
5206        let content = "Plain text without any HTML";
5207        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5208
5209        // No position should be in an HTML tag
5210        for i in 0..content.len() {
5211            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5212        }
5213    }
5214
5215    // =========================================================================
5216    // Tests for is_in_jinja_range method
5217    // =========================================================================
5218
5219    #[test]
5220    fn test_is_in_jinja_range_expression() {
5221        let content = "Hello {{ name }}!";
5222        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5223
5224        // Before Jinja
5225        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5226        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5227
5228        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5229        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5230        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5231        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5232        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5233        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5234
5235        // After Jinja
5236        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5237    }
5238
5239    #[test]
5240    fn test_is_in_jinja_range_statement() {
5241        let content = "{% if condition %}content{% endif %}";
5242        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5243
5244        // Inside opening statement
5245        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5246        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5247        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5248
5249        // Content between
5250        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5251
5252        // Inside closing statement
5253        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5254        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5255    }
5256
5257    #[test]
5258    fn test_is_in_jinja_range_multiple() {
5259        let content = "{{ a }} and {{ b }}";
5260        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5261
5262        // First Jinja expression
5263        assert!(ctx.is_in_jinja_range(0));
5264        assert!(ctx.is_in_jinja_range(3));
5265        assert!(ctx.is_in_jinja_range(6));
5266
5267        // Between expressions
5268        assert!(!ctx.is_in_jinja_range(8));
5269        assert!(!ctx.is_in_jinja_range(11));
5270
5271        // Second Jinja expression
5272        assert!(ctx.is_in_jinja_range(12));
5273        assert!(ctx.is_in_jinja_range(15));
5274        assert!(ctx.is_in_jinja_range(18));
5275    }
5276
5277    #[test]
5278    fn test_is_in_jinja_range_no_jinja() {
5279        let content = "Plain text with single braces but not Jinja";
5280        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5281
5282        // No position should be in Jinja
5283        for i in 0..content.len() {
5284            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5285        }
5286    }
5287
5288    // =========================================================================
5289    // Tests for is_in_link_title method
5290    // =========================================================================
5291
5292    #[test]
5293    fn test_is_in_link_title_with_title() {
5294        let content = r#"[ref]: https://example.com "Title text"
5295
5296Some content."#;
5297        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5298
5299        // Verify we have a reference def with title
5300        assert_eq!(ctx.reference_defs.len(), 1);
5301        let def = &ctx.reference_defs[0];
5302        assert!(def.title_byte_start.is_some());
5303        assert!(def.title_byte_end.is_some());
5304
5305        let title_start = def.title_byte_start.unwrap();
5306        let title_end = def.title_byte_end.unwrap();
5307
5308        // Before title (in URL)
5309        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5310
5311        // Inside title
5312        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5313        assert!(
5314            ctx.is_in_link_title(title_start + 5),
5315            "Middle of title should be in title"
5316        );
5317        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5318
5319        // After title
5320        assert!(
5321            !ctx.is_in_link_title(title_end),
5322            "After title end should not be in title"
5323        );
5324    }
5325
5326    #[test]
5327    fn test_is_in_link_title_without_title() {
5328        let content = "[ref]: https://example.com\n\nSome content.";
5329        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5330
5331        // Reference def without title
5332        assert_eq!(ctx.reference_defs.len(), 1);
5333        let def = &ctx.reference_defs[0];
5334        assert!(def.title_byte_start.is_none());
5335        assert!(def.title_byte_end.is_none());
5336
5337        // No position should be in a title
5338        for i in 0..content.len() {
5339            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5340        }
5341    }
5342
5343    #[test]
5344    fn test_is_in_link_title_multiple_refs() {
5345        let content = r#"[ref1]: /url1 "Title One"
5346[ref2]: /url2
5347[ref3]: /url3 "Title Three"
5348"#;
5349        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5350
5351        // Should have 3 reference defs
5352        assert_eq!(ctx.reference_defs.len(), 3);
5353
5354        // ref1 has title
5355        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5356        assert!(ref1.title_byte_start.is_some());
5357
5358        // ref2 has no title
5359        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5360        assert!(ref2.title_byte_start.is_none());
5361
5362        // ref3 has title
5363        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5364        assert!(ref3.title_byte_start.is_some());
5365
5366        // Check positions in ref1's title
5367        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5368            assert!(ctx.is_in_link_title(start + 1));
5369            assert!(!ctx.is_in_link_title(end + 5));
5370        }
5371
5372        // Check positions in ref3's title
5373        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5374            assert!(ctx.is_in_link_title(start + 1));
5375        }
5376    }
5377
5378    #[test]
5379    fn test_is_in_link_title_single_quotes() {
5380        let content = "[ref]: /url 'Single quoted title'\n";
5381        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5382
5383        assert_eq!(ctx.reference_defs.len(), 1);
5384        let def = &ctx.reference_defs[0];
5385
5386        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5387            assert!(ctx.is_in_link_title(start));
5388            assert!(ctx.is_in_link_title(start + 5));
5389            assert!(!ctx.is_in_link_title(end));
5390        }
5391    }
5392
5393    #[test]
5394    fn test_is_in_link_title_parentheses() {
5395        // Note: The reference def parser may not support parenthesized titles
5396        // This test verifies the is_in_link_title method works when titles exist
5397        let content = "[ref]: /url (Parenthesized title)\n";
5398        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5399
5400        // Parser behavior: may or may not parse parenthesized titles
5401        // We test that is_in_link_title correctly reflects whatever was parsed
5402        if ctx.reference_defs.is_empty() {
5403            // Parser didn't recognize this as a reference def
5404            for i in 0..content.len() {
5405                assert!(!ctx.is_in_link_title(i));
5406            }
5407        } else {
5408            let def = &ctx.reference_defs[0];
5409            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5410                assert!(ctx.is_in_link_title(start));
5411                assert!(ctx.is_in_link_title(start + 5));
5412                assert!(!ctx.is_in_link_title(end));
5413            } else {
5414                // Title wasn't parsed, so no position should be in title
5415                for i in 0..content.len() {
5416                    assert!(!ctx.is_in_link_title(i));
5417                }
5418            }
5419        }
5420    }
5421
5422    #[test]
5423    fn test_is_in_link_title_no_refs() {
5424        let content = "Just plain text without any reference definitions.";
5425        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5426
5427        assert!(ctx.reference_defs.is_empty());
5428
5429        for i in 0..content.len() {
5430            assert!(!ctx.is_in_link_title(i));
5431        }
5432    }
5433
5434    // =========================================================================
5435    // Math span tests (Issue #289)
5436    // =========================================================================
5437
5438    #[test]
5439    fn test_math_spans_inline() {
5440        let content = "Text with inline math $[f](x)$ in it.";
5441        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5442
5443        let math_spans = ctx.math_spans();
5444        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5445
5446        let span = &math_spans[0];
5447        assert!(!span.is_display, "Should be inline math, not display");
5448        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5449    }
5450
5451    #[test]
5452    fn test_math_spans_display_single_line() {
5453        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5454        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5455
5456        let math_spans = ctx.math_spans();
5457        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5458
5459        let span = &math_spans[0];
5460        assert!(span.is_display, "Should be display math");
5461        assert!(
5462            span.content.contains("[x](\\zeta)"),
5463            "Content should contain the link-like pattern"
5464        );
5465    }
5466
5467    #[test]
5468    fn test_math_spans_display_multiline() {
5469        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5470        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5471
5472        let math_spans = ctx.math_spans();
5473        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5474
5475        let span = &math_spans[0];
5476        assert!(span.is_display, "Should be display math");
5477    }
5478
5479    #[test]
5480    fn test_is_in_math_span() {
5481        let content = "Text $[f](x)$ more text";
5482        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5483
5484        // Position inside the math span
5485        let math_start = content.find('$').unwrap();
5486        let math_end = content.rfind('$').unwrap() + 1;
5487
5488        assert!(
5489            ctx.is_in_math_span(math_start + 1),
5490            "Position inside math span should return true"
5491        );
5492        assert!(
5493            ctx.is_in_math_span(math_start + 3),
5494            "Position inside math span should return true"
5495        );
5496
5497        // Position outside the math span
5498        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5499        assert!(
5500            !ctx.is_in_math_span(math_end + 1),
5501            "Position after math span should return false"
5502        );
5503    }
5504
5505    #[test]
5506    fn test_math_spans_mixed_with_code() {
5507        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5508        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5509
5510        let math_spans = ctx.math_spans();
5511        let code_spans = ctx.code_spans();
5512
5513        assert_eq!(math_spans.len(), 1, "Should have one math span");
5514        assert_eq!(code_spans.len(), 1, "Should have one code span");
5515
5516        // Verify math span content
5517        assert_eq!(math_spans[0].content, "[f](x)");
5518        // Verify code span content
5519        assert_eq!(code_spans[0].content, "[g](y)");
5520    }
5521
5522    #[test]
5523    fn test_math_spans_no_math() {
5524        let content = "Regular text without any math at all.";
5525        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5526
5527        let math_spans = ctx.math_spans();
5528        assert!(math_spans.is_empty(), "Should have no math spans");
5529    }
5530
5531    #[test]
5532    fn test_math_spans_multiple() {
5533        let content = "First $a$ and second $b$ and display $$c$$";
5534        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5535
5536        let math_spans = ctx.math_spans();
5537        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5538
5539        // Two inline, one display
5540        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5541        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5542
5543        assert_eq!(inline_count, 2, "Should have two inline math spans");
5544        assert_eq!(display_count, 1, "Should have one display math span");
5545    }
5546
5547    #[test]
5548    fn test_is_in_math_span_boundary_positions() {
5549        // Test exact boundary positions: $[f](x)$
5550        // Byte positions:                0123456789
5551        let content = "$[f](x)$";
5552        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5553
5554        let math_spans = ctx.math_spans();
5555        assert_eq!(math_spans.len(), 1, "Should have one math span");
5556
5557        let span = &math_spans[0];
5558
5559        // Position at opening $ should be in span (byte 0)
5560        assert!(
5561            ctx.is_in_math_span(span.byte_offset),
5562            "Start position should be in span"
5563        );
5564
5565        // Position just inside should be in span
5566        assert!(
5567            ctx.is_in_math_span(span.byte_offset + 1),
5568            "Position after start should be in span"
5569        );
5570
5571        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5572        assert!(
5573            ctx.is_in_math_span(span.byte_end - 1),
5574            "Position at end-1 should be in span"
5575        );
5576
5577        // Position at byte_end should NOT be in span (exclusive end)
5578        assert!(
5579            !ctx.is_in_math_span(span.byte_end),
5580            "Position at byte_end should NOT be in span (exclusive)"
5581        );
5582    }
5583
5584    #[test]
5585    fn test_math_spans_at_document_start() {
5586        let content = "$x$ text";
5587        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5588
5589        let math_spans = ctx.math_spans();
5590        assert_eq!(math_spans.len(), 1);
5591        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5592    }
5593
5594    #[test]
5595    fn test_math_spans_at_document_end() {
5596        let content = "text $x$";
5597        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5598
5599        let math_spans = ctx.math_spans();
5600        assert_eq!(math_spans.len(), 1);
5601        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5602    }
5603
5604    #[test]
5605    fn test_math_spans_consecutive() {
5606        let content = "$a$$b$";
5607        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5608
5609        let math_spans = ctx.math_spans();
5610        // pulldown-cmark should parse these as separate spans
5611        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5612
5613        // All positions should be in some math span
5614        for i in 0..content.len() {
5615            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5616        }
5617    }
5618
5619    #[test]
5620    fn test_math_spans_currency_not_math() {
5621        // Unbalanced $ should not create math spans
5622        let content = "Price is $100";
5623        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5624
5625        let math_spans = ctx.math_spans();
5626        // pulldown-cmark requires balanced delimiters for math
5627        // $100 alone is not math
5628        assert!(
5629            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5630            "Unbalanced $ should not create math span containing 100"
5631        );
5632    }
5633
5634    // =========================================================================
5635    // Tests for O(1) reference definition lookups via HashMap
5636    // =========================================================================
5637
5638    #[test]
5639    fn test_reference_lookup_o1_basic() {
5640        let content = r#"[ref1]: /url1
5641[REF2]: /url2 "Title"
5642[Ref3]: /url3
5643
5644Use [link][ref1] and [link][REF2]."#;
5645        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5646
5647        // Verify we have 3 reference defs
5648        assert_eq!(ctx.reference_defs.len(), 3);
5649
5650        // Test get_reference_url with various cases
5651        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5652        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5653        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5654        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5655        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5656        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5657        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5658    }
5659
5660    #[test]
5661    fn test_reference_lookup_o1_get_reference_def() {
5662        let content = r#"[myref]: https://example.com "My Title"
5663"#;
5664        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5665
5666        // Test get_reference_def
5667        let def = ctx.get_reference_def("myref").expect("Should find myref");
5668        assert_eq!(def.url, "https://example.com");
5669        assert_eq!(def.title.as_deref(), Some("My Title"));
5670
5671        // Case insensitive
5672        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5673        assert_eq!(def2.url, "https://example.com");
5674
5675        // Non-existent
5676        assert!(ctx.get_reference_def("nonexistent").is_none());
5677    }
5678
5679    #[test]
5680    fn test_reference_lookup_o1_has_reference_def() {
5681        let content = r#"[foo]: /foo
5682[BAR]: /bar
5683"#;
5684        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5685
5686        // Test has_reference_def
5687        assert!(ctx.has_reference_def("foo"));
5688        assert!(ctx.has_reference_def("FOO")); // case insensitive
5689        assert!(ctx.has_reference_def("bar"));
5690        assert!(ctx.has_reference_def("Bar")); // case insensitive
5691        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5692    }
5693
5694    #[test]
5695    fn test_reference_lookup_o1_empty_content() {
5696        let content = "No references here.";
5697        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5698
5699        assert!(ctx.reference_defs.is_empty());
5700        assert_eq!(ctx.get_reference_url("anything"), None);
5701        assert!(ctx.get_reference_def("anything").is_none());
5702        assert!(!ctx.has_reference_def("anything"));
5703    }
5704
5705    #[test]
5706    fn test_reference_lookup_o1_special_characters_in_id() {
5707        let content = r#"[ref-with-dash]: /url1
5708[ref_with_underscore]: /url2
5709[ref.with.dots]: /url3
5710"#;
5711        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5712
5713        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5714        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5715        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5716    }
5717
5718    #[test]
5719    fn test_reference_lookup_o1_unicode_id() {
5720        let content = r#"[日本語]: /japanese
5721[émoji]: /emoji
5722"#;
5723        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5724
5725        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5726        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5727        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5728    }
5729}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs