rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Times a section of work - the timing variant is compiled only for non-WASM builds.
///
/// Evaluates `$code` exactly once and expands to its value. When `$profile` is true,
/// the elapsed time is reported on stderr, tagged with `$name`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// WASM variant: no timing is performed, the macro simply expands to `$code`.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Comprehensive link pattern that captures both inline and reference links.
// Flags: `s` makes `.` match newlines; `x` is verbose mode, so the whitespace and
// `# ...` comments inside the pattern below are ignored by the regex engine.
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern: same shape as the link pattern, but the match must start with `!`.
// Flags: `s` makes `.` match newlines; `x` is verbose mode, so the whitespace and
// `# ...` comments inside the pattern below are ignored by the regex engine.
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[id]: url "title"`.
// `(?m)` makes `^` and `$` match at line boundaries; up to 3 leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs have no pattern of their own here: they use the centralized `URL_SIMPLE_REGEX` imported from `regex_cache`
65
// Pattern for email addresses: a local part, `@`, a domain, and a final
// dot-separated label of at least two ASCII letters. The pattern is unanchored,
// so it can match anywhere inside a longer string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 captures the whole prefix: optional leading whitespace, one or more `>`,
// and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a single line of the document
///
/// Positions are byte-based. The line text itself is not stored; it is sliced
/// out of the source document on demand using `byte_offset` and `byte_len`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (i.e. any line of the span after its first line)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132    /// Get the line content as a string slice from the source document
133    pub fn content<'a>(&self, source: &'a str) -> &'a str {
134        &source[self.byte_offset..self.byte_offset + self.byte_len]
135    }
136}
137
/// Information about a list item, attached to the line that starts the item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
152
/// Heading style type: how a heading is written in the source
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
163
/// Parsed link information
///
/// The text fields are `Cow<'a, str>`, so each may be either borrowed or owned.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
188
/// Information about a broken link reported by pulldown-cmark
/// (a reference that could not be resolved)
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
197
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `1` or `note`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
210
/// Parsed image information
///
/// The text fields are `Cow<'a, str>`, so each may be either borrowed or owned.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
235
/// Reference definition [ref]: url "title"
///
/// All fields are owned; positions are byte offsets into the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
256
/// Parsed code span information
///
/// A code span may cover several lines; `line` and `end_line` differ in that case.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
277
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Carries both a start line and an end line, mirroring the layout of `CodeSpan`.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
298
/// Information about a heading, attached to the line that holds it
///
/// Malformed headings are recorded too, with `is_valid` set to false.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
326
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// Holds references only; nothing is copied out of the underlying `LineInfo`.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
340
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order
    lines: &'a [LineInfo],
    /// 0-based index into `lines` of the next line to examine
    current_index: usize,
}
349
350impl<'a> ValidHeadingsIter<'a> {
351    fn new(lines: &'a [LineInfo]) -> Self {
352        Self {
353            lines,
354            current_index: 0,
355        }
356    }
357}
358
359impl<'a> Iterator for ValidHeadingsIter<'a> {
360    type Item = ValidHeading<'a>;
361
362    fn next(&mut self) -> Option<Self::Item> {
363        while self.current_index < self.lines.len() {
364            let idx = self.current_index;
365            self.current_index += 1;
366
367            let line_info = &self.lines[idx];
368            if let Some(heading) = &line_info.heading
369                && heading.is_valid
370            {
371                return Some(ValidHeading {
372                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
373                    heading,
374                    line_info,
375                });
376            }
377        }
378        None
379    }
380}
381
/// Information about a blockquote line
///
/// All string fields are owned copies.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
402
/// Information about a list block (a run of list items treated as one list)
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed like `start_line`/`end_line` - confirm against the parser.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
423
424use std::sync::{Arc, OnceLock};
425
/// Map from line byte offset to list item data.
///
/// Tuple layout: (is_ordered, marker, marker_column, content_column, number)
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// Type alias for byte ranges used in JSX expression and MDX comment detection
// NOTE(review): each pair is presumably (start, end) byte offsets - confirm at the producer.
type ByteRanges = Vec<(usize, usize)>;
431
/// Character frequency data for fast content analysis
///
/// Derives `Default`, so every counter starts at zero. The parenthesised notes
/// name the Markdown constructs each character is associated with.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
460
/// Pre-parsed HTML tag information (one entry per tag occurrence)
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
483
/// Pre-parsed emphasis span information (italic, bold, or both)
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
504
/// Pre-parsed table row information
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column:
    /// "left", "center", "right", or "none"
    pub column_alignments: Vec<String>,
}
517
/// Pre-parsed bare URL information (not in links)
///
/// Email addresses are represented here as well, with `url_type` set to "email".
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
536
/// Pre-computed view of a single Markdown document
///
/// Built by `LintContext::new`, which parses the document once and stores the
/// results here. Fields backed by `OnceLock` are caches: some are seeded during
/// construction, the rest start empty and are lazy-loaded.
pub struct LintContext<'a> {
    /// The document text
    pub content: &'a str,
    /// Byte offset at which each line starts (the first entry is always 0)
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// O(1) lookup by lowercase ID -> index in reference_defs
    reference_defs_map: HashMap<String, usize>,
    /// Lazy-loaded inline code spans
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Lazy-loaded math spans ($...$ and $$...$$)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Lazy-loaded emphasis spans
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges ({{ }}, {% %})
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
    /// Pre-computed JSX expression ranges (MDX: {expression})
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Pre-computed MDX comment ranges ({/* ... */})
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    shortcode_ranges: Vec<(usize, usize)>,
    /// Parsed inline configuration comments for rule disabling
    inline_config: InlineConfig,
}
569
/// The four consecutive pieces of a blockquote line, each borrowed from the input
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The run of consecutive `>` characters
    markers: &'a str,
    /// Spaces/tabs directly after the markers
    spaces_after: &'a str,
    /// Everything after `spaces_after`
    content: &'a str,
}

/// Split a line into its blockquote components by trimming prefixes, without a regex
///
/// Returns `None` when the first character after any leading spaces/tabs is not `>`.
/// Only the first unbroken run of `>` counts as markers, so for `> > x` the second
/// `>` ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_space_or_tab(c: char) -> bool {
        matches!(c, ' ' | '\t')
    }

    // Peel the three prefixes off one after another; each step keeps the tail.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // No '>' was consumed, so this is not a blockquote line.
        return None;
    }
    let content = after_markers.trim_start_matches(is_space_or_tab);

    // Recover the split points from how much of the line each tail still covers.
    let indent_end = line.len() - after_indent.len();
    let markers_end = line.len() - after_markers.len();
    let spaces_end = line.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_end],
        markers: &line[indent_end..markers_end],
        spaces_after: &line[markers_end..spaces_end],
        content,
    })
}
614
615impl<'a> LintContext<'a> {
616    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
617        #[cfg(not(target_arch = "wasm32"))]
618        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
619        #[cfg(target_arch = "wasm32")]
620        let profile = false;
621
622        let line_offsets = profile_section!("Line offsets", profile, {
623            let mut offsets = vec![0];
624            for (i, c) in content.char_indices() {
625                if c == '\n' {
626                    offsets.push(i + 1);
627                }
628            }
629            offsets
630        });
631
632        // Detect code blocks and code spans once and cache them
633        let (code_blocks, code_span_ranges) = profile_section!(
634            "Code blocks",
635            profile,
636            CodeBlockUtils::detect_code_blocks_and_spans(content)
637        );
638
639        // Pre-compute HTML comment ranges ONCE for all operations
640        let html_comment_ranges = profile_section!(
641            "HTML comment ranges",
642            profile,
643            crate::utils::skip_context::compute_html_comment_ranges(content)
644        );
645
646        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
647        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
648            if flavor == MarkdownFlavor::MkDocs {
649                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
650            } else {
651                Vec::new()
652            }
653        });
654
655        // Pre-compute Quarto div block ranges for Quarto flavor
656        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
657            if flavor == MarkdownFlavor::Quarto {
658                crate::utils::quarto_divs::detect_div_block_ranges(content)
659            } else {
660                Vec::new()
661            }
662        });
663
664        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
665        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
666        let (mut lines, emphasis_spans) = profile_section!(
667            "Basic line info",
668            profile,
669            Self::compute_basic_line_info(
670                content,
671                &line_offsets,
672                &code_blocks,
673                flavor,
674                &html_comment_ranges,
675                &autodoc_ranges,
676                &quarto_div_ranges,
677            )
678        );
679
680        // Detect HTML blocks BEFORE heading detection
681        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
682
683        // Detect ESM import/export blocks in MDX files BEFORE heading detection
684        profile_section!(
685            "ESM blocks",
686            profile,
687            Self::detect_esm_blocks(content, &mut lines, flavor)
688        );
689
690        // Detect JSX expressions and MDX comments in MDX files
691        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
692            "JSX/MDX detection",
693            profile,
694            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
695        );
696
697        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
698        profile_section!(
699            "MkDocs constructs",
700            profile,
701            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
702        );
703
704        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
705        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
706
707        // Now detect headings and blockquotes
708        profile_section!(
709            "Headings & blockquotes",
710            profile,
711            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
712        );
713
714        // Parse code spans early so we can exclude them from link/image parsing
715        let code_spans = profile_section!(
716            "Code spans",
717            profile,
718            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
719        );
720
721        // Mark lines that are continuations of multi-line code spans
722        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
723        for span in &code_spans {
724            if span.end_line > span.line {
725                // Mark lines after the first line as continuations
726                for line_num in (span.line + 1)..=span.end_line {
727                    if let Some(line_info) = lines.get_mut(line_num - 1) {
728                        line_info.in_code_span_continuation = true;
729                    }
730                }
731            }
732        }
733
734        // Parse links, images, references, and list blocks
735        let (links, broken_links, footnote_refs) = profile_section!(
736            "Links",
737            profile,
738            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
739        );
740
741        let images = profile_section!(
742            "Images",
743            profile,
744            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
745        );
746
747        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
748
749        // Build O(1) lookup map for reference definitions by lowercase ID
750        let reference_defs_map: HashMap<String, usize> = reference_defs
751            .iter()
752            .enumerate()
753            .map(|(idx, def)| (def.id.to_lowercase(), idx))
754            .collect();
755
756        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
757
758        // Compute character frequency for fast content analysis
759        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
760
761        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
762        let table_blocks = profile_section!(
763            "Table blocks",
764            profile,
765            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
766                content,
767                &code_blocks,
768                &code_spans,
769                &html_comment_ranges,
770            )
771        );
772
773        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
774        let line_index = profile_section!(
775            "Line index",
776            profile,
777            crate::utils::range_utils::LineIndex::new(content)
778        );
779
780        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
781        let jinja_ranges = profile_section!(
782            "Jinja ranges",
783            profile,
784            crate::utils::jinja_utils::find_jinja_ranges(content)
785        );
786
787        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
788        let citation_ranges = profile_section!("Citation ranges", profile, {
789            if flavor == MarkdownFlavor::Quarto {
790                crate::utils::quarto_divs::find_citation_ranges(content)
791            } else {
792                Vec::new()
793            }
794        });
795
796        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
797        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
798            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
799            let mut ranges = Vec::new();
800            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
801                ranges.push((mat.start(), mat.end()));
802            }
803            ranges
804        });
805
806        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
807
808        Self {
809            content,
810            line_offsets,
811            code_blocks,
812            lines,
813            links,
814            images,
815            broken_links,
816            footnote_refs,
817            reference_defs,
818            reference_defs_map,
819            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
820            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
821            list_blocks,
822            char_frequency,
823            html_tags_cache: OnceLock::new(),
824            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
825            table_rows_cache: OnceLock::new(),
826            bare_urls_cache: OnceLock::new(),
827            has_mixed_list_nesting_cache: OnceLock::new(),
828            html_comment_ranges,
829            table_blocks,
830            line_index,
831            jinja_ranges,
832            flavor,
833            source_file,
834            jsx_expression_ranges,
835            mdx_comment_ranges,
836            citation_ranges,
837            shortcode_ranges,
838            inline_config,
839        }
840    }
841
842    /// Check if a rule is disabled at a specific line number (1-indexed)
843    ///
844    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
845    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
846    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
847        self.inline_config.is_rule_disabled(rule_name, line_number)
848    }
849
850    /// Get code spans - computed lazily on first access
851    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
852        Arc::clone(
853            self.code_spans_cache
854                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
855        )
856    }
857
858    /// Get math spans - computed lazily on first access
859    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
860        Arc::clone(
861            self.math_spans_cache
862                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
863        )
864    }
865
866    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
867    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
868        let math_spans = self.math_spans();
869        math_spans
870            .iter()
871            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
872    }
873
874    /// Get HTML comment ranges - pre-computed during LintContext construction
875    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
876        &self.html_comment_ranges
877    }
878
879    /// Get HTML tags - computed lazily on first access
880    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
881        Arc::clone(self.html_tags_cache.get_or_init(|| {
882            Arc::new(Self::parse_html_tags(
883                self.content,
884                &self.lines,
885                &self.code_blocks,
886                self.flavor,
887            ))
888        }))
889    }
890
891    /// Get emphasis spans - pre-computed during construction
892    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
893        Arc::clone(
894            self.emphasis_spans_cache
895                .get()
896                .expect("emphasis_spans_cache initialized during construction"),
897        )
898    }
899
900    /// Get table rows - computed lazily on first access
901    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
902        Arc::clone(
903            self.table_rows_cache
904                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
905        )
906    }
907
908    /// Get bare URLs - computed lazily on first access
909    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
910        Arc::clone(
911            self.bare_urls_cache
912                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
913        )
914    }
915
916    /// Check if document has mixed ordered/unordered list nesting.
917    /// Result is cached after first computation (document-level invariant).
918    /// This is used by MD007 for smart style auto-detection.
919    pub fn has_mixed_list_nesting(&self) -> bool {
920        *self
921            .has_mixed_list_nesting_cache
922            .get_or_init(|| self.compute_mixed_list_nesting())
923    }
924
925    /// Internal computation for mixed list nesting (only called once per LintContext).
926    fn compute_mixed_list_nesting(&self) -> bool {
927        // Track parent list items by their marker position and type
928        // Using marker_column instead of indent because it works correctly
929        // for blockquoted content where indent doesn't account for the prefix
930        // Stack stores: (marker_column, is_ordered)
931        let mut stack: Vec<(usize, bool)> = Vec::new();
932        let mut last_was_blank = false;
933
934        for line_info in &self.lines {
935            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
936            if line_info.in_code_block
937                || line_info.in_front_matter
938                || line_info.in_mkdocstrings
939                || line_info.in_html_comment
940                || line_info.in_esm_block
941            {
942                continue;
943            }
944
945            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
946            if line_info.is_blank {
947                last_was_blank = true;
948                continue;
949            }
950
951            if let Some(list_item) = &line_info.list_item {
952                // Normalize column 1 to column 0 (consistent with MD007 check function)
953                let current_pos = if list_item.marker_column == 1 {
954                    0
955                } else {
956                    list_item.marker_column
957                };
958
959                // If there was a blank line and this item is at root level, reset stack
960                if last_was_blank && current_pos == 0 {
961                    stack.clear();
962                }
963                last_was_blank = false;
964
965                // Pop items at same or greater position (they're siblings or deeper, not parents)
966                while let Some(&(pos, _)) = stack.last() {
967                    if pos >= current_pos {
968                        stack.pop();
969                    } else {
970                        break;
971                    }
972                }
973
974                // Check if immediate parent has different type - this is mixed nesting
975                if let Some(&(_, parent_is_ordered)) = stack.last()
976                    && parent_is_ordered != list_item.is_ordered
977                {
978                    return true; // Found mixed nesting - early exit
979                }
980
981                stack.push((current_pos, list_item.is_ordered));
982            } else {
983                // Non-list line (but not blank) - could be paragraph or other content
984                last_was_blank = false;
985            }
986        }
987
988        false
989    }
990
991    /// Map a byte offset to (line, column)
992    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
993        match self.line_offsets.binary_search(&offset) {
994            Ok(line) => (line + 1, 1),
995            Err(line) => {
996                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
997                (line, offset - line_start + 1)
998            }
999        }
1000    }
1001
1002    /// Check if a position is within a code block or code span
1003    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1004        // Check code blocks first
1005        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1006            return true;
1007        }
1008
1009        // Check inline code spans (lazy load if needed)
1010        self.code_spans()
1011            .iter()
1012            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1013    }
1014
1015    /// Get line information by line number (1-indexed)
1016    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1017        if line_num > 0 {
1018            self.lines.get(line_num - 1)
1019        } else {
1020            None
1021        }
1022    }
1023
1024    /// Get byte offset for a line number (1-indexed)
1025    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1026        self.line_info(line_num).map(|info| info.byte_offset)
1027    }
1028
1029    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1030    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1031        let normalized_id = ref_id.to_lowercase();
1032        self.reference_defs_map
1033            .get(&normalized_id)
1034            .map(|&idx| self.reference_defs[idx].url.as_str())
1035    }
1036
1037    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1038    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1039        let normalized_id = ref_id.to_lowercase();
1040        self.reference_defs_map
1041            .get(&normalized_id)
1042            .map(|&idx| &self.reference_defs[idx])
1043    }
1044
1045    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1046    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1047        let normalized_id = ref_id.to_lowercase();
1048        self.reference_defs_map.contains_key(&normalized_id)
1049    }
1050
1051    /// Check if a line is part of a list block
1052    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1053        self.list_blocks
1054            .iter()
1055            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1056    }
1057
1058    /// Get the list block containing a specific line
1059    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1060        self.list_blocks
1061            .iter()
1062            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1063    }
1064
1065    // Compatibility methods for DocumentStructure migration
1066
1067    /// Check if a line is within a code block
1068    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1069        if line_num == 0 || line_num > self.lines.len() {
1070            return false;
1071        }
1072        self.lines[line_num - 1].in_code_block
1073    }
1074
1075    /// Check if a line is within front matter
1076    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1077        if line_num == 0 || line_num > self.lines.len() {
1078            return false;
1079        }
1080        self.lines[line_num - 1].in_front_matter
1081    }
1082
1083    /// Check if a line is within an HTML block
1084    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1085        if line_num == 0 || line_num > self.lines.len() {
1086            return false;
1087        }
1088        self.lines[line_num - 1].in_html_block
1089    }
1090
1091    /// Check if a line and column is within a code span
1092    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1093        if line_num == 0 || line_num > self.lines.len() {
1094            return false;
1095        }
1096
1097        // Use the code spans cache to check
1098        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1099        // Convert col to 0-indexed for comparison
1100        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1101        let code_spans = self.code_spans();
1102        code_spans.iter().any(|span| {
1103            // Check if line is within the span's line range
1104            if line_num < span.line || line_num > span.end_line {
1105                return false;
1106            }
1107
1108            if span.line == span.end_line {
1109                // Single-line span: check column bounds
1110                col_0indexed >= span.start_col && col_0indexed < span.end_col
1111            } else if line_num == span.line {
1112                // First line of multi-line span: anything after start_col is in span
1113                col_0indexed >= span.start_col
1114            } else if line_num == span.end_line {
1115                // Last line of multi-line span: anything before end_col is in span
1116                col_0indexed < span.end_col
1117            } else {
1118                // Middle line of multi-line span: entire line is in span
1119                true
1120            }
1121        })
1122    }
1123
1124    /// Check if a byte offset is within a code span
1125    #[inline]
1126    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1127        let code_spans = self.code_spans();
1128        code_spans
1129            .iter()
1130            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1131    }
1132
1133    /// Check if a byte position is within a reference definition
1134    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1135    #[inline]
1136    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1137        self.reference_defs
1138            .iter()
1139            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1140    }
1141
1142    /// Check if a byte position is within an HTML comment
1143    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1144    /// where k is the number of HTML comments (typically very small)
1145    #[inline]
1146    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1147        self.html_comment_ranges
1148            .iter()
1149            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1150    }
1151
1152    /// Check if a byte position is within an HTML tag (including multiline tags)
1153    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1154    #[inline]
1155    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1156        self.html_tags()
1157            .iter()
1158            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1159    }
1160
1161    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1162    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1163        self.jinja_ranges
1164            .iter()
1165            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1166    }
1167
1168    /// Check if a byte position is within a JSX expression (MDX: {expression})
1169    #[inline]
1170    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1171        self.jsx_expression_ranges
1172            .iter()
1173            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174    }
1175
1176    /// Check if a byte position is within an MDX comment ({/* ... */})
1177    #[inline]
1178    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1179        self.mdx_comment_ranges
1180            .iter()
1181            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182    }
1183
1184    /// Get all JSX expression byte ranges
1185    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1186        &self.jsx_expression_ranges
1187    }
1188
1189    /// Get all MDX comment byte ranges
1190    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1191        &self.mdx_comment_ranges
1192    }
1193
1194    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1195    /// Only active in Quarto flavor
1196    #[inline]
1197    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1198        self.citation_ranges
1199            .iter()
1200            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201    }
1202
1203    /// Get all citation byte ranges (Quarto flavor only)
1204    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1205        &self.citation_ranges
1206    }
1207
1208    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1209    #[inline]
1210    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1211        self.shortcode_ranges
1212            .iter()
1213            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214    }
1215
1216    /// Get all shortcode byte ranges
1217    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1218        &self.shortcode_ranges
1219    }
1220
1221    /// Check if a byte position is within a link reference definition title
1222    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1223        self.reference_defs.iter().any(|def| {
1224            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1225                byte_pos >= start && byte_pos < end
1226            } else {
1227                false
1228            }
1229        })
1230    }
1231
1232    /// Check if content has any instances of a specific character (fast)
1233    pub fn has_char(&self, ch: char) -> bool {
1234        match ch {
1235            '#' => self.char_frequency.hash_count > 0,
1236            '*' => self.char_frequency.asterisk_count > 0,
1237            '_' => self.char_frequency.underscore_count > 0,
1238            '-' => self.char_frequency.hyphen_count > 0,
1239            '+' => self.char_frequency.plus_count > 0,
1240            '>' => self.char_frequency.gt_count > 0,
1241            '|' => self.char_frequency.pipe_count > 0,
1242            '[' => self.char_frequency.bracket_count > 0,
1243            '`' => self.char_frequency.backtick_count > 0,
1244            '<' => self.char_frequency.lt_count > 0,
1245            '!' => self.char_frequency.exclamation_count > 0,
1246            '\n' => self.char_frequency.newline_count > 0,
1247            _ => self.content.contains(ch), // Fallback for other characters
1248        }
1249    }
1250
1251    /// Get count of a specific character (fast)
1252    pub fn char_count(&self, ch: char) -> usize {
1253        match ch {
1254            '#' => self.char_frequency.hash_count,
1255            '*' => self.char_frequency.asterisk_count,
1256            '_' => self.char_frequency.underscore_count,
1257            '-' => self.char_frequency.hyphen_count,
1258            '+' => self.char_frequency.plus_count,
1259            '>' => self.char_frequency.gt_count,
1260            '|' => self.char_frequency.pipe_count,
1261            '[' => self.char_frequency.bracket_count,
1262            '`' => self.char_frequency.backtick_count,
1263            '<' => self.char_frequency.lt_count,
1264            '!' => self.char_frequency.exclamation_count,
1265            '\n' => self.char_frequency.newline_count,
1266            _ => self.content.matches(ch).count(), // Fallback for other characters
1267        }
1268    }
1269
1270    /// Check if content likely contains headings (fast)
1271    pub fn likely_has_headings(&self) -> bool {
1272        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1273    }
1274
1275    /// Check if content likely contains lists (fast)
1276    pub fn likely_has_lists(&self) -> bool {
1277        self.char_frequency.asterisk_count > 0
1278            || self.char_frequency.hyphen_count > 0
1279            || self.char_frequency.plus_count > 0
1280    }
1281
1282    /// Check if content likely contains emphasis (fast)
1283    pub fn likely_has_emphasis(&self) -> bool {
1284        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1285    }
1286
1287    /// Check if content likely contains tables (fast)
1288    pub fn likely_has_tables(&self) -> bool {
1289        self.char_frequency.pipe_count > 2
1290    }
1291
1292    /// Check if content likely contains blockquotes (fast)
1293    pub fn likely_has_blockquotes(&self) -> bool {
1294        self.char_frequency.gt_count > 0
1295    }
1296
1297    /// Check if content likely contains code (fast)
1298    pub fn likely_has_code(&self) -> bool {
1299        self.char_frequency.backtick_count > 0
1300    }
1301
1302    /// Check if content likely contains links or images (fast)
1303    pub fn likely_has_links_or_images(&self) -> bool {
1304        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1305    }
1306
1307    /// Check if content likely contains HTML (fast)
1308    pub fn likely_has_html(&self) -> bool {
1309        self.char_frequency.lt_count > 0
1310    }
1311
1312    /// Get the blockquote prefix for inserting a blank line at the given line index.
1313    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1314    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1315    /// Returns an empty string if the line is not inside a blockquote.
1316    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1317        if let Some(line_info) = self.lines.get(line_idx)
1318            && let Some(ref bq) = line_info.blockquote
1319        {
1320            bq.prefix.trim_end().to_string()
1321        } else {
1322            String::new()
1323        }
1324    }
1325
1326    /// Get HTML tags on a specific line
1327    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1328        self.html_tags()
1329            .iter()
1330            .filter(|tag| tag.line == line_num)
1331            .cloned()
1332            .collect()
1333    }
1334
1335    /// Get emphasis spans on a specific line
1336    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1337        self.emphasis_spans()
1338            .iter()
1339            .filter(|span| span.line == line_num)
1340            .cloned()
1341            .collect()
1342    }
1343
1344    /// Get table rows on a specific line
1345    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1346        self.table_rows()
1347            .iter()
1348            .filter(|row| row.line == line_num)
1349            .cloned()
1350            .collect()
1351    }
1352
1353    /// Get bare URLs on a specific line
1354    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1355        self.bare_urls()
1356            .iter()
1357            .filter(|url| url.line == line_num)
1358            .cloned()
1359            .collect()
1360    }
1361
1362    /// Find the line index for a given byte offset using binary search.
1363    /// Returns (line_index, line_number, column) where:
1364    /// - line_index is the 0-based index in the lines array
1365    /// - line_number is the 1-based line number
1366    /// - column is the byte offset within that line
1367    #[inline]
1368    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1369        // Binary search to find the line containing this byte offset
1370        let idx = match lines.binary_search_by(|line| {
1371            if byte_offset < line.byte_offset {
1372                std::cmp::Ordering::Greater
1373            } else if byte_offset > line.byte_offset + line.byte_len {
1374                std::cmp::Ordering::Less
1375            } else {
1376                std::cmp::Ordering::Equal
1377            }
1378        }) {
1379            Ok(idx) => idx,
1380            Err(idx) => idx.saturating_sub(1),
1381        };
1382
1383        let line = &lines[idx];
1384        let line_num = idx + 1;
1385        let col = byte_offset.saturating_sub(line.byte_offset);
1386
1387        (idx, line_num, col)
1388    }
1389
1390    /// Check if a byte offset is within a code span using binary search
1391    #[inline]
1392    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1393        // Since spans are sorted by byte_offset, use partition_point for binary search
1394        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1395
1396        // Check the span that starts at or before our offset
1397        if idx > 0 {
1398            let span = &code_spans[idx - 1];
1399            if offset >= span.byte_offset && offset < span.byte_end {
1400                return true;
1401            }
1402        }
1403
1404        false
1405    }
1406
1407    /// Collect byte ranges of all links using pulldown-cmark
1408    /// This is used to skip heading detection for lines that fall within link syntax
1409    /// (e.g., multiline links like `[text](url\n#fragment)`)
1410    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1411        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1412
1413        let mut link_ranges = Vec::new();
1414        let mut options = Options::empty();
1415        options.insert(Options::ENABLE_WIKILINKS);
1416        options.insert(Options::ENABLE_FOOTNOTES);
1417
1418        let parser = Parser::new_ext(content, options).into_offset_iter();
1419        let mut link_stack: Vec<usize> = Vec::new();
1420
1421        for (event, range) in parser {
1422            match event {
1423                Event::Start(Tag::Link { .. }) => {
1424                    link_stack.push(range.start);
1425                }
1426                Event::End(TagEnd::Link) => {
1427                    if let Some(start_pos) = link_stack.pop() {
1428                        link_ranges.push((start_pos, range.end));
1429                    }
1430                }
1431                _ => {}
1432            }
1433        }
1434
1435        link_ranges
1436    }
1437
1438    /// Parse all links in the content
1439    fn parse_links(
1440        content: &'a str,
1441        lines: &[LineInfo],
1442        code_blocks: &[(usize, usize)],
1443        code_spans: &[CodeSpan],
1444        flavor: MarkdownFlavor,
1445        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1446    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1447        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1448        use std::collections::HashSet;
1449
1450        let mut links = Vec::with_capacity(content.len() / 500);
1451        let mut broken_links = Vec::new();
1452        let mut footnote_refs = Vec::new();
1453
1454        // Track byte positions of links found by pulldown-cmark
1455        let mut found_positions = HashSet::new();
1456
1457        // Use pulldown-cmark's streaming parser with BrokenLink callback
1458        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1459        // This automatically handles:
1460        // - Escaped links (won't generate events)
1461        // - Links in code blocks/spans (won't generate Link events)
1462        // - Images (generates Tag::Image instead)
1463        // - Reference resolution (dest_url is already resolved!)
1464        // - Broken references (callback is invoked)
1465        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1466        let mut options = Options::empty();
1467        options.insert(Options::ENABLE_WIKILINKS);
1468        options.insert(Options::ENABLE_FOOTNOTES);
1469
1470        let parser = Parser::new_with_broken_link_callback(
1471            content,
1472            options,
1473            Some(|link: BrokenLink<'_>| {
1474                broken_links.push(BrokenLinkInfo {
1475                    reference: link.reference.to_string(),
1476                    span: link.span.clone(),
1477                });
1478                None
1479            }),
1480        )
1481        .into_offset_iter();
1482
1483        let mut link_stack: Vec<(
1484            usize,
1485            usize,
1486            pulldown_cmark::CowStr<'a>,
1487            LinkType,
1488            pulldown_cmark::CowStr<'a>,
1489        )> = Vec::new();
1490        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1491
1492        for (event, range) in parser {
1493            match event {
1494                Event::Start(Tag::Link {
1495                    link_type,
1496                    dest_url,
1497                    id,
1498                    ..
1499                }) => {
1500                    // Link start - record position, URL, and reference ID
1501                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1502                    text_chunks.clear();
1503                }
1504                Event::Text(text) if !link_stack.is_empty() => {
1505                    // Track text content with its byte range
1506                    text_chunks.push((text.to_string(), range.start, range.end));
1507                }
1508                Event::Code(code) if !link_stack.is_empty() => {
1509                    // Include inline code in link text (with backticks)
1510                    let code_text = format!("`{code}`");
1511                    text_chunks.push((code_text, range.start, range.end));
1512                }
1513                Event::End(TagEnd::Link) => {
1514                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1515                        // Skip if in HTML comment
1516                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1517                            text_chunks.clear();
1518                            continue;
1519                        }
1520
1521                        // Find line and column information
1522                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1523
1524                        // Skip if this link is on a MkDocs snippet line
1525                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1526                            text_chunks.clear();
1527                            continue;
1528                        }
1529
1530                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1531
1532                        let is_reference = matches!(
1533                            link_type,
1534                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1535                        );
1536
1537                        // Extract link text directly from source bytes to preserve escaping
1538                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1539                        let link_text = if start_pos < content.len() {
1540                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1541
1542                            // Find MATCHING ] by tracking bracket depth for nested brackets
1543                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1544                            // Brackets inside code spans (between backticks) should be ignored
1545                            let mut close_pos = None;
1546                            let mut depth = 0;
1547                            let mut in_code_span = false;
1548
1549                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1550                                // Count preceding backslashes
1551                                let mut backslash_count = 0;
1552                                let mut j = i;
1553                                while j > 0 && link_bytes[j - 1] == b'\\' {
1554                                    backslash_count += 1;
1555                                    j -= 1;
1556                                }
1557                                let is_escaped = backslash_count % 2 != 0;
1558
1559                                // Track code spans - backticks toggle in/out of code
1560                                if byte == b'`' && !is_escaped {
1561                                    in_code_span = !in_code_span;
1562                                }
1563
1564                                // Only count brackets when NOT in a code span
1565                                if !is_escaped && !in_code_span {
1566                                    if byte == b'[' {
1567                                        depth += 1;
1568                                    } else if byte == b']' {
1569                                        if depth == 0 {
1570                                            // Found the matching closing bracket
1571                                            close_pos = Some(i);
1572                                            break;
1573                                        } else {
1574                                            depth -= 1;
1575                                        }
1576                                    }
1577                                }
1578                            }
1579
1580                            if let Some(pos) = close_pos {
1581                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1582                            } else {
1583                                Cow::Borrowed("")
1584                            }
1585                        } else {
1586                            Cow::Borrowed("")
1587                        };
1588
1589                        // For reference links, use the actual reference ID from pulldown-cmark
1590                        let reference_id = if is_reference && !ref_id.is_empty() {
1591                            Some(Cow::Owned(ref_id.to_lowercase()))
1592                        } else if is_reference {
1593                            // For collapsed/shortcut references without explicit ID, use the link text
1594                            Some(Cow::Owned(link_text.to_lowercase()))
1595                        } else {
1596                            None
1597                        };
1598
1599                        // Track this position as found
1600                        found_positions.insert(start_pos);
1601
1602                        links.push(ParsedLink {
1603                            line: line_num,
1604                            start_col: col_start,
1605                            end_col: col_end,
1606                            byte_offset: start_pos,
1607                            byte_end: range.end,
1608                            text: link_text,
1609                            url: Cow::Owned(url.to_string()),
1610                            is_reference,
1611                            reference_id,
1612                            link_type,
1613                        });
1614
1615                        text_chunks.clear();
1616                    }
1617                }
1618                Event::FootnoteReference(footnote_id) => {
1619                    // Capture footnote references like [^1], [^note]
1620                    // Skip if in HTML comment
1621                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1622                        continue;
1623                    }
1624
1625                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1626                    footnote_refs.push(FootnoteRef {
1627                        id: footnote_id.to_string(),
1628                        line: line_num,
1629                        byte_offset: range.start,
1630                        byte_end: range.end,
1631                    });
1632                }
1633                _ => {}
1634            }
1635        }
1636
1637        // Also find undefined references using regex
1638        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1639        // because the reference is undefined
1640        for cap in LINK_PATTERN.captures_iter(content) {
1641            let full_match = cap.get(0).unwrap();
1642            let match_start = full_match.start();
1643            let match_end = full_match.end();
1644
1645            // Skip if this was already found by pulldown-cmark (it's a valid link)
1646            if found_positions.contains(&match_start) {
1647                continue;
1648            }
1649
1650            // Skip if escaped
1651            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1652                continue;
1653            }
1654
1655            // Skip if it's an image
1656            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1657                continue;
1658            }
1659
1660            // Skip if in code block
1661            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1662                continue;
1663            }
1664
1665            // Skip if in code span
1666            if Self::is_offset_in_code_span(code_spans, match_start) {
1667                continue;
1668            }
1669
1670            // Skip if in HTML comment
1671            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1672                continue;
1673            }
1674
1675            // Find line and column information
1676            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1677
1678            // Skip if this link is on a MkDocs snippet line
1679            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1680                continue;
1681            }
1682
1683            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1684
1685            let text = cap.get(1).map_or("", |m| m.as_str());
1686
1687            // Only process reference links (group 6)
1688            if let Some(ref_id) = cap.get(6) {
1689                let ref_id_str = ref_id.as_str();
1690                let normalized_ref = if ref_id_str.is_empty() {
1691                    Cow::Owned(text.to_lowercase()) // Implicit reference
1692                } else {
1693                    Cow::Owned(ref_id_str.to_lowercase())
1694                };
1695
1696                // This is an undefined reference (pulldown-cmark didn't parse it)
1697                links.push(ParsedLink {
1698                    line: line_num,
1699                    start_col: col_start,
1700                    end_col: col_end,
1701                    byte_offset: match_start,
1702                    byte_end: match_end,
1703                    text: Cow::Borrowed(text),
1704                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1705                    is_reference: true,
1706                    reference_id: Some(normalized_ref),
1707                    link_type: LinkType::Reference, // Undefined references are reference-style
1708                });
1709            }
1710        }
1711
1712        (links, broken_links, footnote_refs)
1713    }
1714
1715    /// Parse all images in the content
1716    fn parse_images(
1717        content: &'a str,
1718        lines: &[LineInfo],
1719        code_blocks: &[(usize, usize)],
1720        code_spans: &[CodeSpan],
1721        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1722    ) -> Vec<ParsedImage<'a>> {
1723        use crate::utils::skip_context::is_in_html_comment_ranges;
1724        use std::collections::HashSet;
1725
1726        // Pre-size based on a heuristic: images are less common than links
1727        let mut images = Vec::with_capacity(content.len() / 1000);
1728        let mut found_positions = HashSet::new();
1729
1730        // Use pulldown-cmark for parsing - more accurate and faster
1731        let parser = Parser::new(content).into_offset_iter();
1732        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1733            Vec::new();
1734        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1735
1736        for (event, range) in parser {
1737            match event {
1738                Event::Start(Tag::Image {
1739                    link_type,
1740                    dest_url,
1741                    id,
1742                    ..
1743                }) => {
1744                    image_stack.push((range.start, dest_url, link_type, id));
1745                    text_chunks.clear();
1746                }
1747                Event::Text(text) if !image_stack.is_empty() => {
1748                    text_chunks.push((text.to_string(), range.start, range.end));
1749                }
1750                Event::Code(code) if !image_stack.is_empty() => {
1751                    let code_text = format!("`{code}`");
1752                    text_chunks.push((code_text, range.start, range.end));
1753                }
1754                Event::End(TagEnd::Image) => {
1755                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1756                        // Skip if in code block
1757                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1758                            continue;
1759                        }
1760
1761                        // Skip if in code span
1762                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1763                            continue;
1764                        }
1765
1766                        // Skip if in HTML comment
1767                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1768                            continue;
1769                        }
1770
1771                        // Find line and column using binary search
1772                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1773                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1774
1775                        let is_reference = matches!(
1776                            link_type,
1777                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1778                        );
1779
1780                        // Extract alt text directly from source bytes to preserve escaping
1781                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1782                        let alt_text = if start_pos < content.len() {
1783                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1784
1785                            // Find MATCHING ] by tracking bracket depth for nested brackets
1786                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1787                            let mut close_pos = None;
1788                            let mut depth = 0;
1789
1790                            if image_bytes.len() > 2 {
1791                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1792                                    // Count preceding backslashes
1793                                    let mut backslash_count = 0;
1794                                    let mut j = i;
1795                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1796                                        backslash_count += 1;
1797                                        j -= 1;
1798                                    }
1799                                    let is_escaped = backslash_count % 2 != 0;
1800
1801                                    if !is_escaped {
1802                                        if byte == b'[' {
1803                                            depth += 1;
1804                                        } else if byte == b']' {
1805                                            if depth == 0 {
1806                                                // Found the matching closing bracket
1807                                                close_pos = Some(i);
1808                                                break;
1809                                            } else {
1810                                                depth -= 1;
1811                                            }
1812                                        }
1813                                    }
1814                                }
1815                            }
1816
1817                            if let Some(pos) = close_pos {
1818                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1819                            } else {
1820                                Cow::Borrowed("")
1821                            }
1822                        } else {
1823                            Cow::Borrowed("")
1824                        };
1825
1826                        let reference_id = if is_reference && !ref_id.is_empty() {
1827                            Some(Cow::Owned(ref_id.to_lowercase()))
1828                        } else if is_reference {
1829                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1830                        } else {
1831                            None
1832                        };
1833
1834                        found_positions.insert(start_pos);
1835                        images.push(ParsedImage {
1836                            line: line_num,
1837                            start_col: col_start,
1838                            end_col: col_end,
1839                            byte_offset: start_pos,
1840                            byte_end: range.end,
1841                            alt_text,
1842                            url: Cow::Owned(url.to_string()),
1843                            is_reference,
1844                            reference_id,
1845                            link_type,
1846                        });
1847                    }
1848                }
1849                _ => {}
1850            }
1851        }
1852
1853        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1854        for cap in IMAGE_PATTERN.captures_iter(content) {
1855            let full_match = cap.get(0).unwrap();
1856            let match_start = full_match.start();
1857            let match_end = full_match.end();
1858
1859            // Skip if already found by pulldown-cmark
1860            if found_positions.contains(&match_start) {
1861                continue;
1862            }
1863
1864            // Skip if the ! is escaped
1865            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1866                continue;
1867            }
1868
1869            // Skip if in code block, code span, or HTML comment
1870            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1871                || Self::is_offset_in_code_span(code_spans, match_start)
1872                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1873            {
1874                continue;
1875            }
1876
1877            // Only process reference images (undefined references not found by pulldown-cmark)
1878            if let Some(ref_id) = cap.get(6) {
1879                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1880                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1881                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1882                let ref_id_str = ref_id.as_str();
1883                let normalized_ref = if ref_id_str.is_empty() {
1884                    Cow::Owned(alt_text.to_lowercase())
1885                } else {
1886                    Cow::Owned(ref_id_str.to_lowercase())
1887                };
1888
1889                images.push(ParsedImage {
1890                    line: line_num,
1891                    start_col: col_start,
1892                    end_col: col_end,
1893                    byte_offset: match_start,
1894                    byte_end: match_end,
1895                    alt_text: Cow::Borrowed(alt_text),
1896                    url: Cow::Borrowed(""),
1897                    is_reference: true,
1898                    reference_id: Some(normalized_ref),
1899                    link_type: LinkType::Reference, // Undefined references are reference-style
1900                });
1901            }
1902        }
1903
1904        images
1905    }
1906
1907    /// Parse reference definitions
1908    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1909        // Pre-size based on lines count as reference definitions are line-based
1910        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1911
1912        for (line_idx, line_info) in lines.iter().enumerate() {
1913            // Skip lines in code blocks
1914            if line_info.in_code_block {
1915                continue;
1916            }
1917
1918            let line = line_info.content(content);
1919            let line_num = line_idx + 1;
1920
1921            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1922                let id_raw = cap.get(1).unwrap().as_str();
1923
1924                // Skip footnote definitions - they use [^id]: syntax and are semantically
1925                // different from reference link definitions
1926                if id_raw.starts_with('^') {
1927                    continue;
1928                }
1929
1930                let id = id_raw.to_lowercase();
1931                let url = cap.get(2).unwrap().as_str().to_string();
1932                let title_match = cap.get(3).or_else(|| cap.get(4));
1933                let title = title_match.map(|m| m.as_str().to_string());
1934
1935                // Calculate byte positions
1936                // The match starts at the beginning of the line (0) and extends to the end
1937                let match_obj = cap.get(0).unwrap();
1938                let byte_offset = line_info.byte_offset + match_obj.start();
1939                let byte_end = line_info.byte_offset + match_obj.end();
1940
1941                // Calculate title byte positions (includes the quote character before content)
1942                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1943                    // The match is the content inside quotes, so we include the quote before
1944                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1945                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1946                    (Some(start), Some(end))
1947                } else {
1948                    (None, None)
1949                };
1950
1951                refs.push(ReferenceDef {
1952                    line: line_num,
1953                    id,
1954                    url,
1955                    title,
1956                    byte_offset,
1957                    byte_end,
1958                    title_byte_start,
1959                    title_byte_end,
1960                });
1961            }
1962        }
1963
1964        refs
1965    }
1966
/// Hand-rolled blockquote prefix splitter (used instead of a regex).
///
/// Consumes every leading `>` marker, including nested ones such as
/// `> > > content`. Each marker may be preceded by whitespace and may be
/// followed by at most one space or tab that is counted as part of the prefix.
///
/// Returns `Some((prefix_with_ws, content_after_prefix))`, or `None` when
/// the line does not start (after leading whitespace) with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let mut rest = line;
    let mut prefix_len = 0;

    // Each iteration peels one "<whitespace>>" group off the front of `rest`
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Leading whitespace plus the '>' itself
        prefix_len += rest.len() - after_marker.len();

        // A single optional space or tab after '>' also belongs to the prefix
        rest = match after_marker.strip_prefix(is_blank) {
            Some(after_gap) => {
                prefix_len += 1;
                after_gap
            }
            None => after_marker,
        };
    }

    // No marker consumed means the line is not a blockquote
    if prefix_len == 0 {
        None
    } else {
        Some((&line[..prefix_len], rest))
    }
}
2007
2008    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2009    ///
2010    /// Returns a HashMap keyed by line byte offset, containing:
2011    /// `(is_ordered, marker, marker_column, content_column, number)`
2012    ///
2013    /// ## Why pulldown-cmark?
2014    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2015    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2016    /// This fixes issue #253 where continuation lines were falsely detected.
2017    ///
2018    /// ## Tab indentation quirk
2019    /// Pulldown-cmark reports nested list items at the newline character position
2020    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2021    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2022    /// We detect this and advance to the correct line.
2023    ///
2024    /// ## HashMap key strategy
2025    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2026    /// that resolve to the same line (after newline adjustment). The first event
2027    /// for each line is authoritative.
2028    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2029    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2030    /// This avoids a separate parse for emphasis detection.
2031    fn detect_list_items_and_emphasis_with_pulldown(
2032        content: &str,
2033        line_offsets: &[usize],
2034        flavor: MarkdownFlavor,
2035        front_matter_end: usize,
2036        code_blocks: &[(usize, usize)],
2037    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2038        use std::collections::HashMap;
2039
2040        let mut list_items = HashMap::new();
2041        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2042
2043        let mut options = Options::empty();
2044        options.insert(Options::ENABLE_TABLES);
2045        options.insert(Options::ENABLE_FOOTNOTES);
2046        options.insert(Options::ENABLE_STRIKETHROUGH);
2047        options.insert(Options::ENABLE_TASKLISTS);
2048        // Always enable GFM features for consistency with existing behavior
2049        options.insert(Options::ENABLE_GFM);
2050
2051        // Suppress unused variable warning
2052        let _ = flavor;
2053
2054        let parser = Parser::new_ext(content, options).into_offset_iter();
2055        let mut list_depth: usize = 0;
2056        let mut list_stack: Vec<bool> = Vec::new();
2057
2058        for (event, range) in parser {
2059            match event {
2060                // Capture emphasis spans (for MD030's emphasis detection)
2061                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2062                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2063                        2
2064                    } else {
2065                        1
2066                    };
2067                    let match_start = range.start;
2068                    let match_end = range.end;
2069
2070                    // Skip if in code block
2071                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2072                        // Determine marker character by looking at the content at the start
2073                        let marker = content[match_start..].chars().next().unwrap_or('*');
2074                        if marker == '*' || marker == '_' {
2075                            // Extract content between markers
2076                            let content_start = match_start + marker_count;
2077                            let content_end = if match_end >= marker_count {
2078                                match_end - marker_count
2079                            } else {
2080                                match_end
2081                            };
2082                            let content_part = if content_start < content_end && content_end <= content.len() {
2083                                &content[content_start..content_end]
2084                            } else {
2085                                ""
2086                            };
2087
2088                            // Find which line this emphasis is on using line_offsets
2089                            let line_idx = match line_offsets.binary_search(&match_start) {
2090                                Ok(idx) => idx,
2091                                Err(idx) => idx.saturating_sub(1),
2092                            };
2093                            let line_num = line_idx + 1;
2094                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2095                            let col_start = match_start - line_start;
2096                            let col_end = match_end - line_start;
2097
2098                            emphasis_spans.push(EmphasisSpan {
2099                                line: line_num,
2100                                start_col: col_start,
2101                                end_col: col_end,
2102                                byte_offset: match_start,
2103                                byte_end: match_end,
2104                                marker,
2105                                marker_count,
2106                                content: content_part.to_string(),
2107                            });
2108                        }
2109                    }
2110                }
2111                Event::Start(Tag::List(start_number)) => {
2112                    list_depth += 1;
2113                    list_stack.push(start_number.is_some());
2114                }
2115                Event::End(TagEnd::List(_)) => {
2116                    list_depth = list_depth.saturating_sub(1);
2117                    list_stack.pop();
2118                }
2119                Event::Start(Tag::Item) if list_depth > 0 => {
2120                    // Get the ordered state for the CURRENT (innermost) list
2121                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2122                    // Find which line this byte offset corresponds to
2123                    let item_start = range.start;
2124
2125                    // Binary search to find the line number
2126                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2127                        Ok(idx) => idx,
2128                        Err(idx) => idx.saturating_sub(1),
2129                    };
2130
2131                    // Pulldown-cmark reports nested list items at the newline before the item
2132                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2133                    // Advance to the actual content line in this case.
2134                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2135                        line_idx += 1;
2136                    }
2137
2138                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2139                    if front_matter_end > 0 && line_idx < front_matter_end {
2140                        continue;
2141                    }
2142
2143                    if line_idx < line_offsets.len() {
2144                        let line_start_byte = line_offsets[line_idx];
2145                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2146                        let line = &content[line_start_byte..line_end.min(content.len())];
2147
2148                        // Strip trailing newline
2149                        let line = line
2150                            .strip_suffix('\n')
2151                            .or_else(|| line.strip_suffix("\r\n"))
2152                            .unwrap_or(line);
2153
2154                        // Strip blockquote prefix if present
2155                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2156                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2157                            (prefix.len(), content)
2158                        } else {
2159                            (0, line)
2160                        };
2161
2162                        // Parse the list marker from the actual line
2163                        if current_list_is_ordered {
2164                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2165                                Self::parse_ordered_list(line_to_parse)
2166                            {
2167                                let marker = format!("{number_str}{delimiter}");
2168                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2169                                let content_column = marker_column + marker.len() + spacing.len();
2170                                let number = number_str.parse().ok();
2171
2172                                list_items.entry(line_start_byte).or_insert((
2173                                    true,
2174                                    marker,
2175                                    marker_column,
2176                                    content_column,
2177                                    number,
2178                                ));
2179                            }
2180                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2181                            Self::parse_unordered_list(line_to_parse)
2182                        {
2183                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2184                            let content_column = marker_column + 1 + spacing.len();
2185
2186                            list_items.entry(line_start_byte).or_insert((
2187                                false,
2188                                marker.to_string(),
2189                                marker_column,
2190                                content_column,
2191                                None,
2192                            ));
2193                        }
2194                    }
2195                }
2196                _ => {}
2197            }
2198        }
2199
2200        (list_items, emphasis_spans)
2201    }
2202
/// Hand-rolled unordered list line splitter (used instead of a regex).
///
/// Accepts: optional leading spaces/tabs, one marker out of `-`, `*`, `+`,
/// then any run of spaces/tabs, then the remaining text.
///
/// Returns `Some((leading_ws, marker, spacing, content))`, or `None` when
/// the first character after the leading whitespace is not a marker.
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    // Split off the indentation (spaces and tabs only)
    let after_indent = line.trim_start_matches(is_blank);
    let indent = &line[..line.len() - after_indent.len()];

    // The next character must be one of the three bullet markers
    let mut chars = after_indent.chars();
    let marker = chars.next().filter(|&c| matches!(c, '-' | '*' | '+'))?;
    let after_marker = chars.as_str();

    // Whatever blanks follow the marker form the spacing; the rest is content
    let body = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - body.len()];

    Some((indent, marker, spacing, body))
}
2235
2236    /// Fast ordered list parser - replaces regex for 5-10x speedup
2237    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2238    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2239    #[inline]
2240    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2241        let bytes = line.as_bytes();
2242        let mut i = 0;
2243
2244        // Skip leading whitespace
2245        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2246            i += 1;
2247        }
2248
2249        // Collect digits
2250        let number_start = i;
2251        while i < bytes.len() && bytes[i].is_ascii_digit() {
2252            i += 1;
2253        }
2254        if i == number_start {
2255            return None; // No digits found
2256        }
2257
2258        // Check for delimiter
2259        if i >= bytes.len() {
2260            return None;
2261        }
2262        let delimiter = bytes[i] as char;
2263        if delimiter != '.' && delimiter != ')' {
2264            return None;
2265        }
2266        let delimiter_pos = i;
2267        i += 1;
2268
2269        // Collect spacing after delimiter (space or tab only)
2270        let spacing_start = i;
2271        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2272            i += 1;
2273        }
2274
2275        Some((
2276            &line[..number_start],
2277            &line[number_start..delimiter_pos],
2278            delimiter,
2279            &line[spacing_start..i],
2280            &line[i..],
2281        ))
2282    }
2283
2284    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2285    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2286    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2287        let num_lines = line_offsets.len();
2288        let mut in_code_block = vec![false; num_lines];
2289
2290        // For each code block, mark all lines within it
2291        for &(start, end) in code_blocks {
2292            // Ensure we're at valid UTF-8 boundaries
2293            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2294                let mut boundary = start;
2295                while boundary > 0 && !content.is_char_boundary(boundary) {
2296                    boundary -= 1;
2297                }
2298                boundary
2299            } else {
2300                start
2301            };
2302
2303            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2304                let mut boundary = end;
2305                while boundary < content.len() && !content.is_char_boundary(boundary) {
2306                    boundary += 1;
2307                }
2308                boundary
2309            } else {
2310                end.min(content.len())
2311            };
2312
2313            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2314            // That function now has proper list context awareness (see code_block_utils.rs)
2315            // and correctly distinguishes between:
2316            // - Fenced code blocks (``` or ~~~)
2317            // - Indented code blocks at document level (4 spaces + blank line before)
2318            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2319            //
2320            // We no longer need to re-validate here. The original validation logic
2321            // was causing false positives by marking list continuation paragraphs as
2322            // code blocks when they have 4 spaces of indentation.
2323
2324            // Use binary search to find the first and last line indices
2325            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2326            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2327            //
2328            // Find the line that CONTAINS safe_start: the line with the largest
2329            // start offset that is <= safe_start. partition_point gives us the
2330            // first line that starts AFTER safe_start, so we subtract 1.
2331            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2332            let first_line = first_line_after.saturating_sub(1);
2333            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2334
2335            // Mark all lines in the range at once
2336            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2337                *flag = true;
2338            }
2339        }
2340
2341        in_code_block
2342    }
2343
2344    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2345    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2346    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2347        let content_lines: Vec<&str> = content.lines().collect();
2348        let num_lines = content_lines.len();
2349        let mut in_math_block = vec![false; num_lines];
2350
2351        let mut inside_math = false;
2352
2353        for (i, line) in content_lines.iter().enumerate() {
2354            // Skip lines that are in code blocks - math delimiters inside code are literal
2355            if code_block_map.get(i).copied().unwrap_or(false) {
2356                continue;
2357            }
2358
2359            let trimmed = line.trim();
2360
2361            // Check for math block delimiter ($$)
2362            // A line with just $$ toggles the math block state
2363            if trimmed == "$$" {
2364                if inside_math {
2365                    // Closing delimiter - this line is still part of the math block
2366                    in_math_block[i] = true;
2367                    inside_math = false;
2368                } else {
2369                    // Opening delimiter - this line starts the math block
2370                    in_math_block[i] = true;
2371                    inside_math = true;
2372                }
2373            } else if inside_math {
2374                // Content inside math block
2375                in_math_block[i] = true;
2376            }
2377        }
2378
2379        in_math_block
2380    }
2381
2382    /// Pre-compute basic line information (without headings/blockquotes)
2383    /// Also returns emphasis spans detected during the pulldown-cmark parse
2384    fn compute_basic_line_info(
2385        content: &str,
2386        line_offsets: &[usize],
2387        code_blocks: &[(usize, usize)],
2388        flavor: MarkdownFlavor,
2389        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2390        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2391        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2392    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2393        let content_lines: Vec<&str> = content.lines().collect();
2394        let mut lines = Vec::with_capacity(content_lines.len());
2395
2396        // Pre-compute which lines are in code blocks
2397        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2398
2399        // Pre-compute which lines are in math blocks ($$ ... $$)
2400        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2401
2402        // Detect front matter boundaries FIRST, before any other parsing
2403        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2404        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2405
2406        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2407        // (context-aware, eliminates false positives)
2408        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2409            content,
2410            line_offsets,
2411            flavor,
2412            front_matter_end,
2413            code_blocks,
2414        );
2415
2416        for (i, line) in content_lines.iter().enumerate() {
2417            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2418            let indent = line.len() - line.trim_start().len();
2419            // Compute visual indent with proper CommonMark tab expansion
2420            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2421
2422            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2423            let blockquote_parse = Self::parse_blockquote_prefix(line);
2424
2425            // For blank detection, consider blockquote context
2426            let is_blank = if let Some((_, content)) = blockquote_parse {
2427                // In blockquote context, check if content after prefix is blank
2428                content.trim().is_empty()
2429            } else {
2430                line.trim().is_empty()
2431            };
2432
2433            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2434            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2435
2436            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2437            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2438                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2439            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2440            // This ensures content after `-->` on the same line is not incorrectly skipped
2441            let line_end_offset = byte_offset + line.len();
2442            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2443                html_comment_ranges,
2444                byte_offset,
2445                line_end_offset,
2446            );
2447            // Use pulldown-cmark's list detection for context-aware parsing
2448            // This eliminates false positives on continuation lines (issue #253)
2449            let list_item =
2450                list_item_map
2451                    .get(&byte_offset)
2452                    .map(
2453                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2454                            marker: marker.clone(),
2455                            is_ordered: *is_ordered,
2456                            number: *number,
2457                            marker_column: *marker_column,
2458                            content_column: *content_column,
2459                        },
2460                    );
2461
2462            // Detect horizontal rules (only outside code blocks and frontmatter)
2463            // Uses CommonMark-compliant check including leading indentation validation
2464            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2465            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2466
2467            // Get math block status for this line
2468            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2469
2470            // Check if line is inside a Quarto div block
2471            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2472                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2473
2474            lines.push(LineInfo {
2475                byte_offset,
2476                byte_len: line.len(),
2477                indent,
2478                visual_indent,
2479                is_blank,
2480                in_code_block,
2481                in_front_matter,
2482                in_html_block: false, // Will be populated after line creation
2483                in_html_comment,
2484                list_item,
2485                heading: None,    // Will be populated in second pass for Setext headings
2486                blockquote: None, // Will be populated after line creation
2487                in_mkdocstrings,
2488                in_esm_block: false, // Will be populated after line creation for MDX files
2489                in_code_span_continuation: false, // Will be populated after code spans are parsed
2490                is_horizontal_rule: is_hr,
2491                in_math_block,
2492                in_quarto_div,
2493                in_jsx_expression: false,  // Will be populated for MDX files
2494                in_mdx_comment: false,     // Will be populated for MDX files
2495                in_jsx_component: false,   // Will be populated for MDX files
2496                in_jsx_fragment: false,    // Will be populated for MDX files
2497                in_admonition: false,      // Will be populated for MkDocs files
2498                in_content_tab: false,     // Will be populated for MkDocs files
2499                in_definition_list: false, // Will be populated for MkDocs files
2500            });
2501        }
2502
2503        (lines, emphasis_spans)
2504    }
2505
2506    /// Detect headings and blockquotes (called after HTML block detection)
2507    fn detect_headings_and_blockquotes(
2508        content: &str,
2509        lines: &mut [LineInfo],
2510        flavor: MarkdownFlavor,
2511        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2512        link_byte_ranges: &[(usize, usize)],
2513    ) {
2514        // Regex for heading detection
2515        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2516            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2517        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2518            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2519
2520        let content_lines: Vec<&str> = content.lines().collect();
2521
2522        // Detect front matter boundaries to skip those lines
2523        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2524
2525        // Detect headings (including Setext which needs look-ahead) and blockquotes
2526        for i in 0..lines.len() {
2527            let line = content_lines[i];
2528
2529            // Detect blockquotes FIRST, before any skip conditions.
2530            // A line can be both a blockquote AND contain a code block inside it.
2531            // We need to know about the blockquote marker regardless of code block status.
2532            // Skip only frontmatter lines - those are never blockquotes.
2533            if !(front_matter_end > 0 && i < front_matter_end)
2534                && let Some(bq) = parse_blockquote_detailed(line)
2535            {
2536                let nesting_level = bq.markers.len();
2537                let marker_column = bq.indent.len();
2538                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2539                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2540                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2541                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2542
2543                lines[i].blockquote = Some(BlockquoteInfo {
2544                    nesting_level,
2545                    indent: bq.indent.to_string(),
2546                    marker_column,
2547                    prefix,
2548                    content: bq.content.to_string(),
2549                    has_no_space_after_marker: has_no_space,
2550                    has_multiple_spaces_after_marker: has_multiple_spaces,
2551                    needs_md028_fix,
2552                });
2553
2554                // Update is_horizontal_rule for blockquote content
2555                // The original detection doesn't strip blockquote prefix, so we need to check here
2556                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2557                    lines[i].is_horizontal_rule = true;
2558                }
2559            }
2560
2561            // Now apply skip conditions for heading detection
2562            if lines[i].in_code_block {
2563                continue;
2564            }
2565
2566            // Skip lines in front matter
2567            if front_matter_end > 0 && i < front_matter_end {
2568                continue;
2569            }
2570
2571            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2572            if lines[i].in_html_block {
2573                continue;
2574            }
2575
2576            // Skip heading detection for blank lines
2577            if lines[i].is_blank {
2578                continue;
2579            }
2580
2581            // Check for ATX headings (but skip MkDocs snippet lines)
2582            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2583            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2584                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2585                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2586            } else {
2587                false
2588            };
2589
2590            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2591                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2592                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2593                    continue;
2594                }
2595                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2596                // This prevents false positives where `#fragment` is detected as a heading
2597                let line_offset = lines[i].byte_offset;
2598                if link_byte_ranges
2599                    .iter()
2600                    .any(|&(start, end)| line_offset > start && line_offset < end)
2601                {
2602                    continue;
2603                }
2604                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2605                let hashes = caps.get(2).map_or("", |m| m.as_str());
2606                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2607                let rest = caps.get(4).map_or("", |m| m.as_str());
2608
2609                let level = hashes.len() as u8;
2610                let marker_column = leading_spaces.len();
2611
2612                // Check for closing sequence, but handle custom IDs that might come after
2613                let (text, has_closing, closing_seq) = {
2614                    // First check if there's a custom ID at the end
2615                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2616                        // Check if this looks like a valid custom ID (ends with })
2617                        if rest[id_start..].trim_end().ends_with('}') {
2618                            // Split off the custom ID
2619                            (&rest[..id_start], &rest[id_start..])
2620                        } else {
2621                            (rest, "")
2622                        }
2623                    } else {
2624                        (rest, "")
2625                    };
2626
2627                    // Now look for closing hashes in the part before the custom ID
2628                    let trimmed_rest = rest_without_id.trim_end();
2629                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2630                        // Find the start of the hash sequence by walking backwards
2631                        // Use char_indices to get byte positions at char boundaries
2632                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2633
2634                        // Find which char index corresponds to last_hash_byte_pos
2635                        let last_hash_char_idx = char_positions
2636                            .iter()
2637                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2638
2639                        if let Some(mut char_idx) = last_hash_char_idx {
2640                            // Walk backwards to find start of hash sequence
2641                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2642                                char_idx -= 1;
2643                            }
2644
2645                            // Get the byte position of the start of hashes
2646                            let start_of_hashes = char_positions[char_idx].0;
2647
2648                            // Check if there's at least one space before the closing hashes
2649                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2650
2651                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2652                            let potential_closing = &trimmed_rest[start_of_hashes..];
2653                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2654
2655                            if is_all_hashes && has_space_before {
2656                                // This is a closing sequence
2657                                let closing_hashes = potential_closing.to_string();
2658                                // The text is everything before the closing hashes
2659                                // Don't include the custom ID here - it will be extracted later
2660                                let text_part = if !custom_id_part.is_empty() {
2661                                    // If we have a custom ID, append it back to get the full rest
2662                                    // This allows the extract_header_id function to handle it properly
2663                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2664                                } else {
2665                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2666                                };
2667                                (text_part, true, closing_hashes)
2668                            } else {
2669                                // Not a valid closing sequence, return the full content
2670                                (rest.to_string(), false, String::new())
2671                            }
2672                        } else {
2673                            // Couldn't find char boundary, return the full content
2674                            (rest.to_string(), false, String::new())
2675                        }
2676                    } else {
2677                        // No hashes found, return the full content
2678                        (rest.to_string(), false, String::new())
2679                    }
2680                };
2681
2682                let content_column = marker_column + hashes.len() + spaces_after.len();
2683
2684                // Extract custom header ID if present
2685                let raw_text = text.trim().to_string();
2686                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2687
2688                // If no custom ID was found on the header line, check the next line for standalone attr-list
2689                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2690                    let next_line = content_lines[i + 1];
2691                    if !lines[i + 1].in_code_block
2692                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2693                        && let Some(next_line_id) =
2694                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2695                    {
2696                        custom_id = Some(next_line_id);
2697                    }
2698                }
2699
2700                // ATX heading is "valid" for processing by heading rules if:
2701                // 1. Has space after # (CommonMark compliant): `# Heading`
2702                // 2. Is empty (just hashes): `#`
2703                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2704                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2705                //
2706                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2707                // - `#tag` - single # with lowercase (social hashtag)
2708                // - `#123` - single # with number (GitHub issue ref)
2709                let is_valid = !spaces_after.is_empty()
2710                    || rest.is_empty()
2711                    || level > 1
2712                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2713
2714                lines[i].heading = Some(HeadingInfo {
2715                    level,
2716                    style: HeadingStyle::ATX,
2717                    marker: hashes.to_string(),
2718                    marker_column,
2719                    content_column,
2720                    text: clean_text,
2721                    custom_id,
2722                    raw_text,
2723                    has_closing_sequence: has_closing,
2724                    closing_sequence: closing_seq,
2725                    is_valid,
2726                });
2727            }
2728            // Check for Setext headings (need to look at next line)
2729            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2730                let next_line = content_lines[i + 1];
2731                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2732                    // Skip if next line is front matter delimiter
2733                    if front_matter_end > 0 && i < front_matter_end {
2734                        continue;
2735                    }
2736
2737                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2738                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2739                    {
2740                        continue;
2741                    }
2742
2743                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2744                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2745                    let content_line = line.trim();
2746
2747                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2748                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2749                        continue;
2750                    }
2751
2752                    // Skip underscore thematic breaks (___)
2753                    if content_line.starts_with('_') {
2754                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2755                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2756                            continue;
2757                        }
2758                    }
2759
2760                    // Skip numbered lists (1. Item, 2. Item, etc.)
2761                    if let Some(first_char) = content_line.chars().next()
2762                        && first_char.is_ascii_digit()
2763                    {
2764                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2765                        if num_end < content_line.len() {
2766                            let next = content_line.chars().nth(num_end);
2767                            if next == Some('.') || next == Some(')') {
2768                                continue;
2769                            }
2770                        }
2771                    }
2772
2773                    // Skip ATX headings
2774                    if ATX_HEADING_REGEX.is_match(line) {
2775                        continue;
2776                    }
2777
2778                    // Skip blockquotes
2779                    if content_line.starts_with('>') {
2780                        continue;
2781                    }
2782
2783                    // Skip code fences
2784                    let trimmed_start = line.trim_start();
2785                    if trimmed_start.len() >= 3 {
2786                        let first_three: String = trimmed_start.chars().take(3).collect();
2787                        if first_three == "```" || first_three == "~~~" {
2788                            continue;
2789                        }
2790                    }
2791
2792                    // Skip HTML blocks
2793                    if content_line.starts_with('<') {
2794                        continue;
2795                    }
2796
2797                    let underline = next_line.trim();
2798
2799                    let level = if underline.starts_with('=') { 1 } else { 2 };
2800                    let style = if level == 1 {
2801                        HeadingStyle::Setext1
2802                    } else {
2803                        HeadingStyle::Setext2
2804                    };
2805
2806                    // Extract custom header ID if present
2807                    let raw_text = line.trim().to_string();
2808                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2809
2810                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2811                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2812                        let attr_line = content_lines[i + 2];
2813                        if !lines[i + 2].in_code_block
2814                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2815                            && let Some(attr_line_id) =
2816                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2817                        {
2818                            custom_id = Some(attr_line_id);
2819                        }
2820                    }
2821
2822                    lines[i].heading = Some(HeadingInfo {
2823                        level,
2824                        style,
2825                        marker: underline.to_string(),
2826                        marker_column: next_line.len() - next_line.trim_start().len(),
2827                        content_column: lines[i].indent,
2828                        text: clean_text,
2829                        custom_id,
2830                        raw_text,
2831                        has_closing_sequence: false,
2832                        closing_sequence: String::new(),
2833                        is_valid: true, // Setext headings are always valid
2834                    });
2835                }
2836            }
2837        }
2838    }
2839
2840    /// Detect HTML blocks in the content
2841    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2842        // HTML block elements that trigger block context
2843        // Includes HTML5 media, embedded content, and interactive elements
2844        const BLOCK_ELEMENTS: &[&str] = &[
2845            "address",
2846            "article",
2847            "aside",
2848            "audio",
2849            "blockquote",
2850            "canvas",
2851            "details",
2852            "dialog",
2853            "dd",
2854            "div",
2855            "dl",
2856            "dt",
2857            "embed",
2858            "fieldset",
2859            "figcaption",
2860            "figure",
2861            "footer",
2862            "form",
2863            "h1",
2864            "h2",
2865            "h3",
2866            "h4",
2867            "h5",
2868            "h6",
2869            "header",
2870            "hr",
2871            "iframe",
2872            "li",
2873            "main",
2874            "menu",
2875            "nav",
2876            "noscript",
2877            "object",
2878            "ol",
2879            "p",
2880            "picture",
2881            "pre",
2882            "script",
2883            "search",
2884            "section",
2885            "source",
2886            "style",
2887            "summary",
2888            "svg",
2889            "table",
2890            "tbody",
2891            "td",
2892            "template",
2893            "textarea",
2894            "tfoot",
2895            "th",
2896            "thead",
2897            "tr",
2898            "track",
2899            "ul",
2900            "video",
2901        ];
2902
2903        let mut i = 0;
2904        while i < lines.len() {
2905            // Skip if already in code block or front matter
2906            if lines[i].in_code_block || lines[i].in_front_matter {
2907                i += 1;
2908                continue;
2909            }
2910
2911            let trimmed = lines[i].content(content).trim_start();
2912
2913            // Check if line starts with an HTML tag
2914            if trimmed.starts_with('<') && trimmed.len() > 1 {
2915                // Extract tag name safely
2916                let after_bracket = &trimmed[1..];
2917                let is_closing = after_bracket.starts_with('/');
2918                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2919
2920                // Extract tag name (stop at space, >, /, or end of string)
2921                let tag_name = tag_start
2922                    .chars()
2923                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2924                    .collect::<String>()
2925                    .to_lowercase();
2926
2927                // Check if it's a block element
2928                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2929                    // Mark this line as in HTML block
2930                    lines[i].in_html_block = true;
2931
2932                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2933                    // This avoids complex nesting logic that might cause infinite loops
2934                    if !is_closing {
2935                        let closing_tag = format!("</{tag_name}>");
2936                        // style and script tags can contain blank lines (CSS/JS formatting)
2937                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2938                        let mut j = i + 1;
2939                        let mut found_closing_tag = false;
2940                        while j < lines.len() && j < i + 100 {
2941                            // Limit search to 100 lines
2942                            // Stop at blank lines (except for style/script tags)
2943                            if !allow_blank_lines && lines[j].is_blank {
2944                                break;
2945                            }
2946
2947                            lines[j].in_html_block = true;
2948
2949                            // Check if this line contains the closing tag
2950                            if lines[j].content(content).contains(&closing_tag) {
2951                                found_closing_tag = true;
2952                            }
2953
2954                            // After finding closing tag, continue marking lines as
2955                            // in_html_block until blank line (per CommonMark spec)
2956                            if found_closing_tag {
2957                                j += 1;
2958                                // Continue marking subsequent lines until blank
2959                                while j < lines.len() && j < i + 100 {
2960                                    if lines[j].is_blank {
2961                                        break;
2962                                    }
2963                                    lines[j].in_html_block = true;
2964                                    j += 1;
2965                                }
2966                                break;
2967                            }
2968                            j += 1;
2969                        }
2970                    }
2971                }
2972            }
2973
2974            i += 1;
2975        }
2976    }
2977
2978    /// Detect ESM import/export blocks anywhere in MDX files
2979    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2980    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2981        // Only process MDX files
2982        if !flavor.supports_esm_blocks() {
2983            return;
2984        }
2985
2986        let mut in_multiline_import = false;
2987
2988        for line in lines.iter_mut() {
2989            // Skip code blocks, front matter, and HTML comments
2990            if line.in_code_block || line.in_front_matter || line.in_html_comment {
2991                in_multiline_import = false;
2992                continue;
2993            }
2994
2995            let line_content = line.content(content);
2996            let trimmed = line_content.trim();
2997
2998            // Handle continuation of multi-line import/export
2999            if in_multiline_import {
3000                line.in_esm_block = true;
3001                // Check if this line completes the statement
3002                // Multi-line import ends when we see the closing quote + optional semicolon
3003                if trimmed.ends_with('\'')
3004                    || trimmed.ends_with('"')
3005                    || trimmed.ends_with("';")
3006                    || trimmed.ends_with("\";")
3007                    || line_content.contains(';')
3008                {
3009                    in_multiline_import = false;
3010                }
3011                continue;
3012            }
3013
3014            // Skip blank lines
3015            if line.is_blank {
3016                continue;
3017            }
3018
3019            // Check if line starts with import or export
3020            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3021                line.in_esm_block = true;
3022
3023                // Determine if this is a complete single-line statement or starts a multi-line one
3024                // Multi-line imports look like:
3025                //   import {
3026                //     Foo,
3027                //     Bar
3028                //   } from 'module'
3029                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3030                let is_import = trimmed.starts_with("import ");
3031
3032                // Check for simple complete statements
3033                let is_complete =
3034                    // Ends with semicolon
3035                    trimmed.ends_with(';')
3036                    // import/export with from clause that ends with quote
3037                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3038                    // Simple export (export const/let/var/function/class without from)
3039                    || (!is_import && !trimmed.contains(" from ") && (
3040                        trimmed.starts_with("export const ")
3041                        || trimmed.starts_with("export let ")
3042                        || trimmed.starts_with("export var ")
3043                        || trimmed.starts_with("export function ")
3044                        || trimmed.starts_with("export class ")
3045                        || trimmed.starts_with("export default ")
3046                    ));
3047
3048                if !is_complete && is_import {
3049                    // Only imports can span multiple lines in the typical case
3050                    // Check if it looks like the start of a multi-line import
3051                    // e.g., "import {" or "import type {"
3052                    if trimmed.contains('{') && !trimmed.contains('}') {
3053                        in_multiline_import = true;
3054                    }
3055                }
3056            }
3057        }
3058    }
3059
3060    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3061    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3062    fn detect_jsx_and_mdx_comments(
3063        content: &str,
3064        lines: &mut [LineInfo],
3065        flavor: MarkdownFlavor,
3066        code_blocks: &[(usize, usize)],
3067    ) -> (ByteRanges, ByteRanges) {
3068        // Only process MDX files
3069        if !flavor.supports_jsx() {
3070            return (Vec::new(), Vec::new());
3071        }
3072
3073        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3074        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3075
3076        // Quick check - if no braces, no JSX expressions or MDX comments
3077        if !content.contains('{') {
3078            return (jsx_expression_ranges, mdx_comment_ranges);
3079        }
3080
3081        let bytes = content.as_bytes();
3082        let mut i = 0;
3083
3084        while i < bytes.len() {
3085            if bytes[i] == b'{' {
3086                // Check if we're in a code block
3087                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3088                    i += 1;
3089                    continue;
3090                }
3091
3092                let start = i;
3093
3094                // Check if it's an MDX comment: {/* ... */}
3095                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3096                    // Find the closing */}
3097                    let mut j = i + 3;
3098                    while j + 2 < bytes.len() {
3099                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3100                            let end = j + 3;
3101                            mdx_comment_ranges.push((start, end));
3102
3103                            // Mark lines as in MDX comment
3104                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3105                                line.in_mdx_comment = true;
3106                            });
3107
3108                            i = end;
3109                            break;
3110                        }
3111                        j += 1;
3112                    }
3113                    if j + 2 >= bytes.len() {
3114                        // Unclosed MDX comment - mark rest as comment
3115                        mdx_comment_ranges.push((start, bytes.len()));
3116                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3117                            line.in_mdx_comment = true;
3118                        });
3119                        break;
3120                    }
3121                } else {
3122                    // Regular JSX expression: { ... }
3123                    // Need to handle nested braces
3124                    let mut brace_depth = 1;
3125                    let mut j = i + 1;
3126                    let mut in_string = false;
3127                    let mut string_char = b'"';
3128
3129                    while j < bytes.len() && brace_depth > 0 {
3130                        let c = bytes[j];
3131
3132                        // Handle strings to avoid counting braces inside them
3133                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3134                            in_string = true;
3135                            string_char = c;
3136                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3137                            in_string = false;
3138                        } else if !in_string {
3139                            if c == b'{' {
3140                                brace_depth += 1;
3141                            } else if c == b'}' {
3142                                brace_depth -= 1;
3143                            }
3144                        }
3145                        j += 1;
3146                    }
3147
3148                    if brace_depth == 0 {
3149                        let end = j;
3150                        jsx_expression_ranges.push((start, end));
3151
3152                        // Mark lines as in JSX expression
3153                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3154                            line.in_jsx_expression = true;
3155                        });
3156
3157                        i = end;
3158                    } else {
3159                        i += 1;
3160                    }
3161                }
3162            } else {
3163                i += 1;
3164            }
3165        }
3166
3167        (jsx_expression_ranges, mdx_comment_ranges)
3168    }
3169
3170    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3171    /// and populate the corresponding fields in LineInfo
3172    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3173        if flavor != MarkdownFlavor::MkDocs {
3174            return;
3175        }
3176
3177        use crate::utils::mkdocs_admonitions;
3178        use crate::utils::mkdocs_definition_lists;
3179        use crate::utils::mkdocs_tabs;
3180
3181        let content_lines: Vec<&str> = content.lines().collect();
3182
3183        // Track admonition context
3184        let mut in_admonition = false;
3185        let mut admonition_indent = 0;
3186
3187        // Track tab context
3188        let mut in_tab = false;
3189        let mut tab_indent = 0;
3190
3191        // Track definition list context
3192        let mut in_definition = false;
3193
3194        for (i, line) in content_lines.iter().enumerate() {
3195            if i >= lines.len() {
3196                break;
3197            }
3198
3199            // Skip lines in code blocks
3200            if lines[i].in_code_block {
3201                continue;
3202            }
3203
3204            // Check for admonition markers
3205            if mkdocs_admonitions::is_admonition_start(line) {
3206                in_admonition = true;
3207                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3208                lines[i].in_admonition = true;
3209            } else if in_admonition {
3210                // Check if still in admonition content
3211                if line.trim().is_empty() {
3212                    // Blank lines are part of admonitions
3213                    lines[i].in_admonition = true;
3214                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3215                    lines[i].in_admonition = true;
3216                } else {
3217                    // End of admonition
3218                    in_admonition = false;
3219                    // Check if this line starts a new admonition
3220                    if mkdocs_admonitions::is_admonition_start(line) {
3221                        in_admonition = true;
3222                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223                        lines[i].in_admonition = true;
3224                    }
3225                }
3226            }
3227
3228            // Check for tab markers
3229            if mkdocs_tabs::is_tab_marker(line) {
3230                in_tab = true;
3231                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3232                lines[i].in_content_tab = true;
3233            } else if in_tab {
3234                // Check if still in tab content
3235                if line.trim().is_empty() {
3236                    // Blank lines are part of tabs
3237                    lines[i].in_content_tab = true;
3238                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3239                    lines[i].in_content_tab = true;
3240                } else {
3241                    // End of tab content
3242                    in_tab = false;
3243                    // Check if this line starts a new tab
3244                    if mkdocs_tabs::is_tab_marker(line) {
3245                        in_tab = true;
3246                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3247                        lines[i].in_content_tab = true;
3248                    }
3249                }
3250            }
3251
3252            // Check for definition list items
3253            if mkdocs_definition_lists::is_definition_line(line) {
3254                in_definition = true;
3255                lines[i].in_definition_list = true;
3256            } else if in_definition {
3257                // Check if continuation
3258                if mkdocs_definition_lists::is_definition_continuation(line) {
3259                    lines[i].in_definition_list = true;
3260                } else if line.trim().is_empty() {
3261                    // Blank line might continue definition
3262                    lines[i].in_definition_list = true;
3263                } else if mkdocs_definition_lists::could_be_term_line(line) {
3264                    // This could be a new term - check if followed by definition
3265                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3266                    {
3267                        lines[i].in_definition_list = true;
3268                    } else {
3269                        in_definition = false;
3270                    }
3271                } else {
3272                    in_definition = false;
3273                }
3274            } else if mkdocs_definition_lists::could_be_term_line(line) {
3275                // Check if this is a term followed by a definition
3276                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3277                    lines[i].in_definition_list = true;
3278                    in_definition = true;
3279                }
3280            }
3281        }
3282    }
3283
3284    /// Helper to mark lines within a byte range
3285    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3286    where
3287        F: FnMut(&mut LineInfo),
3288    {
3289        // Find lines that overlap with the range
3290        for line in lines.iter_mut() {
3291            let line_start = line.byte_offset;
3292            let line_end = line.byte_offset + line.byte_len;
3293
3294            // Check if this line overlaps with the range
3295            if line_start < end && line_end > start {
3296                f(line);
3297            }
3298        }
3299
3300        // Silence unused warning for content (needed for signature consistency)
3301        let _ = content;
3302    }
3303
3304    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3305    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3306        // Quick check - if no backticks, no code spans
3307        if !content.contains('`') {
3308            return Vec::new();
3309        }
3310
3311        // Use pulldown-cmark's streaming parser with byte offsets
3312        let parser = Parser::new(content).into_offset_iter();
3313        let mut ranges = Vec::new();
3314
3315        for (event, range) in parser {
3316            if let Event::Code(_) = event {
3317                ranges.push((range.start, range.end));
3318            }
3319        }
3320
3321        Self::build_code_spans_from_ranges(content, lines, &ranges)
3322    }
3323
3324    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3325        let mut code_spans = Vec::new();
3326        if ranges.is_empty() {
3327            return code_spans;
3328        }
3329
3330        for &(start_pos, end_pos) in ranges {
3331            // The range includes the backticks, extract the actual content
3332            let full_span = &content[start_pos..end_pos];
3333            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3334
3335            // Extract content between backticks, preserving spaces
3336            let content_start = start_pos + backtick_count;
3337            let content_end = end_pos - backtick_count;
3338            let span_content = if content_start < content_end {
3339                content[content_start..content_end].to_string()
3340            } else {
3341                String::new()
3342            };
3343
3344            // Use binary search to find line number - O(log n) instead of O(n)
3345            // Find the rightmost line whose byte_offset <= start_pos
3346            let line_idx = lines
3347                .partition_point(|line| line.byte_offset <= start_pos)
3348                .saturating_sub(1);
3349            let line_num = line_idx + 1;
3350            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3351
3352            // Find end column using binary search
3353            let end_line_idx = lines
3354                .partition_point(|line| line.byte_offset <= end_pos)
3355                .saturating_sub(1);
3356            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3357
3358            // Convert byte offsets to character positions for correct Unicode handling
3359            // This ensures consistency with warning.column which uses character positions
3360            let line_content = lines[line_idx].content(content);
3361            let col_start = if byte_col_start <= line_content.len() {
3362                line_content[..byte_col_start].chars().count()
3363            } else {
3364                line_content.chars().count()
3365            };
3366
3367            let end_line_content = lines[end_line_idx].content(content);
3368            let col_end = if byte_col_end <= end_line_content.len() {
3369                end_line_content[..byte_col_end].chars().count()
3370            } else {
3371                end_line_content.chars().count()
3372            };
3373
3374            code_spans.push(CodeSpan {
3375                line: line_num,
3376                end_line: end_line_idx + 1,
3377                start_col: col_start,
3378                end_col: col_end,
3379                byte_offset: start_pos,
3380                byte_end: end_pos,
3381                backtick_count,
3382                content: span_content,
3383            });
3384        }
3385
3386        // Sort by position to ensure consistent ordering
3387        code_spans.sort_by_key(|span| span.byte_offset);
3388
3389        code_spans
3390    }
3391
3392    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3393    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3394        let mut math_spans = Vec::new();
3395
3396        // Quick check - if no $ signs, no math spans
3397        if !content.contains('$') {
3398            return math_spans;
3399        }
3400
3401        // Use pulldown-cmark with ENABLE_MATH option
3402        let mut options = Options::empty();
3403        options.insert(Options::ENABLE_MATH);
3404        let parser = Parser::new_ext(content, options).into_offset_iter();
3405
3406        for (event, range) in parser {
3407            let (is_display, math_content) = match &event {
3408                Event::InlineMath(text) => (false, text.as_ref()),
3409                Event::DisplayMath(text) => (true, text.as_ref()),
3410                _ => continue,
3411            };
3412
3413            let start_pos = range.start;
3414            let end_pos = range.end;
3415
3416            // Use binary search to find line number - O(log n) instead of O(n)
3417            let line_idx = lines
3418                .partition_point(|line| line.byte_offset <= start_pos)
3419                .saturating_sub(1);
3420            let line_num = line_idx + 1;
3421            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3422
3423            // Find end column using binary search
3424            let end_line_idx = lines
3425                .partition_point(|line| line.byte_offset <= end_pos)
3426                .saturating_sub(1);
3427            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3428
3429            // Convert byte offsets to character positions for correct Unicode handling
3430            let line_content = lines[line_idx].content(content);
3431            let col_start = if byte_col_start <= line_content.len() {
3432                line_content[..byte_col_start].chars().count()
3433            } else {
3434                line_content.chars().count()
3435            };
3436
3437            let end_line_content = lines[end_line_idx].content(content);
3438            let col_end = if byte_col_end <= end_line_content.len() {
3439                end_line_content[..byte_col_end].chars().count()
3440            } else {
3441                end_line_content.chars().count()
3442            };
3443
3444            math_spans.push(MathSpan {
3445                line: line_num,
3446                end_line: end_line_idx + 1,
3447                start_col: col_start,
3448                end_col: col_end,
3449                byte_offset: start_pos,
3450                byte_end: end_pos,
3451                is_display,
3452                content: math_content.to_string(),
3453            });
3454        }
3455
3456        // Sort by position to ensure consistent ordering
3457        math_spans.sort_by_key(|span| span.byte_offset);
3458
3459        math_spans
3460    }
3461
3462    /// Parse all list blocks in the content (legacy line-by-line approach)
3463    ///
3464    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3465    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3466    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3467    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3468    ///   treated as list continuation (based on the list marker width)
3469    ///
3470    /// When a new list item is encountered, we check if list-breaking content was seen
3471    /// since the last item. If so, we start a new list block.
3472    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3473        // Minimum indentation for unordered list continuation per CommonMark spec
3474        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3475
3476        /// Initialize or reset the forward-scanning tracking state.
3477        /// This helper eliminates code duplication across three initialization sites.
3478        #[inline]
3479        fn reset_tracking_state(
3480            list_item: &ListItemInfo,
3481            has_list_breaking_content: &mut bool,
3482            min_continuation: &mut usize,
3483        ) {
3484            *has_list_breaking_content = false;
3485            let marker_width = if list_item.is_ordered {
3486                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3487            } else {
3488                list_item.marker.len()
3489            };
3490            *min_continuation = if list_item.is_ordered {
3491                marker_width
3492            } else {
3493                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3494            };
3495        }
3496
3497        // Pre-size based on lines that could be list items
3498        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3499        let mut current_block: Option<ListBlock> = None;
3500        let mut last_list_item_line = 0;
3501        let mut current_indent_level = 0;
3502        let mut last_marker_width = 0;
3503
3504        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3505        let mut has_list_breaking_content_since_last_item = false;
3506        let mut min_continuation_for_tracking = 0;
3507
3508        for (line_idx, line_info) in lines.iter().enumerate() {
3509            let line_num = line_idx + 1;
3510
3511            // Enhanced code block handling using Design #3's context analysis
3512            if line_info.in_code_block {
3513                if let Some(ref mut block) = current_block {
3514                    // Calculate minimum indentation for list continuation
3515                    let min_continuation_indent =
3516                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3517
3518                    // Analyze code block context using the three-tier classification
3519                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3520
3521                    match context {
3522                        CodeBlockContext::Indented => {
3523                            // Code block is properly indented - continues the list
3524                            block.end_line = line_num;
3525                            continue;
3526                        }
3527                        CodeBlockContext::Standalone => {
3528                            // Code block separates lists - end current block
3529                            let completed_block = current_block.take().unwrap();
3530                            list_blocks.push(completed_block);
3531                            continue;
3532                        }
3533                        CodeBlockContext::Adjacent => {
3534                            // Edge case - use conservative behavior (continue list)
3535                            block.end_line = line_num;
3536                            continue;
3537                        }
3538                    }
3539                } else {
3540                    // No current list block - skip code block lines
3541                    continue;
3542                }
3543            }
3544
3545            // Extract blockquote prefix if any
3546            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3547                caps.get(0).unwrap().as_str().to_string()
3548            } else {
3549                String::new()
3550            };
3551
3552            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3553            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3554            if let Some(ref block) = current_block
3555                && line_info.list_item.is_none()
3556                && !line_info.is_blank
3557                && !line_info.in_code_span_continuation
3558            {
3559                let line_content = line_info.content(content).trim();
3560
3561                // Check for structural separators that break lists
3562                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3563                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3564                // as they indicate improper indentation rather than lazy continuation.
3565                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3566
3567                // Check if blockquote context changes (different prefix than current block)
3568                // Lines within the SAME blockquote context don't break lists
3569                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3570
3571                let breaks_list = line_info.heading.is_some()
3572                    || line_content.starts_with("---")
3573                    || line_content.starts_with("***")
3574                    || line_content.starts_with("___")
3575                    || crate::utils::skip_context::is_table_line(line_content)
3576                    || blockquote_prefix_changes
3577                    || (line_info.indent > 0
3578                        && line_info.indent < min_continuation_for_tracking
3579                        && !is_lazy_continuation);
3580
3581                if breaks_list {
3582                    has_list_breaking_content_since_last_item = true;
3583                }
3584            }
3585
3586            // If this line is a code span continuation within an active list block,
3587            // extend the block's end_line to include this line (maintains list continuity)
3588            if line_info.in_code_span_continuation
3589                && line_info.list_item.is_none()
3590                && let Some(ref mut block) = current_block
3591            {
3592                block.end_line = line_num;
3593            }
3594
3595            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3596            // properly indented lines within the list). This ensures the consecutive-items WORKAROUND
3597            // further below works correctly when there are multiple continuation lines before a nested list item.
3598            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3599            // For blockquote lines, compute effective indent after stripping the prefix
3600            let effective_continuation_indent = if let Some(ref block) = current_block {
3601                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3602                let line_content = line_info.content(content);
3603                let line_bq_level = line_content
3604                    .chars()
3605                    .take_while(|c| *c == '>' || c.is_whitespace())
3606                    .filter(|&c| c == '>')
3607                    .count();
3608                if line_bq_level > 0 && line_bq_level == block_bq_level {
3609                    // Compute indent after blockquote markers
3610                    let mut pos = 0;
3611                    let mut found_markers = 0;
3612                    for c in line_content.chars() {
3613                        pos += c.len_utf8();
3614                        if c == '>' {
3615                            found_markers += 1;
3616                            if found_markers == line_bq_level {
3617                                if line_content.get(pos..pos + 1) == Some(" ") {
3618                                    pos += 1;
3619                                }
3620                                break;
3621                            }
3622                        }
3623                    }
3624                    let after_bq = &line_content[pos..];
3625                    after_bq.len() - after_bq.trim_start().len()
3626                } else {
3627                    line_info.indent
3628                }
3629            } else {
3630                line_info.indent
3631            };
3632            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3633                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3634                if block_bq_level > 0 {
3635                    if block.is_ordered { last_marker_width } else { 2 }
3636                } else {
3637                    min_continuation_for_tracking
3638                }
3639            } else {
3640                min_continuation_for_tracking
3641            };
3642            // Lazy continuation allows unindented text to continue a list item,
3643            // but NOT structural elements like headings, code fences, or horizontal rules
3644            let is_structural_element = line_info.heading.is_some()
3645                || line_info.content(content).trim().starts_with("```")
3646                || line_info.content(content).trim().starts_with("~~~");
3647            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3648                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3649
3650            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3651                eprintln!(
3652                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3653                    line_num,
3654                    effective_continuation_indent,
3655                    adjusted_min_continuation_for_tracking,
3656                    is_valid_continuation,
3657                    line_info.in_code_span_continuation,
3658                    line_info.in_code_block,
3659                    current_block.is_some()
3660                );
3661            }
3662
3663            if !line_info.in_code_span_continuation
3664                && line_info.list_item.is_none()
3665                && !line_info.is_blank
3666                && !line_info.in_code_block
3667                && is_valid_continuation
3668                && let Some(ref mut block) = current_block
3669            {
3670                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3671                    eprintln!(
3672                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3673                        line_num, block.end_line, line_num
3674                    );
3675                }
3676                block.end_line = line_num;
3677            }
3678
3679            // Check if this line is a list item
3680            if let Some(list_item) = &line_info.list_item {
3681                // Calculate nesting level based on indentation
3682                let item_indent = list_item.marker_column;
3683                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3684
3685                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3686                    eprintln!(
3687                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3688                        line_num, list_item.marker, item_indent
3689                    );
3690                }
3691
3692                if let Some(ref mut block) = current_block {
3693                    // Check if this continues the current block
3694                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3695                    // or a continuation at the same or lower level
3696                    let is_nested = nesting > block.nesting_level;
3697                    let same_type =
3698                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3699                    let same_context = block.blockquote_prefix == blockquote_prefix;
3700                    // Allow one blank line after last item, or lines immediately after block content
3701                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3702
3703                    // For unordered lists, also check marker consistency
3704                    let marker_compatible =
3705                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3706
3707                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3708                    // This eliminates the quadratic bottleneck from issue #148
3709                    let has_non_list_content = has_list_breaking_content_since_last_item;
3710
3711                    // A list continues if:
3712                    // 1. It's a nested item (indented more than the parent), OR
3713                    // 2. It's the same type at the same level with reasonable distance
3714                    let mut continues_list = if is_nested {
3715                        // Nested items always continue the list if they're in the same context
3716                        same_context && reasonable_distance && !has_non_list_content
3717                    } else {
3718                        // Same-level items need to match type and markers
3719                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3720                    };
3721
3722                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3723                        eprintln!(
3724                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3725                            line_num,
3726                            continues_list,
3727                            is_nested,
3728                            same_type,
3729                            same_context,
3730                            reasonable_distance,
3731                            marker_compatible,
3732                            has_non_list_content,
3733                            last_list_item_line,
3734                            block.end_line
3735                        );
3736                    }
3737
3738                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3739                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3740                    // Apply for: nested items (different types OK), OR same-level same-type items
3741                    if !continues_list
3742                        && (is_nested || same_type)
3743                        && reasonable_distance
3744                        && line_num > 0
3745                        && block.end_line == line_num - 1
3746                    {
3747                        // Check if the previous line was a list item or a continuation of a list item
3748                        // (including lazy continuation lines)
3749                        if block.item_lines.contains(&(line_num - 1)) {
3750                            // They're consecutive list items - force them to be in the same list
3751                            continues_list = true;
3752                        } else {
3753                            // Previous line is a continuation line within this block
3754                            // (e.g., lazy continuation with indent=0)
3755                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3756                            continues_list = true;
3757                        }
3758                    }
3759
3760                    if continues_list {
3761                        // Extend current block
3762                        block.end_line = line_num;
3763                        block.item_lines.push(line_num);
3764
3765                        // Update max marker width
3766                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3767                            list_item.marker.len() + 1
3768                        } else {
3769                            list_item.marker.len()
3770                        });
3771
3772                        // Update marker consistency for unordered lists
3773                        if !block.is_ordered
3774                            && block.marker.is_some()
3775                            && block.marker.as_ref() != Some(&list_item.marker)
3776                        {
3777                            // Mixed markers, clear the marker field
3778                            block.marker = None;
3779                        }
3780
3781                        // Reset tracked state for issue #148 optimization
3782                        reset_tracking_state(
3783                            list_item,
3784                            &mut has_list_breaking_content_since_last_item,
3785                            &mut min_continuation_for_tracking,
3786                        );
3787                    } else {
3788                        // End current block and start a new one
3789                        // When a different list type starts AT THE SAME LEVEL (not nested),
3790                        // trim back lazy continuation lines (they become part of the gap, not the list)
3791                        // For nested items, different types are fine - they're sub-lists
3792                        if !same_type
3793                            && !is_nested
3794                            && let Some(&last_item) = block.item_lines.last()
3795                        {
3796                            block.end_line = last_item;
3797                        }
3798
3799                        list_blocks.push(block.clone());
3800
3801                        *block = ListBlock {
3802                            start_line: line_num,
3803                            end_line: line_num,
3804                            is_ordered: list_item.is_ordered,
3805                            marker: if list_item.is_ordered {
3806                                None
3807                            } else {
3808                                Some(list_item.marker.clone())
3809                            },
3810                            blockquote_prefix: blockquote_prefix.clone(),
3811                            item_lines: vec![line_num],
3812                            nesting_level: nesting,
3813                            max_marker_width: if list_item.is_ordered {
3814                                list_item.marker.len() + 1
3815                            } else {
3816                                list_item.marker.len()
3817                            },
3818                        };
3819
3820                        // Initialize tracked state for new block (issue #148 optimization)
3821                        reset_tracking_state(
3822                            list_item,
3823                            &mut has_list_breaking_content_since_last_item,
3824                            &mut min_continuation_for_tracking,
3825                        );
3826                    }
3827                } else {
3828                    // Start a new block
3829                    current_block = Some(ListBlock {
3830                        start_line: line_num,
3831                        end_line: line_num,
3832                        is_ordered: list_item.is_ordered,
3833                        marker: if list_item.is_ordered {
3834                            None
3835                        } else {
3836                            Some(list_item.marker.clone())
3837                        },
3838                        blockquote_prefix,
3839                        item_lines: vec![line_num],
3840                        nesting_level: nesting,
3841                        max_marker_width: list_item.marker.len(),
3842                    });
3843
3844                    // Initialize tracked state for new block (issue #148 optimization)
3845                    reset_tracking_state(
3846                        list_item,
3847                        &mut has_list_breaking_content_since_last_item,
3848                        &mut min_continuation_for_tracking,
3849                    );
3850                }
3851
3852                last_list_item_line = line_num;
3853                current_indent_level = item_indent;
3854                last_marker_width = if list_item.is_ordered {
3855                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3856                } else {
3857                    list_item.marker.len()
3858                };
3859            } else if let Some(ref mut block) = current_block {
3860                // Not a list item - check if it continues the current block
3861                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3862                    eprintln!(
3863                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3864                        line_num, line_info.is_blank
3865                    );
3866                }
3867
3868                // For MD032 compatibility, we use a simple approach:
3869                // - Indented lines continue the list
3870                // - Blank lines followed by indented content continue the list
3871                // - Everything else ends the list
3872
3873                // Check if the last line in the list block ended with a backslash (hard line break)
3874                // This handles cases where list items use backslash for hard line breaks
3875                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3876                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3877                } else {
3878                    false
3879                };
3880
3881                // Calculate minimum indentation for list continuation
3882                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3883                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3884                let min_continuation_indent = if block.is_ordered {
3885                    current_indent_level + last_marker_width
3886                } else {
3887                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3888                };
3889
3890                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3891                    // Indented line or backslash continuation continues the list
3892                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3893                        eprintln!(
3894                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3895                            line_num, line_info.indent, min_continuation_indent
3896                        );
3897                    }
3898                    block.end_line = line_num;
3899                } else if line_info.is_blank {
3900                    // Blank line - check if it's internal to the list or ending it
3901                    // We only include blank lines that are followed by more list content
3902                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3903                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3904                    }
3905                    let mut check_idx = line_idx + 1;
3906                    let mut found_continuation = false;
3907
3908                    // Skip additional blank lines
3909                    while check_idx < lines.len() && lines[check_idx].is_blank {
3910                        check_idx += 1;
3911                    }
3912
3913                    if check_idx < lines.len() {
3914                        let next_line = &lines[check_idx];
3915                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3916                        let next_content = next_line.content(content);
3917                        // Use blockquote level (count of >) to compare, not the full prefix
3918                        // This avoids issues where the regex captures extra whitespace
3919                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3920                        let next_bq_level_for_indent = next_content
3921                            .chars()
3922                            .take_while(|c| *c == '>' || c.is_whitespace())
3923                            .filter(|&c| c == '>')
3924                            .count();
3925                        let effective_indent =
3926                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3927                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3928                                // Find position after ">" and one space
3929                                let mut pos = 0;
3930                                let mut found_markers = 0;
3931                                for c in next_content.chars() {
3932                                    pos += c.len_utf8();
3933                                    if c == '>' {
3934                                        found_markers += 1;
3935                                        if found_markers == next_bq_level_for_indent {
3936                                            // Skip optional space after last >
3937                                            if next_content.get(pos..pos + 1) == Some(" ") {
3938                                                pos += 1;
3939                                            }
3940                                            break;
3941                                        }
3942                                    }
3943                                }
3944                                let after_blockquote_marker = &next_content[pos..];
3945                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3946                            } else {
3947                                next_line.indent
3948                            };
3949                        // Also adjust min_continuation_indent for blockquote lists
3950                        // The marker_column includes blockquote prefix, so subtract it
3951                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3952                            // For blockquote lists, the continuation is relative to blockquote content
3953                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3954                            if block.is_ordered { last_marker_width } else { 2 }
3955                        } else {
3956                            min_continuation_indent
3957                        };
3958                        // Check if followed by indented content (list continuation)
3959                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3960                            eprintln!(
3961                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3962                                line_num,
3963                                check_idx + 1,
3964                                effective_indent,
3965                                adjusted_min_continuation,
3966                                next_line.list_item.is_some(),
3967                                next_line.in_code_block
3968                            );
3969                        }
3970                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3971                            found_continuation = true;
3972                        }
3973                        // Check if followed by another list item at the same level
3974                        else if !next_line.in_code_block
3975                            && next_line.list_item.is_some()
3976                            && let Some(item) = &next_line.list_item
3977                        {
3978                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3979                                .find(next_line.content(content))
3980                                .map_or(String::new(), |m| m.as_str().to_string());
3981                            if item.marker_column == current_indent_level
3982                                && item.is_ordered == block.is_ordered
3983                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3984                            {
3985                                // Check if there was meaningful content between the list items (unused now)
3986                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3987                                // Pre-compute block's blockquote level for use in closures
3988                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3989                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3990                                    if let Some(between_line) = lines.get(idx) {
3991                                        let between_content = between_line.content(content);
3992                                        let trimmed = between_content.trim();
3993                                        // Skip empty lines
3994                                        if trimmed.is_empty() {
3995                                            return false;
3996                                        }
3997                                        // Check for meaningful content
3998                                        let line_indent = between_content.len() - between_content.trim_start().len();
3999
4000                                        // Check if blockquote level changed (not just if line starts with ">")
4001                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4002                                            .find(between_content)
4003                                            .map_or(String::new(), |m| m.as_str().to_string());
4004                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4005                                        let blockquote_level_changed =
4006                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4007
4008                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4009                                        if trimmed.starts_with("```")
4010                                            || trimmed.starts_with("~~~")
4011                                            || trimmed.starts_with("---")
4012                                            || trimmed.starts_with("***")
4013                                            || trimmed.starts_with("___")
4014                                            || blockquote_level_changed
4015                                            || crate::utils::skip_context::is_table_line(trimmed)
4016                                            || between_line.heading.is_some()
4017                                        {
4018                                            return true; // These are structural separators - meaningful content that breaks lists
4019                                        }
4020
4021                                        // Only properly indented content continues the list
4022                                        line_indent >= min_continuation_indent
4023                                    } else {
4024                                        false
4025                                    }
4026                                });
4027
4028                                if block.is_ordered {
4029                                    // For ordered lists: don't continue if there are structural separators
4030                                    // Check if there are structural separators between the list items
4031                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4032                                        if let Some(between_line) = lines.get(idx) {
4033                                            let between_content = between_line.content(content);
4034                                            let trimmed = between_content.trim();
4035                                            if trimmed.is_empty() {
4036                                                return false;
4037                                            }
4038                                            // Check if blockquote level changed (not just if line starts with ">")
4039                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4040                                                .find(between_content)
4041                                                .map_or(String::new(), |m| m.as_str().to_string());
4042                                            let between_bq_level =
4043                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4044                                            let blockquote_level_changed =
4045                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4046                                            // Check for structural separators that break lists
4047                                            trimmed.starts_with("```")
4048                                                || trimmed.starts_with("~~~")
4049                                                || trimmed.starts_with("---")
4050                                                || trimmed.starts_with("***")
4051                                                || trimmed.starts_with("___")
4052                                                || blockquote_level_changed
4053                                                || crate::utils::skip_context::is_table_line(trimmed)
4054                                                || between_line.heading.is_some()
4055                                        } else {
4056                                            false
4057                                        }
4058                                    });
4059                                    found_continuation = !has_structural_separators;
4060                                } else {
4061                                    // For unordered lists: also check for structural separators
4062                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4063                                        if let Some(between_line) = lines.get(idx) {
4064                                            let between_content = between_line.content(content);
4065                                            let trimmed = between_content.trim();
4066                                            if trimmed.is_empty() {
4067                                                return false;
4068                                            }
4069                                            // Check if blockquote level changed (not just if line starts with ">")
4070                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4071                                                .find(between_content)
4072                                                .map_or(String::new(), |m| m.as_str().to_string());
4073                                            let between_bq_level =
4074                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4075                                            let blockquote_level_changed =
4076                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4077                                            // Check for structural separators that break lists
4078                                            trimmed.starts_with("```")
4079                                                || trimmed.starts_with("~~~")
4080                                                || trimmed.starts_with("---")
4081                                                || trimmed.starts_with("***")
4082                                                || trimmed.starts_with("___")
4083                                                || blockquote_level_changed
4084                                                || crate::utils::skip_context::is_table_line(trimmed)
4085                                                || between_line.heading.is_some()
4086                                        } else {
4087                                            false
4088                                        }
4089                                    });
4090                                    found_continuation = !has_structural_separators;
4091                                }
4092                            }
4093                        }
4094                    }
4095
4096                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4097                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4098                    }
4099                    if found_continuation {
4100                        // Include the blank line in the block
4101                        block.end_line = line_num;
4102                    } else {
4103                        // Blank line ends the list - don't include it
4104                        list_blocks.push(block.clone());
4105                        current_block = None;
4106                    }
4107                } else {
4108                    // Check for lazy continuation - non-indented line immediately after a list item
4109                    // But only if the line has sufficient indentation for the list type
4110                    let min_required_indent = if block.is_ordered {
4111                        current_indent_level + last_marker_width
4112                    } else {
4113                        current_indent_level + 2
4114                    };
4115
4116                    // For lazy continuation to apply, the line must either:
4117                    // 1. Have no indentation (true lazy continuation)
4118                    // 2. Have sufficient indentation for the list type
4119                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4120                    let line_content = line_info.content(content).trim();
4121
4122                    // Check for table-like patterns
4123                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4124
4125                    // Check if blockquote level changed (not just if line starts with ">")
4126                    // Lines within the same blockquote level are NOT structural separators
4127                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4128                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4129                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4130
4131                    let is_structural_separator = line_info.heading.is_some()
4132                        || line_content.starts_with("```")
4133                        || line_content.starts_with("~~~")
4134                        || line_content.starts_with("---")
4135                        || line_content.starts_with("***")
4136                        || line_content.starts_with("___")
4137                        || blockquote_level_changed
4138                        || looks_like_table;
4139
4140                    // Allow lazy continuation if we're still within the same list block
4141                    // (not just immediately after a list item)
4142                    // Also treat code span continuations as valid continuations regardless of indent
4143                    let is_lazy_continuation = !is_structural_separator
4144                        && !line_info.is_blank
4145                        && (line_info.indent == 0
4146                            || line_info.indent >= min_required_indent
4147                            || line_info.in_code_span_continuation);
4148
4149                    if is_lazy_continuation {
4150                        // Per CommonMark, lazy continuation continues until a blank line
4151                        // or structural element, regardless of uppercase at line start
4152                        block.end_line = line_num;
4153                    } else {
4154                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4155                        list_blocks.push(block.clone());
4156                        current_block = None;
4157                    }
4158                }
4159            }
4160        }
4161
4162        // Don't forget the last block
4163        if let Some(block) = current_block {
4164            list_blocks.push(block);
4165        }
4166
4167        // Merge adjacent blocks that should be one
4168        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4169
4170        list_blocks
4171    }
4172
4173    /// Compute character frequency for fast content analysis
4174    fn compute_char_frequency(content: &str) -> CharFrequency {
4175        let mut frequency = CharFrequency::default();
4176
4177        for ch in content.chars() {
4178            match ch {
4179                '#' => frequency.hash_count += 1,
4180                '*' => frequency.asterisk_count += 1,
4181                '_' => frequency.underscore_count += 1,
4182                '-' => frequency.hyphen_count += 1,
4183                '+' => frequency.plus_count += 1,
4184                '>' => frequency.gt_count += 1,
4185                '|' => frequency.pipe_count += 1,
4186                '[' => frequency.bracket_count += 1,
4187                '`' => frequency.backtick_count += 1,
4188                '<' => frequency.lt_count += 1,
4189                '!' => frequency.exclamation_count += 1,
4190                '\n' => frequency.newline_count += 1,
4191                _ => {}
4192            }
4193        }
4194
4195        frequency
4196    }
4197
4198    /// Parse HTML tags in the content
4199    fn parse_html_tags(
4200        content: &str,
4201        lines: &[LineInfo],
4202        code_blocks: &[(usize, usize)],
4203        flavor: MarkdownFlavor,
4204    ) -> Vec<HtmlTag> {
4205        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4206            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4207
4208        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4209
4210        for cap in HTML_TAG_REGEX.captures_iter(content) {
4211            let full_match = cap.get(0).unwrap();
4212            let match_start = full_match.start();
4213            let match_end = full_match.end();
4214
4215            // Skip if in code block
4216            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4217                continue;
4218            }
4219
4220            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4221            let tag_name_original = cap.get(2).unwrap().as_str();
4222            let tag_name = tag_name_original.to_lowercase();
4223            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4224
4225            // Skip JSX components in MDX files (tags starting with uppercase letter)
4226            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4227            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4228                continue;
4229            }
4230
4231            // Find which line this tag is on
4232            let mut line_num = 1;
4233            let mut col_start = match_start;
4234            let mut col_end = match_end;
4235            for (idx, line_info) in lines.iter().enumerate() {
4236                if match_start >= line_info.byte_offset {
4237                    line_num = idx + 1;
4238                    col_start = match_start - line_info.byte_offset;
4239                    col_end = match_end - line_info.byte_offset;
4240                } else {
4241                    break;
4242                }
4243            }
4244
4245            html_tags.push(HtmlTag {
4246                line: line_num,
4247                start_col: col_start,
4248                end_col: col_end,
4249                byte_offset: match_start,
4250                byte_end: match_end,
4251                tag_name,
4252                is_closing,
4253                is_self_closing,
4254                raw_content: full_match.as_str().to_string(),
4255            });
4256        }
4257
4258        html_tags
4259    }
4260
4261    /// Parse table rows in the content
4262    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4263        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4264
4265        for (line_idx, line_info) in lines.iter().enumerate() {
4266            // Skip lines in code blocks or blank lines
4267            if line_info.in_code_block || line_info.is_blank {
4268                continue;
4269            }
4270
4271            let line = line_info.content(content);
4272            let line_num = line_idx + 1;
4273
4274            // Check if this line contains pipes (potential table row)
4275            if !line.contains('|') {
4276                continue;
4277            }
4278
4279            // Count columns by splitting on pipes
4280            let parts: Vec<&str> = line.split('|').collect();
4281            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4282
4283            // Check if this is a separator row
4284            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4285            let mut column_alignments = Vec::new();
4286
4287            if is_separator {
4288                for part in &parts[1..parts.len() - 1] {
4289                    // Skip first and last empty parts
4290                    let trimmed = part.trim();
4291                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4292                        "center".to_string()
4293                    } else if trimmed.ends_with(':') {
4294                        "right".to_string()
4295                    } else if trimmed.starts_with(':') {
4296                        "left".to_string()
4297                    } else {
4298                        "none".to_string()
4299                    };
4300                    column_alignments.push(alignment);
4301                }
4302            }
4303
4304            table_rows.push(TableRow {
4305                line: line_num,
4306                is_separator,
4307                column_count,
4308                column_alignments,
4309            });
4310        }
4311
4312        table_rows
4313    }
4314
4315    /// Parse bare URLs and emails in the content
4316    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4317        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4318
4319        // Check for bare URLs (not in angle brackets or markdown links)
4320        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4321            let full_match = cap.get(0).unwrap();
4322            let match_start = full_match.start();
4323            let match_end = full_match.end();
4324
4325            // Skip if in code block
4326            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4327                continue;
4328            }
4329
4330            // Skip if already in angle brackets or markdown links
4331            let preceding_char = if match_start > 0 {
4332                content.chars().nth(match_start - 1)
4333            } else {
4334                None
4335            };
4336            let following_char = content.chars().nth(match_end);
4337
4338            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4339                continue;
4340            }
4341            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4342                continue;
4343            }
4344
4345            let url = full_match.as_str();
4346            let url_type = if url.starts_with("https://") {
4347                "https"
4348            } else if url.starts_with("http://") {
4349                "http"
4350            } else if url.starts_with("ftp://") {
4351                "ftp"
4352            } else {
4353                "other"
4354            };
4355
4356            // Find which line this URL is on
4357            let mut line_num = 1;
4358            let mut col_start = match_start;
4359            let mut col_end = match_end;
4360            for (idx, line_info) in lines.iter().enumerate() {
4361                if match_start >= line_info.byte_offset {
4362                    line_num = idx + 1;
4363                    col_start = match_start - line_info.byte_offset;
4364                    col_end = match_end - line_info.byte_offset;
4365                } else {
4366                    break;
4367                }
4368            }
4369
4370            bare_urls.push(BareUrl {
4371                line: line_num,
4372                start_col: col_start,
4373                end_col: col_end,
4374                byte_offset: match_start,
4375                byte_end: match_end,
4376                url: url.to_string(),
4377                url_type: url_type.to_string(),
4378            });
4379        }
4380
4381        // Check for bare email addresses
4382        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4383            let full_match = cap.get(0).unwrap();
4384            let match_start = full_match.start();
4385            let match_end = full_match.end();
4386
4387            // Skip if in code block
4388            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4389                continue;
4390            }
4391
4392            // Skip if already in angle brackets or markdown links
4393            let preceding_char = if match_start > 0 {
4394                content.chars().nth(match_start - 1)
4395            } else {
4396                None
4397            };
4398            let following_char = content.chars().nth(match_end);
4399
4400            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4401                continue;
4402            }
4403            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4404                continue;
4405            }
4406
4407            let email = full_match.as_str();
4408
4409            // Find which line this email is on
4410            let mut line_num = 1;
4411            let mut col_start = match_start;
4412            let mut col_end = match_end;
4413            for (idx, line_info) in lines.iter().enumerate() {
4414                if match_start >= line_info.byte_offset {
4415                    line_num = idx + 1;
4416                    col_start = match_start - line_info.byte_offset;
4417                    col_end = match_end - line_info.byte_offset;
4418                } else {
4419                    break;
4420                }
4421            }
4422
4423            bare_urls.push(BareUrl {
4424                line: line_num,
4425                start_col: col_start,
4426                end_col: col_end,
4427                byte_offset: match_start,
4428                byte_end: match_end,
4429                url: email.to_string(),
4430                url_type: "email".to_string(),
4431            });
4432        }
4433
4434        bare_urls
4435    }
4436
4437    /// Get an iterator over valid CommonMark headings
4438    ///
4439    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4440    /// that should be flagged by MD018 but should not be processed by other heading rules.
4441    ///
4442    /// # Examples
4443    ///
4444    /// ```rust
4445    /// use rumdl_lib::lint_context::LintContext;
4446    /// use rumdl_lib::config::MarkdownFlavor;
4447    ///
4448    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4449    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4450    ///
4451    /// for heading in ctx.valid_headings() {
4452    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4453    /// }
4454    /// // Only prints valid headings, skips `#NoSpace`
4455    /// ```
4456    #[must_use]
4457    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4458        ValidHeadingsIter::new(&self.lines)
4459    }
4460
4461    /// Check if the document contains any valid CommonMark headings
4462    ///
4463    /// Returns `true` if there is at least one heading with proper space after `#`.
4464    #[must_use]
4465    pub fn has_valid_headings(&self) -> bool {
4466        self.lines
4467            .iter()
4468            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4469    }
4470}
4471
4472/// Merge adjacent list blocks that should be treated as one
4473fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4474    if list_blocks.len() < 2 {
4475        return;
4476    }
4477
4478    let mut merger = ListBlockMerger::new(content, lines);
4479    *list_blocks = merger.merge(list_blocks);
4480}
4481
4482/// Helper struct to manage the complex logic of merging list blocks
4483struct ListBlockMerger<'a> {
4484    content: &'a str,
4485    lines: &'a [LineInfo],
4486}
4487
4488impl<'a> ListBlockMerger<'a> {
4489    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4490        Self { content, lines }
4491    }
4492
4493    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4494        let mut merged = Vec::with_capacity(list_blocks.len());
4495        let mut current = list_blocks[0].clone();
4496
4497        for next in list_blocks.iter().skip(1) {
4498            if self.should_merge_blocks(&current, next) {
4499                current = self.merge_two_blocks(current, next);
4500            } else {
4501                merged.push(current);
4502                current = next.clone();
4503            }
4504        }
4505
4506        merged.push(current);
4507        merged
4508    }
4509
4510    /// Determine if two adjacent list blocks should be merged
4511    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4512        // Basic compatibility checks
4513        if !self.blocks_are_compatible(current, next) {
4514            return false;
4515        }
4516
4517        // Check spacing and content between blocks
4518        let spacing = self.analyze_spacing_between(current, next);
4519        match spacing {
4520            BlockSpacing::Consecutive => true,
4521            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4522            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4523                self.can_merge_with_content_between(current, next)
4524            }
4525        }
4526    }
4527
4528    /// Check if blocks have compatible structure for merging
4529    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4530        current.is_ordered == next.is_ordered
4531            && current.blockquote_prefix == next.blockquote_prefix
4532            && current.nesting_level == next.nesting_level
4533    }
4534
4535    /// Analyze the spacing between two list blocks
4536    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4537        let gap = next.start_line - current.end_line;
4538
4539        match gap {
4540            1 => BlockSpacing::Consecutive,
4541            2 => BlockSpacing::SingleBlank,
4542            _ if gap > 2 => {
4543                if self.has_only_blank_lines_between(current, next) {
4544                    BlockSpacing::MultipleBlanks
4545                } else {
4546                    BlockSpacing::ContentBetween
4547                }
4548            }
4549            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4550        }
4551    }
4552
4553    /// Check if unordered lists can be merged with a single blank line between
4554    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4555        // Check if there are structural separators between the blocks
4556        // If has_meaningful_content_between returns true, it means there are structural separators
4557        if has_meaningful_content_between(self.content, current, next, self.lines) {
4558            return false; // Structural separators prevent merging
4559        }
4560
4561        // Only merge unordered lists with same marker across single blank
4562        !current.is_ordered && current.marker == next.marker
4563    }
4564
4565    /// Check if ordered lists can be merged when there's content between them
4566    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4567        // Do not merge lists if there are structural separators between them
4568        if has_meaningful_content_between(self.content, current, next, self.lines) {
4569            return false; // Structural separators prevent merging
4570        }
4571
4572        // Only consider merging ordered lists if there's no structural content between
4573        current.is_ordered && next.is_ordered
4574    }
4575
4576    /// Check if there are only blank lines between blocks
4577    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4578        for line_num in (current.end_line + 1)..next.start_line {
4579            if let Some(line_info) = self.lines.get(line_num - 1)
4580                && !line_info.content(self.content).trim().is_empty()
4581            {
4582                return false;
4583            }
4584        }
4585        true
4586    }
4587
4588    /// Merge two compatible list blocks into one
4589    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4590        current.end_line = next.end_line;
4591        current.item_lines.extend_from_slice(&next.item_lines);
4592
4593        // Update max marker width
4594        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4595
4596        // Handle marker consistency for unordered lists
4597        if !current.is_ordered && self.markers_differ(&current, next) {
4598            current.marker = None; // Mixed markers
4599        }
4600
4601        current
4602    }
4603
4604    /// Check if two blocks have different markers
4605    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4606        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4607    }
4608}
4609
/// How two neighbouring list blocks are separated in the source
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between the blocks
    Consecutive,
    /// Exactly one blank line between the blocks
    SingleBlank,
    /// Several lines between the blocks, all of them blank
    MultipleBlanks,
    /// At least one non-blank line between the blocks
    ContentBetween,
}
4618
4619/// Check if there's meaningful content (not just blank lines) between two list blocks
4620fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4621    // Check lines between current.end_line and next.start_line
4622    for line_num in (current.end_line + 1)..next.start_line {
4623        if let Some(line_info) = lines.get(line_num - 1) {
4624            // Convert to 0-indexed
4625            let trimmed = line_info.content(content).trim();
4626
4627            // Skip empty lines
4628            if trimmed.is_empty() {
4629                continue;
4630            }
4631
4632            // Check for structural separators that should separate lists (CommonMark compliant)
4633
4634            // Headings separate lists
4635            if line_info.heading.is_some() {
4636                return true; // Has meaningful content - headings separate lists
4637            }
4638
4639            // Horizontal rules separate lists (---, ***, ___)
4640            if is_horizontal_rule(trimmed) {
4641                return true; // Has meaningful content - horizontal rules separate lists
4642            }
4643
4644            // Tables separate lists
4645            if crate::utils::skip_context::is_table_line(trimmed) {
4646                return true; // Has meaningful content - tables separate lists
4647            }
4648
4649            // Blockquotes separate lists
4650            if trimmed.starts_with('>') {
4651                return true; // Has meaningful content - blockquotes separate lists
4652            }
4653
4654            // Code block fences separate lists (unless properly indented as list content)
4655            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4656                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4657
4658                // Check if this code block is properly indented as list continuation
4659                let min_continuation_indent = if current.is_ordered {
4660                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4661                } else {
4662                    current.nesting_level + 2
4663                };
4664
4665                if line_indent < min_continuation_indent {
4666                    // This is a standalone code block that separates lists
4667                    return true; // Has meaningful content - standalone code blocks separate lists
4668                }
4669            }
4670
4671            // Check if this line has proper indentation for list continuation
4672            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4673
4674            // Calculate minimum indentation needed to be list continuation
4675            let min_indent = if current.is_ordered {
4676                current.nesting_level + current.max_marker_width
4677            } else {
4678                current.nesting_level + 2
4679            };
4680
4681            // If the line is not indented enough to be list continuation, it's meaningful content
4682            if line_indent < min_indent {
4683                return true; // Has meaningful content - content not indented as list continuation
4684            }
4685
4686            // If we reach here, the line is properly indented as list continuation
4687            // Continue checking other lines
4688        }
4689    }
4690
4691    // Only blank lines or properly indented list continuation content between blocks
4692    false
4693}
4694
4695/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4696/// CommonMark rules for thematic breaks (horizontal rules):
4697/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4698/// - Must have 3+ of the same character (-, *, or _)
4699/// - May have spaces between characters
4700/// - No other characters allowed
4701pub fn is_horizontal_rule_line(line: &str) -> bool {
4702    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4703    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4704    if leading_spaces > 3 || line.starts_with('\t') {
4705        return false;
4706    }
4707
4708    is_horizontal_rule_content(line.trim())
4709}
4710
/// Check whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at least
/// three times, and contain nothing else except spaces and tabs. Indentation is not
/// considered here; use `is_horizontal_rule_line` for the full CommonMark check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three marker characters need at least three bytes
    if trimmed.len() < 3 {
        return false;
    }

    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Everything must be the marker or blank padding, and the marker must repeat 3+ times
    trimmed.chars().all(|ch| ch == marker || ch == ' ' || ch == '\t')
        && trimmed.chars().filter(|&ch| ch == marker).count() >= 3
}
4735
4736/// Backwards-compatible alias for `is_horizontal_rule_content`
4737pub fn is_horizontal_rule(trimmed: &str) -> bool {
4738    is_horizontal_rule_content(trimmed)
4739}
4740
/// Unit tests for `LintContext` construction, per-line metadata, and list item detection
4742#[cfg(test)]
4743mod tests {
4744    use super::*;
4745
4746    #[test]
4747    fn test_empty_content() {
4748        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4749        assert_eq!(ctx.content, "");
4750        assert_eq!(ctx.line_offsets, vec![0]);
4751        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4752        assert_eq!(ctx.lines.len(), 0);
4753    }
4754
4755    #[test]
4756    fn test_single_line() {
4757        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4758        assert_eq!(ctx.content, "# Hello");
4759        assert_eq!(ctx.line_offsets, vec![0]);
4760        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4761        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4762    }
4763
4764    #[test]
4765    fn test_multi_line() {
4766        let content = "# Title\n\nSecond line\nThird line";
4767        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4768        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4769        // Test offset to line/col
4770        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4771        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4772        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4773        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4774        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4775    }
4776
4777    #[test]
4778    fn test_line_info() {
4779        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4780        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4781
4782        // Test line info
4783        assert_eq!(ctx.lines.len(), 7);
4784
4785        // Line 1: "# Title"
4786        let line1 = &ctx.lines[0];
4787        assert_eq!(line1.content(ctx.content), "# Title");
4788        assert_eq!(line1.byte_offset, 0);
4789        assert_eq!(line1.indent, 0);
4790        assert!(!line1.is_blank);
4791        assert!(!line1.in_code_block);
4792        assert!(line1.list_item.is_none());
4793
4794        // Line 2: "    indented"
4795        let line2 = &ctx.lines[1];
4796        assert_eq!(line2.content(ctx.content), "    indented");
4797        assert_eq!(line2.byte_offset, 8);
4798        assert_eq!(line2.indent, 4);
4799        assert!(!line2.is_blank);
4800
4801        // Line 3: "" (blank)
4802        let line3 = &ctx.lines[2];
4803        assert_eq!(line3.content(ctx.content), "");
4804        assert!(line3.is_blank);
4805
4806        // Test helper methods
4807        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4808        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4809        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4810        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4811    }
4812
4813    #[test]
4814    fn test_list_item_detection() {
4815        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4816        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4817
4818        // Line 1: "- Unordered item"
4819        let line1 = &ctx.lines[0];
4820        assert!(line1.list_item.is_some());
4821        let list1 = line1.list_item.as_ref().unwrap();
4822        assert_eq!(list1.marker, "-");
4823        assert!(!list1.is_ordered);
4824        assert_eq!(list1.marker_column, 0);
4825        assert_eq!(list1.content_column, 2);
4826
4827        // Line 2: "  * Nested item"
4828        let line2 = &ctx.lines[1];
4829        assert!(line2.list_item.is_some());
4830        let list2 = line2.list_item.as_ref().unwrap();
4831        assert_eq!(list2.marker, "*");
4832        assert_eq!(list2.marker_column, 2);
4833
4834        // Line 3: "1. Ordered item"
4835        let line3 = &ctx.lines[2];
4836        assert!(line3.list_item.is_some());
4837        let list3 = line3.list_item.as_ref().unwrap();
4838        assert_eq!(list3.marker, "1.");
4839        assert!(list3.is_ordered);
4840        assert_eq!(list3.number, Some(1));
4841
4842        // Line 6: "Not a list"
4843        let line6 = &ctx.lines[5];
4844        assert!(line6.list_item.is_none());
4845    }
4846
4847    #[test]
4848    fn test_offset_to_line_col_edge_cases() {
4849        let content = "a\nb\nc";
4850        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4851        // line_offsets: [0, 2, 4]
4852        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4853        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4854        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4855        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4856        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4857        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4858    }
4859
4860    #[test]
4861    fn test_mdx_esm_blocks() {
4862        let content = r##"import {Chart} from './snowfall.js'
4863export const year = 2023
4864
4865# Last year's snowfall
4866
4867In {year}, the snowfall was above average.
4868It was followed by a warm spring which caused
4869flood conditions in many of the nearby rivers.
4870
4871<Chart color="#fcb32c" year={year} />
4872"##;
4873
4874        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4875
4876        // Check that lines 1 and 2 are marked as ESM blocks
4877        assert_eq!(ctx.lines.len(), 10);
4878        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4879        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4880        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4881        assert!(
4882            !ctx.lines[3].in_esm_block,
4883            "Line 4 (heading) should NOT be in_esm_block"
4884        );
4885        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4886        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4887    }
4888
4889    #[test]
4890    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4891        let content = r#"import {Chart} from './snowfall.js'
4892export const year = 2023
4893
4894# Last year's snowfall
4895"#;
4896
4897        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4898
4899        // ESM blocks should NOT be detected in Standard flavor
4900        assert!(
4901            !ctx.lines[0].in_esm_block,
4902            "Line 1 should NOT be in_esm_block in Standard flavor"
4903        );
4904        assert!(
4905            !ctx.lines[1].in_esm_block,
4906            "Line 2 should NOT be in_esm_block in Standard flavor"
4907        );
4908    }
4909
4910    #[test]
4911    fn test_blockquote_with_indented_content() {
4912        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4913        // The content inside the blockquote may also be detected as a code block (which is correct),
4914        // but for MD046 purposes, we need to know the line is inside a blockquote.
4915        let content = r#"# Heading
4916
4917>      -S socket-path
4918>                    More text
4919"#;
4920        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4921
4922        // Line 3 (index 2) should be detected as blockquote
4923        assert!(
4924            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4925            "Line 3 should be a blockquote"
4926        );
4927        // Line 4 (index 3) should also be blockquote
4928        assert!(
4929            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4930            "Line 4 should be a blockquote"
4931        );
4932
4933        // Verify blockquote content is correctly parsed
4934        // Note: spaces_after includes the spaces between `>` and content
4935        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4936        assert_eq!(bq3.content, "-S socket-path");
4937        assert_eq!(bq3.nesting_level, 1);
4938        // 6 spaces after the `>` marker
4939        assert!(bq3.has_multiple_spaces_after_marker);
4940
4941        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4942        assert_eq!(bq4.content, "More text");
4943        assert_eq!(bq4.nesting_level, 1);
4944    }
4945
4946    #[test]
4947    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4948        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4949        let content = r#"# Title
4950
4951A footnote[^1].
4952
4953[^1]: This is the footnote content.
4954
4955[^note]: Another footnote with [link](https://example.com).
4956
4957[regular]: ./path.md "A real reference definition"
4958"#;
4959        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4960
4961        // Should only have one reference definition (the regular one)
4962        assert_eq!(
4963            ctx.reference_defs.len(),
4964            1,
4965            "Footnotes should not be parsed as reference definitions"
4966        );
4967
4968        // The only reference def should be the regular one
4969        assert_eq!(ctx.reference_defs[0].id, "regular");
4970        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4971        assert_eq!(
4972            ctx.reference_defs[0].title,
4973            Some("A real reference definition".to_string())
4974        );
4975    }
4976
4977    #[test]
4978    fn test_footnote_with_inline_link_not_misidentified() {
4979        // Regression test for issue #286: footnote containing an inline link
4980        // was incorrectly parsed as a reference definition with URL "[link](url)"
4981        let content = r#"# Title
4982
4983A footnote[^1].
4984
4985[^1]: [link](https://www.google.com).
4986"#;
4987        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4988
4989        // Should have no reference definitions
4990        assert!(
4991            ctx.reference_defs.is_empty(),
4992            "Footnote with inline link should not create a reference definition"
4993        );
4994    }
4995
4996    #[test]
4997    fn test_various_footnote_formats_excluded() {
4998        // Test various footnote ID formats are all excluded
4999        let content = r#"[^1]: Numeric footnote
5000[^note]: Named footnote
5001[^a]: Single char footnote
5002[^long-footnote-name]: Long named footnote
5003[^123abc]: Mixed alphanumeric
5004
5005[ref1]: ./file1.md
5006[ref2]: ./file2.md
5007"#;
5008        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5009
5010        // Should only have the two regular reference definitions
5011        assert_eq!(
5012            ctx.reference_defs.len(),
5013            2,
5014            "Only regular reference definitions should be parsed"
5015        );
5016
5017        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5018        assert!(ids.contains(&"ref1"));
5019        assert!(ids.contains(&"ref2"));
5020        assert!(!ids.iter().any(|id| id.starts_with('^')));
5021    }
5022
5023    // =========================================================================
5024    // Tests for has_char and char_count methods
5025    // =========================================================================
5026
5027    #[test]
5028    fn test_has_char_tracked_characters() {
5029        // Test all 12 tracked characters
5030        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5031        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5032
5033        // All tracked characters should be detected
5034        assert!(ctx.has_char('#'), "Should detect hash");
5035        assert!(ctx.has_char('*'), "Should detect asterisk");
5036        assert!(ctx.has_char('_'), "Should detect underscore");
5037        assert!(ctx.has_char('-'), "Should detect hyphen");
5038        assert!(ctx.has_char('+'), "Should detect plus");
5039        assert!(ctx.has_char('>'), "Should detect gt");
5040        assert!(ctx.has_char('|'), "Should detect pipe");
5041        assert!(ctx.has_char('['), "Should detect bracket");
5042        assert!(ctx.has_char('`'), "Should detect backtick");
5043        assert!(ctx.has_char('<'), "Should detect lt");
5044        assert!(ctx.has_char('!'), "Should detect exclamation");
5045        assert!(ctx.has_char('\n'), "Should detect newline");
5046    }
5047
5048    #[test]
5049    fn test_has_char_absent_characters() {
5050        let content = "Simple text without special chars";
5051        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5052
5053        // None of the tracked characters should be present
5054        assert!(!ctx.has_char('#'), "Should not detect hash");
5055        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5056        assert!(!ctx.has_char('_'), "Should not detect underscore");
5057        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5058        assert!(!ctx.has_char('+'), "Should not detect plus");
5059        assert!(!ctx.has_char('>'), "Should not detect gt");
5060        assert!(!ctx.has_char('|'), "Should not detect pipe");
5061        assert!(!ctx.has_char('['), "Should not detect bracket");
5062        assert!(!ctx.has_char('`'), "Should not detect backtick");
5063        assert!(!ctx.has_char('<'), "Should not detect lt");
5064        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5065        // Note: single line content has no newlines
5066        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5067    }
5068
5069    #[test]
5070    fn test_has_char_fallback_for_untracked() {
5071        let content = "Text with @mention and $dollar and %percent";
5072        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5073
5074        // Untracked characters should fall back to content.contains()
5075        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5076        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5077        assert!(ctx.has_char('%'), "Should detect % via fallback");
5078        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5079    }
5080
5081    #[test]
5082    fn test_char_count_tracked_characters() {
5083        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5084        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5085
5086        // Count each tracked character
5087        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5088        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5089        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5090        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5091        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5092        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5093        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5094        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5095        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5096        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5097        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5098        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5099    }
5100
5101    #[test]
5102    fn test_char_count_zero_for_absent() {
5103        let content = "Plain text";
5104        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5105
5106        assert_eq!(ctx.char_count('#'), 0);
5107        assert_eq!(ctx.char_count('*'), 0);
5108        assert_eq!(ctx.char_count('_'), 0);
5109        assert_eq!(ctx.char_count('\n'), 0);
5110    }
5111
5112    #[test]
5113    fn test_char_count_fallback_for_untracked() {
5114        let content = "@@@ $$ %%%";
5115        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5116
5117        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5118        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5119        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5120        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5121    }
5122
5123    #[test]
5124    fn test_char_count_empty_content() {
5125        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5126
5127        assert_eq!(ctx.char_count('#'), 0);
5128        assert_eq!(ctx.char_count('*'), 0);
5129        assert_eq!(ctx.char_count('@'), 0);
5130        assert!(!ctx.has_char('#'));
5131        assert!(!ctx.has_char('@'));
5132    }
5133
5134    // =========================================================================
5135    // Tests for is_in_html_tag method
5136    // =========================================================================
5137
5138    #[test]
5139    fn test_is_in_html_tag_simple() {
5140        let content = "<div>content</div>";
5141        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5142
5143        // Inside opening tag
5144        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5145        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5146        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5147
5148        // Outside tag (in content)
5149        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5150        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5151
5152        // Inside closing tag
5153        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5154        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5155    }
5156
5157    #[test]
5158    fn test_is_in_html_tag_self_closing() {
5159        let content = "Text <br/> more text";
5160        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5161
5162        // Before tag
5163        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5164        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5165
5166        // Inside self-closing tag
5167        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5168        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5169        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5170
5171        // After tag
5172        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5173    }
5174
5175    #[test]
5176    fn test_is_in_html_tag_with_attributes() {
5177        let content = r#"<a href="url" class="link">text</a>"#;
5178        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5179
5180        // All positions inside opening tag with attributes
5181        assert!(ctx.is_in_html_tag(0), "Start of tag");
5182        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5183        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5184        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5185
5186        // Content between tags
5187        assert!(!ctx.is_in_html_tag(27), "Start of content");
5188        assert!(!ctx.is_in_html_tag(30), "End of content");
5189
5190        // Closing tag
5191        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5192    }
5193
5194    #[test]
5195    fn test_is_in_html_tag_multiline() {
5196        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5197        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5198
5199        // Opening tag spans multiple lines
5200        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5201        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5202        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5203
5204        // After closing > of opening tag
5205        let closing_bracket_pos = content.find(">\n").unwrap();
5206        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5207    }
5208
5209    #[test]
5210    fn test_is_in_html_tag_no_tags() {
5211        let content = "Plain text without any HTML";
5212        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5213
5214        // No position should be in an HTML tag
5215        for i in 0..content.len() {
5216            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5217        }
5218    }
5219
5220    // =========================================================================
5221    // Tests for is_in_jinja_range method
5222    // =========================================================================
5223
5224    #[test]
5225    fn test_is_in_jinja_range_expression() {
5226        let content = "Hello {{ name }}!";
5227        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5228
5229        // Before Jinja
5230        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5231        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5232
5233        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5234        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5235        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5236        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5237        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5238        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5239
5240        // After Jinja
5241        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5242    }
5243
5244    #[test]
5245    fn test_is_in_jinja_range_statement() {
5246        let content = "{% if condition %}content{% endif %}";
5247        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5248
5249        // Inside opening statement
5250        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5251        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5252        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5253
5254        // Content between
5255        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5256
5257        // Inside closing statement
5258        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5259        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5260    }
5261
5262    #[test]
5263    fn test_is_in_jinja_range_multiple() {
5264        let content = "{{ a }} and {{ b }}";
5265        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5266
5267        // First Jinja expression
5268        assert!(ctx.is_in_jinja_range(0));
5269        assert!(ctx.is_in_jinja_range(3));
5270        assert!(ctx.is_in_jinja_range(6));
5271
5272        // Between expressions
5273        assert!(!ctx.is_in_jinja_range(8));
5274        assert!(!ctx.is_in_jinja_range(11));
5275
5276        // Second Jinja expression
5277        assert!(ctx.is_in_jinja_range(12));
5278        assert!(ctx.is_in_jinja_range(15));
5279        assert!(ctx.is_in_jinja_range(18));
5280    }
5281
5282    #[test]
5283    fn test_is_in_jinja_range_no_jinja() {
5284        let content = "Plain text with single braces but not Jinja";
5285        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5286
5287        // No position should be in Jinja
5288        for i in 0..content.len() {
5289            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5290        }
5291    }
5292
5293    // =========================================================================
5294    // Tests for is_in_link_title method
5295    // =========================================================================
5296
5297    #[test]
5298    fn test_is_in_link_title_with_title() {
5299        let content = r#"[ref]: https://example.com "Title text"
5300
5301Some content."#;
5302        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5303
5304        // Verify we have a reference def with title
5305        assert_eq!(ctx.reference_defs.len(), 1);
5306        let def = &ctx.reference_defs[0];
5307        assert!(def.title_byte_start.is_some());
5308        assert!(def.title_byte_end.is_some());
5309
5310        let title_start = def.title_byte_start.unwrap();
5311        let title_end = def.title_byte_end.unwrap();
5312
5313        // Before title (in URL)
5314        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5315
5316        // Inside title
5317        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5318        assert!(
5319            ctx.is_in_link_title(title_start + 5),
5320            "Middle of title should be in title"
5321        );
5322        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5323
5324        // After title
5325        assert!(
5326            !ctx.is_in_link_title(title_end),
5327            "After title end should not be in title"
5328        );
5329    }
5330
5331    #[test]
5332    fn test_is_in_link_title_without_title() {
5333        let content = "[ref]: https://example.com\n\nSome content.";
5334        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5335
5336        // Reference def without title
5337        assert_eq!(ctx.reference_defs.len(), 1);
5338        let def = &ctx.reference_defs[0];
5339        assert!(def.title_byte_start.is_none());
5340        assert!(def.title_byte_end.is_none());
5341
5342        // No position should be in a title
5343        for i in 0..content.len() {
5344            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5345        }
5346    }
5347
5348    #[test]
5349    fn test_is_in_link_title_multiple_refs() {
5350        let content = r#"[ref1]: /url1 "Title One"
5351[ref2]: /url2
5352[ref3]: /url3 "Title Three"
5353"#;
5354        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5355
5356        // Should have 3 reference defs
5357        assert_eq!(ctx.reference_defs.len(), 3);
5358
5359        // ref1 has title
5360        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5361        assert!(ref1.title_byte_start.is_some());
5362
5363        // ref2 has no title
5364        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5365        assert!(ref2.title_byte_start.is_none());
5366
5367        // ref3 has title
5368        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5369        assert!(ref3.title_byte_start.is_some());
5370
5371        // Check positions in ref1's title
5372        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5373            assert!(ctx.is_in_link_title(start + 1));
5374            assert!(!ctx.is_in_link_title(end + 5));
5375        }
5376
5377        // Check positions in ref3's title
5378        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5379            assert!(ctx.is_in_link_title(start + 1));
5380        }
5381    }
5382
5383    #[test]
5384    fn test_is_in_link_title_single_quotes() {
5385        let content = "[ref]: /url 'Single quoted title'\n";
5386        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5387
5388        assert_eq!(ctx.reference_defs.len(), 1);
5389        let def = &ctx.reference_defs[0];
5390
5391        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5392            assert!(ctx.is_in_link_title(start));
5393            assert!(ctx.is_in_link_title(start + 5));
5394            assert!(!ctx.is_in_link_title(end));
5395        }
5396    }
5397
5398    #[test]
5399    fn test_is_in_link_title_parentheses() {
5400        // Note: The reference def parser may not support parenthesized titles
5401        // This test verifies the is_in_link_title method works when titles exist
5402        let content = "[ref]: /url (Parenthesized title)\n";
5403        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5404
5405        // Parser behavior: may or may not parse parenthesized titles
5406        // We test that is_in_link_title correctly reflects whatever was parsed
5407        if ctx.reference_defs.is_empty() {
5408            // Parser didn't recognize this as a reference def
5409            for i in 0..content.len() {
5410                assert!(!ctx.is_in_link_title(i));
5411            }
5412        } else {
5413            let def = &ctx.reference_defs[0];
5414            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5415                assert!(ctx.is_in_link_title(start));
5416                assert!(ctx.is_in_link_title(start + 5));
5417                assert!(!ctx.is_in_link_title(end));
5418            } else {
5419                // Title wasn't parsed, so no position should be in title
5420                for i in 0..content.len() {
5421                    assert!(!ctx.is_in_link_title(i));
5422                }
5423            }
5424        }
5425    }
5426
5427    #[test]
5428    fn test_is_in_link_title_no_refs() {
5429        let content = "Just plain text without any reference definitions.";
5430        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5431
5432        assert!(ctx.reference_defs.is_empty());
5433
5434        for i in 0..content.len() {
5435            assert!(!ctx.is_in_link_title(i));
5436        }
5437    }
5438
5439    // =========================================================================
5440    // Math span tests (Issue #289)
5441    // =========================================================================
5442
5443    #[test]
5444    fn test_math_spans_inline() {
5445        let content = "Text with inline math $[f](x)$ in it.";
5446        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5447
5448        let math_spans = ctx.math_spans();
5449        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5450
5451        let span = &math_spans[0];
5452        assert!(!span.is_display, "Should be inline math, not display");
5453        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5454    }
5455
5456    #[test]
5457    fn test_math_spans_display_single_line() {
5458        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5459        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5460
5461        let math_spans = ctx.math_spans();
5462        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5463
5464        let span = &math_spans[0];
5465        assert!(span.is_display, "Should be display math");
5466        assert!(
5467            span.content.contains("[x](\\zeta)"),
5468            "Content should contain the link-like pattern"
5469        );
5470    }
5471
5472    #[test]
5473    fn test_math_spans_display_multiline() {
5474        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5475        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5476
5477        let math_spans = ctx.math_spans();
5478        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5479
5480        let span = &math_spans[0];
5481        assert!(span.is_display, "Should be display math");
5482    }
5483
5484    #[test]
5485    fn test_is_in_math_span() {
5486        let content = "Text $[f](x)$ more text";
5487        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5488
5489        // Position inside the math span
5490        let math_start = content.find('$').unwrap();
5491        let math_end = content.rfind('$').unwrap() + 1;
5492
5493        assert!(
5494            ctx.is_in_math_span(math_start + 1),
5495            "Position inside math span should return true"
5496        );
5497        assert!(
5498            ctx.is_in_math_span(math_start + 3),
5499            "Position inside math span should return true"
5500        );
5501
5502        // Position outside the math span
5503        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5504        assert!(
5505            !ctx.is_in_math_span(math_end + 1),
5506            "Position after math span should return false"
5507        );
5508    }
5509
5510    #[test]
5511    fn test_math_spans_mixed_with_code() {
5512        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5513        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5514
5515        let math_spans = ctx.math_spans();
5516        let code_spans = ctx.code_spans();
5517
5518        assert_eq!(math_spans.len(), 1, "Should have one math span");
5519        assert_eq!(code_spans.len(), 1, "Should have one code span");
5520
5521        // Verify math span content
5522        assert_eq!(math_spans[0].content, "[f](x)");
5523        // Verify code span content
5524        assert_eq!(code_spans[0].content, "[g](y)");
5525    }
5526
5527    #[test]
5528    fn test_math_spans_no_math() {
5529        let content = "Regular text without any math at all.";
5530        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5531
5532        let math_spans = ctx.math_spans();
5533        assert!(math_spans.is_empty(), "Should have no math spans");
5534    }
5535
5536    #[test]
5537    fn test_math_spans_multiple() {
5538        let content = "First $a$ and second $b$ and display $$c$$";
5539        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5540
5541        let math_spans = ctx.math_spans();
5542        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5543
5544        // Two inline, one display
5545        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5546        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5547
5548        assert_eq!(inline_count, 2, "Should have two inline math spans");
5549        assert_eq!(display_count, 1, "Should have one display math span");
5550    }
5551
5552    #[test]
5553    fn test_is_in_math_span_boundary_positions() {
5554        // Test exact boundary positions: $[f](x)$
5555        // Byte positions:                0123456789
5556        let content = "$[f](x)$";
5557        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5558
5559        let math_spans = ctx.math_spans();
5560        assert_eq!(math_spans.len(), 1, "Should have one math span");
5561
5562        let span = &math_spans[0];
5563
5564        // Position at opening $ should be in span (byte 0)
5565        assert!(
5566            ctx.is_in_math_span(span.byte_offset),
5567            "Start position should be in span"
5568        );
5569
5570        // Position just inside should be in span
5571        assert!(
5572            ctx.is_in_math_span(span.byte_offset + 1),
5573            "Position after start should be in span"
5574        );
5575
5576        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5577        assert!(
5578            ctx.is_in_math_span(span.byte_end - 1),
5579            "Position at end-1 should be in span"
5580        );
5581
5582        // Position at byte_end should NOT be in span (exclusive end)
5583        assert!(
5584            !ctx.is_in_math_span(span.byte_end),
5585            "Position at byte_end should NOT be in span (exclusive)"
5586        );
5587    }
5588
5589    #[test]
5590    fn test_math_spans_at_document_start() {
5591        let content = "$x$ text";
5592        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5593
5594        let math_spans = ctx.math_spans();
5595        assert_eq!(math_spans.len(), 1);
5596        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5597    }
5598
5599    #[test]
5600    fn test_math_spans_at_document_end() {
5601        let content = "text $x$";
5602        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5603
5604        let math_spans = ctx.math_spans();
5605        assert_eq!(math_spans.len(), 1);
5606        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5607    }
5608
5609    #[test]
5610    fn test_math_spans_consecutive() {
5611        let content = "$a$$b$";
5612        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5613
5614        let math_spans = ctx.math_spans();
5615        // pulldown-cmark should parse these as separate spans
5616        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5617
5618        // All positions should be in some math span
5619        for i in 0..content.len() {
5620            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5621        }
5622    }
5623
5624    #[test]
5625    fn test_math_spans_currency_not_math() {
5626        // Unbalanced $ should not create math spans
5627        let content = "Price is $100";
5628        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5629
5630        let math_spans = ctx.math_spans();
5631        // pulldown-cmark requires balanced delimiters for math
5632        // $100 alone is not math
5633        assert!(
5634            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5635            "Unbalanced $ should not create math span containing 100"
5636        );
5637    }
5638
5639    // =========================================================================
5640    // Tests for O(1) reference definition lookups via HashMap
5641    // =========================================================================
5642
5643    #[test]
5644    fn test_reference_lookup_o1_basic() {
5645        let content = r#"[ref1]: /url1
5646[REF2]: /url2 "Title"
5647[Ref3]: /url3
5648
5649Use [link][ref1] and [link][REF2]."#;
5650        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5651
5652        // Verify we have 3 reference defs
5653        assert_eq!(ctx.reference_defs.len(), 3);
5654
5655        // Test get_reference_url with various cases
5656        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5657        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5658        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5659        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5660        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5661        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5662        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5663    }
5664
5665    #[test]
5666    fn test_reference_lookup_o1_get_reference_def() {
5667        let content = r#"[myref]: https://example.com "My Title"
5668"#;
5669        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5670
5671        // Test get_reference_def
5672        let def = ctx.get_reference_def("myref").expect("Should find myref");
5673        assert_eq!(def.url, "https://example.com");
5674        assert_eq!(def.title.as_deref(), Some("My Title"));
5675
5676        // Case insensitive
5677        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5678        assert_eq!(def2.url, "https://example.com");
5679
5680        // Non-existent
5681        assert!(ctx.get_reference_def("nonexistent").is_none());
5682    }
5683
5684    #[test]
5685    fn test_reference_lookup_o1_has_reference_def() {
5686        let content = r#"[foo]: /foo
5687[BAR]: /bar
5688"#;
5689        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5690
5691        // Test has_reference_def
5692        assert!(ctx.has_reference_def("foo"));
5693        assert!(ctx.has_reference_def("FOO")); // case insensitive
5694        assert!(ctx.has_reference_def("bar"));
5695        assert!(ctx.has_reference_def("Bar")); // case insensitive
5696        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5697    }
5698
5699    #[test]
5700    fn test_reference_lookup_o1_empty_content() {
5701        let content = "No references here.";
5702        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5703
5704        assert!(ctx.reference_defs.is_empty());
5705        assert_eq!(ctx.get_reference_url("anything"), None);
5706        assert!(ctx.get_reference_def("anything").is_none());
5707        assert!(!ctx.has_reference_def("anything"));
5708    }
5709
5710    #[test]
5711    fn test_reference_lookup_o1_special_characters_in_id() {
5712        let content = r#"[ref-with-dash]: /url1
5713[ref_with_underscore]: /url2
5714[ref.with.dots]: /url3
5715"#;
5716        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5717
5718        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5719        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5720        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5721    }
5722
5723    #[test]
5724    fn test_reference_lookup_o1_unicode_id() {
5725        let content = r#"[日本語]: /japanese
5726[émoji]: /emoji
5727"#;
5728        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5729
5730        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5731        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5732        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5733    }
5734}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs