rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Macro for profiling sections - only active in non-WASM builds
///
/// Expands to an expression that evaluates `$code` and yields its value. When `$profile`
/// is `true`, the elapsed time of `$code` is printed to stderr as
/// `[PROFILE] <name>: <duration>`.
///
/// `$profile` is evaluated exactly once, before `$code`. The clock is only read (and `$name`
/// only evaluated) when profiling is enabled, so a disabled section adds no timing overhead.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let profiling: bool = $profile;
        // `None` when profiling is off: no `Instant::now()` call on the normal path.
        let start = profiling.then(std::time::Instant::now);
        let result = $code;
        if let Some(start) = start {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}

/// WASM variant of `profile_section!`: no timing at all, the macro simply expands to `$code`.
/// `$name` and `$profile` are accepted for call-site compatibility and discarded.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Comprehensive link pattern that captures both inline links (`[text](url "title")`)
// and reference links (`[text][ref]`).
//
// Flags: `s` makes `.` match newlines; `x` enables verbose mode, so the whitespace and
// the `# ...` comments inside the pattern are ignored by the regex engine.
//
// Capture groups:
//   1 = link text, 2 = URL written in angle brackets, 3 = bare URL,
//   4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
//
// NOTE(review): nothing in this pattern excludes a preceding `!`, so the bracketed part of
// image syntax also matches; presumably callers filter images out - confirm.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern (same shape as LINK_PATTERN but requiring a leading `!`): matches inline
// images (`![alt](url "title")`) and reference images (`![alt][ref]`).
//
// Flags: `s` makes `.` match newlines; `x` enables verbose mode, so the whitespace and
// the `# ...` comments inside the pattern are ignored by the regex engine.
//
// Capture groups:
//   1 = alt text, 2 = URL written in angle brackets, 3 = bare URL,
//   4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[id]: url "title"` on a line of its own.
//
// `(?m)` makes `^`/`$` match at line boundaries. Up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs have no pattern of their own in this file: they use the centralized `URL_SIMPLE_REGEX` imported from `regex_cache`.
65
// Pattern for email addresses: a local part, `@`, a domain, and a final dot-separated
// label of at least two ASCII letters. The pattern is unanchored, so it matches
// anywhere inside the searched text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the input; group 1 captures the whole prefix: optional leading
// whitespace, one or more `>` characters, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a line
///
/// `byte_offset` and `byte_len` locate the line inside the source document (see
/// [`LineInfo::content`]); the remaining fields are flags and parsed details that are
/// filled in by the detection passes run from `LintContext::new`.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (set for every line of a multi-line code span after its first line)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
}
130
131impl LineInfo {
132    /// Get the line content as a string slice from the source document
133    pub fn content<'a>(&self, source: &'a str) -> &'a str {
134        &source[self.byte_offset..self.byte_offset + self.byte_len]
135    }
136
137    /// Check if this line is inside MkDocs-specific indented content (admonitions or tabs).
138    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
139    /// but in MkDocs flavor it's actually container content that should be preserved.
140    #[inline]
141    pub fn in_mkdocs_container(&self) -> bool {
142        self.in_admonition || self.in_content_tab
143    }
144}
145
/// Information about a list item
///
/// Stored in [`LineInfo::list_item`] for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists (`None` is presumably used for unordered items - confirm)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
160
/// Heading style type
///
/// Fieldless enum, so equality is total: it derives `Eq` and `Hash` in addition to
/// `PartialEq`, which lets it be used as a `HashMap`/`HashSet` key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
171
/// Parsed link information
///
/// The string fields are `Cow<'a, str>`, so each may either borrow from text that lives
/// for `'a` or own its data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
196
/// Information about a broken link reported by pulldown-cmark
///
/// Collected into `LintContext::broken_links` (broken/undefined references).
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
205
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Collected into `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
218
/// Parsed image information
///
/// Mirrors [`ParsedLink`] field for field, with `alt_text` in place of the link text.
/// The string fields are `Cow<'a, str>`, so each may either borrow from text that lives
/// for `'a` or own its data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
243
/// Reference definition [ref]: url "title"
///
/// Collected into `LintContext::reference_defs`; `LintContext` additionally keeps a map
/// from lowercase ID to the index of the definition in that vector.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
264
/// Parsed code span information
///
/// A span may cover several lines: `line` is where it starts and `end_line` where it ends
/// (the two are equal for a single-line span).
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
285
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Positional fields follow the same layout as [`CodeSpan`]: start/end line, start/end
/// column, and start/end byte offset.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
306
/// Information about a heading
///
/// Stored in [`LineInfo::heading`]. Entries with `is_valid == false` are kept so that
/// malformed headings can still be reported; [`ValidHeadingsIter`] skips them.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
334
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by [`ValidHeadingsIter`]; both references borrow from
/// the line slice being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
348
/// Iterator over valid CommonMark headings in a document
///
/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
/// but should not be processed by other heading rules.
///
/// Yields a [`ValidHeading`] for every line whose `heading` is present and has
/// `is_valid == true`, in document order.
pub struct ValidHeadingsIter<'a> {
    /// All lines of the document, in order
    lines: &'a [LineInfo],
    /// 0-based index into `lines` of the next line to examine
    current_index: usize,
}
357
358impl<'a> ValidHeadingsIter<'a> {
359    fn new(lines: &'a [LineInfo]) -> Self {
360        Self {
361            lines,
362            current_index: 0,
363        }
364    }
365}
366
367impl<'a> Iterator for ValidHeadingsIter<'a> {
368    type Item = ValidHeading<'a>;
369
370    fn next(&mut self) -> Option<Self::Item> {
371        while self.current_index < self.lines.len() {
372            let idx = self.current_index;
373            self.current_index += 1;
374
375            let line_info = &self.lines[idx];
376            if let Some(heading) = &line_info.heading
377                && heading.is_valid
378            {
379                return Some(ValidHeading {
380                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
381                    heading,
382                    line_info,
383                });
384            }
385        }
386        None
387    }
388}
389
/// Information about a blockquote line
///
/// Stored in [`LineInfo::blockquote`] for lines that are blockquotes.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
410
/// Information about a list block
///
/// Collected into `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed line numbers like `start_line`/`end_line` - confirm)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
431
432use std::sync::{Arc, OnceLock};
433
/// List item data keyed by the byte offset of the line it belongs to.
///
/// The value tuple is `(is_ordered, marker, marker_column, content_column, number)`.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// A list of `(usize, usize)` byte-range pairs, used in JSX expression and MDX comment detection
type ByteRanges = Vec<(usize, usize)>;
439
/// Character frequency data for fast content analysis
///
/// Each field counts occurrences of one character; the parenthesised notes name the
/// Markdown constructs that character can introduce. `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
468
/// Pre-parsed HTML tag information
///
/// Held by `LintContext` in a `OnceLock` cache that starts out empty.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
491
/// Pre-parsed emphasis span information
///
/// `LintContext::new` obtains these from the same pass that computes basic line info.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
512
/// Pre-parsed table row information
///
/// Held by `LintContext` in a `OnceLock` cache that starts out empty.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row; each entry is one of the strings
    /// "left", "center", "right" or "none"
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
525
/// Pre-parsed bare URL information (not in links)
///
/// Held by `LintContext` in a `OnceLock` cache that starts out empty.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
544
/// Pre-computed view of one Markdown document.
///
/// Built by [`LintContext::new`], which runs each document-wide analysis pass and stores
/// the results here. Fields holding a `OnceLock` are caches; unless noted otherwise, `new`
/// leaves them empty.
pub struct LintContext<'a> {
    /// The document source text
    pub content: &'a str,
    /// Byte offset of the start of each line (first entry is 0)
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// O(1) lookup by lowercase ID -> index in reference_defs
    reference_defs_map: HashMap<String, usize>,
    /// Inline code spans. Note: `new` fills this cache eagerly, because the spans are
    /// needed while parsing links and images.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Lazy-loaded math spans ($...$ and $$...$$)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans. Note: `new` fills this cache eagerly from the basic line-info pass.
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges ({{ }}, {% %})
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
    /// Pre-computed JSX expression ranges (MDX: {expression})
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Pre-computed MDX comment ranges ({/* ... */})
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    shortcode_ranges: Vec<(usize, usize)>,
    /// Parsed inline configuration comments for rule disabling
    inline_config: InlineConfig,
}
577
/// Borrowed pieces of a blockquote line, in the order they appear in the line
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The contiguous run of `>` characters
    markers: &'a str,
    /// Spaces/tabs immediately after the markers
    spaces_after: &'a str,
    /// Everything that follows `spaces_after`
    content: &'a str,
}

/// Split a line into its blockquote components.
///
/// Returns `None` unless the first character after any leading spaces/tabs is `>`.
/// Only a contiguous run of `>` counts as markers: in `> > x` the markers are `>` and
/// the content is `> x`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_space_or_tab(ch: char) -> bool {
        matches!(ch, ' ' | '\t')
    }

    // Whatever was trimmed off the front is the indent.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    // At least one '>' must follow the indent.
    let after_markers = after_indent.trim_start_matches('>');
    let marker_len = after_indent.len() - after_markers.len();
    if marker_len == 0 {
        return None;
    }
    let markers = &after_indent[..marker_len];

    // Spaces/tabs between the markers and the content.
    let content = after_markers.trim_start_matches(is_space_or_tab);
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
622
623impl<'a> LintContext<'a> {
624    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
625        #[cfg(not(target_arch = "wasm32"))]
626        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
627        #[cfg(target_arch = "wasm32")]
628        let profile = false;
629
630        let line_offsets = profile_section!("Line offsets", profile, {
631            let mut offsets = vec![0];
632            for (i, c) in content.char_indices() {
633                if c == '\n' {
634                    offsets.push(i + 1);
635                }
636            }
637            offsets
638        });
639
640        // Detect code blocks and code spans once and cache them
641        let (code_blocks, code_span_ranges) = profile_section!(
642            "Code blocks",
643            profile,
644            CodeBlockUtils::detect_code_blocks_and_spans(content)
645        );
646
647        // Pre-compute HTML comment ranges ONCE for all operations
648        let html_comment_ranges = profile_section!(
649            "HTML comment ranges",
650            profile,
651            crate::utils::skip_context::compute_html_comment_ranges(content)
652        );
653
654        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
655        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
656            if flavor == MarkdownFlavor::MkDocs {
657                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
658            } else {
659                Vec::new()
660            }
661        });
662
663        // Pre-compute Quarto div block ranges for Quarto flavor
664        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
665            if flavor == MarkdownFlavor::Quarto {
666                crate::utils::quarto_divs::detect_div_block_ranges(content)
667            } else {
668                Vec::new()
669            }
670        });
671
672        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
673        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
674        let (mut lines, emphasis_spans) = profile_section!(
675            "Basic line info",
676            profile,
677            Self::compute_basic_line_info(
678                content,
679                &line_offsets,
680                &code_blocks,
681                flavor,
682                &html_comment_ranges,
683                &autodoc_ranges,
684                &quarto_div_ranges,
685            )
686        );
687
688        // Detect HTML blocks BEFORE heading detection
689        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
690
691        // Detect ESM import/export blocks in MDX files BEFORE heading detection
692        profile_section!(
693            "ESM blocks",
694            profile,
695            Self::detect_esm_blocks(content, &mut lines, flavor)
696        );
697
698        // Detect JSX expressions and MDX comments in MDX files
699        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
700            "JSX/MDX detection",
701            profile,
702            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
703        );
704
705        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
706        profile_section!(
707            "MkDocs constructs",
708            profile,
709            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
710        );
711
712        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
713        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
714
715        // Now detect headings and blockquotes
716        profile_section!(
717            "Headings & blockquotes",
718            profile,
719            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
720        );
721
722        // Parse code spans early so we can exclude them from link/image parsing
723        let code_spans = profile_section!(
724            "Code spans",
725            profile,
726            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
727        );
728
729        // Mark lines that are continuations of multi-line code spans
730        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
731        for span in &code_spans {
732            if span.end_line > span.line {
733                // Mark lines after the first line as continuations
734                for line_num in (span.line + 1)..=span.end_line {
735                    if let Some(line_info) = lines.get_mut(line_num - 1) {
736                        line_info.in_code_span_continuation = true;
737                    }
738                }
739            }
740        }
741
742        // Parse links, images, references, and list blocks
743        let (links, broken_links, footnote_refs) = profile_section!(
744            "Links",
745            profile,
746            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
747        );
748
749        let images = profile_section!(
750            "Images",
751            profile,
752            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
753        );
754
755        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
756
757        // Build O(1) lookup map for reference definitions by lowercase ID
758        let reference_defs_map: HashMap<String, usize> = reference_defs
759            .iter()
760            .enumerate()
761            .map(|(idx, def)| (def.id.to_lowercase(), idx))
762            .collect();
763
764        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
765
766        // Compute character frequency for fast content analysis
767        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
768
769        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
770        let table_blocks = profile_section!(
771            "Table blocks",
772            profile,
773            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
774                content,
775                &code_blocks,
776                &code_spans,
777                &html_comment_ranges,
778            )
779        );
780
781        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
782        let line_index = profile_section!(
783            "Line index",
784            profile,
785            crate::utils::range_utils::LineIndex::new(content)
786        );
787
788        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
789        let jinja_ranges = profile_section!(
790            "Jinja ranges",
791            profile,
792            crate::utils::jinja_utils::find_jinja_ranges(content)
793        );
794
795        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
796        let citation_ranges = profile_section!("Citation ranges", profile, {
797            if flavor == MarkdownFlavor::Quarto {
798                crate::utils::quarto_divs::find_citation_ranges(content)
799            } else {
800                Vec::new()
801            }
802        });
803
804        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
805        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
806            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
807            let mut ranges = Vec::new();
808            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
809                ranges.push((mat.start(), mat.end()));
810            }
811            ranges
812        });
813
814        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
815
816        Self {
817            content,
818            line_offsets,
819            code_blocks,
820            lines,
821            links,
822            images,
823            broken_links,
824            footnote_refs,
825            reference_defs,
826            reference_defs_map,
827            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
828            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
829            list_blocks,
830            char_frequency,
831            html_tags_cache: OnceLock::new(),
832            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
833            table_rows_cache: OnceLock::new(),
834            bare_urls_cache: OnceLock::new(),
835            has_mixed_list_nesting_cache: OnceLock::new(),
836            html_comment_ranges,
837            table_blocks,
838            line_index,
839            jinja_ranges,
840            flavor,
841            source_file,
842            jsx_expression_ranges,
843            mdx_comment_ranges,
844            citation_ranges,
845            shortcode_ranges,
846            inline_config,
847        }
848    }
849
850    /// Check if a rule is disabled at a specific line number (1-indexed)
851    ///
852    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
853    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
854    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
855        self.inline_config.is_rule_disabled(rule_name, line_number)
856    }
857
858    /// Get code spans - computed lazily on first access
859    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
860        Arc::clone(
861            self.code_spans_cache
862                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
863        )
864    }
865
866    /// Get math spans - computed lazily on first access
867    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
868        Arc::clone(
869            self.math_spans_cache
870                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
871        )
872    }
873
874    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
875    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
876        let math_spans = self.math_spans();
877        math_spans
878            .iter()
879            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
880    }
881
882    /// Get HTML comment ranges - pre-computed during LintContext construction
883    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
884        &self.html_comment_ranges
885    }
886
887    /// Get HTML tags - computed lazily on first access
888    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
889        Arc::clone(self.html_tags_cache.get_or_init(|| {
890            Arc::new(Self::parse_html_tags(
891                self.content,
892                &self.lines,
893                &self.code_blocks,
894                self.flavor,
895            ))
896        }))
897    }
898
899    /// Get emphasis spans - pre-computed during construction
900    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
901        Arc::clone(
902            self.emphasis_spans_cache
903                .get()
904                .expect("emphasis_spans_cache initialized during construction"),
905        )
906    }
907
908    /// Get table rows - computed lazily on first access
909    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
910        Arc::clone(
911            self.table_rows_cache
912                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
913        )
914    }
915
916    /// Get bare URLs - computed lazily on first access
917    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
918        Arc::clone(
919            self.bare_urls_cache
920                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
921        )
922    }
923
924    /// Check if document has mixed ordered/unordered list nesting.
925    /// Result is cached after first computation (document-level invariant).
926    /// This is used by MD007 for smart style auto-detection.
927    pub fn has_mixed_list_nesting(&self) -> bool {
928        *self
929            .has_mixed_list_nesting_cache
930            .get_or_init(|| self.compute_mixed_list_nesting())
931    }
932
933    /// Internal computation for mixed list nesting (only called once per LintContext).
934    fn compute_mixed_list_nesting(&self) -> bool {
935        // Track parent list items by their marker position and type
936        // Using marker_column instead of indent because it works correctly
937        // for blockquoted content where indent doesn't account for the prefix
938        // Stack stores: (marker_column, is_ordered)
939        let mut stack: Vec<(usize, bool)> = Vec::new();
940        let mut last_was_blank = false;
941
942        for line_info in &self.lines {
943            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
944            if line_info.in_code_block
945                || line_info.in_front_matter
946                || line_info.in_mkdocstrings
947                || line_info.in_html_comment
948                || line_info.in_esm_block
949            {
950                continue;
951            }
952
953            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
954            if line_info.is_blank {
955                last_was_blank = true;
956                continue;
957            }
958
959            if let Some(list_item) = &line_info.list_item {
960                // Normalize column 1 to column 0 (consistent with MD007 check function)
961                let current_pos = if list_item.marker_column == 1 {
962                    0
963                } else {
964                    list_item.marker_column
965                };
966
967                // If there was a blank line and this item is at root level, reset stack
968                if last_was_blank && current_pos == 0 {
969                    stack.clear();
970                }
971                last_was_blank = false;
972
973                // Pop items at same or greater position (they're siblings or deeper, not parents)
974                while let Some(&(pos, _)) = stack.last() {
975                    if pos >= current_pos {
976                        stack.pop();
977                    } else {
978                        break;
979                    }
980                }
981
982                // Check if immediate parent has different type - this is mixed nesting
983                if let Some(&(_, parent_is_ordered)) = stack.last()
984                    && parent_is_ordered != list_item.is_ordered
985                {
986                    return true; // Found mixed nesting - early exit
987                }
988
989                stack.push((current_pos, list_item.is_ordered));
990            } else {
991                // Non-list line (but not blank) - could be paragraph or other content
992                last_was_blank = false;
993            }
994        }
995
996        false
997    }
998
999    /// Map a byte offset to (line, column)
1000    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1001        match self.line_offsets.binary_search(&offset) {
1002            Ok(line) => (line + 1, 1),
1003            Err(line) => {
1004                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1005                (line, offset - line_start + 1)
1006            }
1007        }
1008    }
1009
1010    /// Check if a position is within a code block or code span
1011    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1012        // Check code blocks first
1013        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1014            return true;
1015        }
1016
1017        // Check inline code spans (lazy load if needed)
1018        self.code_spans()
1019            .iter()
1020            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1021    }
1022
1023    /// Get line information by line number (1-indexed)
1024    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1025        if line_num > 0 {
1026            self.lines.get(line_num - 1)
1027        } else {
1028            None
1029        }
1030    }
1031
1032    /// Get byte offset for a line number (1-indexed)
1033    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1034        self.line_info(line_num).map(|info| info.byte_offset)
1035    }
1036
1037    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1038    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1039        let normalized_id = ref_id.to_lowercase();
1040        self.reference_defs_map
1041            .get(&normalized_id)
1042            .map(|&idx| self.reference_defs[idx].url.as_str())
1043    }
1044
1045    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1046    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1047        let normalized_id = ref_id.to_lowercase();
1048        self.reference_defs_map
1049            .get(&normalized_id)
1050            .map(|&idx| &self.reference_defs[idx])
1051    }
1052
1053    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1054    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1055        let normalized_id = ref_id.to_lowercase();
1056        self.reference_defs_map.contains_key(&normalized_id)
1057    }
1058
1059    /// Check if a line is part of a list block
1060    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1061        self.list_blocks
1062            .iter()
1063            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1064    }
1065
1066    /// Get the list block containing a specific line
1067    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1068        self.list_blocks
1069            .iter()
1070            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1071    }
1072
1073    // Compatibility methods for DocumentStructure migration
1074
1075    /// Check if a line is within a code block
1076    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1077        if line_num == 0 || line_num > self.lines.len() {
1078            return false;
1079        }
1080        self.lines[line_num - 1].in_code_block
1081    }
1082
1083    /// Check if a line is within front matter
1084    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1085        if line_num == 0 || line_num > self.lines.len() {
1086            return false;
1087        }
1088        self.lines[line_num - 1].in_front_matter
1089    }
1090
1091    /// Check if a line is within an HTML block
1092    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1093        if line_num == 0 || line_num > self.lines.len() {
1094            return false;
1095        }
1096        self.lines[line_num - 1].in_html_block
1097    }
1098
1099    /// Check if a line and column is within a code span
1100    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1101        if line_num == 0 || line_num > self.lines.len() {
1102            return false;
1103        }
1104
1105        // Use the code spans cache to check
1106        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1107        // Convert col to 0-indexed for comparison
1108        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1109        let code_spans = self.code_spans();
1110        code_spans.iter().any(|span| {
1111            // Check if line is within the span's line range
1112            if line_num < span.line || line_num > span.end_line {
1113                return false;
1114            }
1115
1116            if span.line == span.end_line {
1117                // Single-line span: check column bounds
1118                col_0indexed >= span.start_col && col_0indexed < span.end_col
1119            } else if line_num == span.line {
1120                // First line of multi-line span: anything after start_col is in span
1121                col_0indexed >= span.start_col
1122            } else if line_num == span.end_line {
1123                // Last line of multi-line span: anything before end_col is in span
1124                col_0indexed < span.end_col
1125            } else {
1126                // Middle line of multi-line span: entire line is in span
1127                true
1128            }
1129        })
1130    }
1131
1132    /// Check if a byte offset is within a code span
1133    #[inline]
1134    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1135        let code_spans = self.code_spans();
1136        code_spans
1137            .iter()
1138            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1139    }
1140
1141    /// Check if a byte position is within a reference definition
1142    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1143    #[inline]
1144    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1145        self.reference_defs
1146            .iter()
1147            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1148    }
1149
1150    /// Check if a byte position is within an HTML comment
1151    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1152    /// where k is the number of HTML comments (typically very small)
1153    #[inline]
1154    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1155        self.html_comment_ranges
1156            .iter()
1157            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1158    }
1159
1160    /// Check if a byte position is within an HTML tag (including multiline tags)
1161    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1162    #[inline]
1163    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1164        self.html_tags()
1165            .iter()
1166            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1167    }
1168
1169    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1170    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1171        self.jinja_ranges
1172            .iter()
1173            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1174    }
1175
1176    /// Check if a byte position is within a JSX expression (MDX: {expression})
1177    #[inline]
1178    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1179        self.jsx_expression_ranges
1180            .iter()
1181            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1182    }
1183
1184    /// Check if a byte position is within an MDX comment ({/* ... */})
1185    #[inline]
1186    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1187        self.mdx_comment_ranges
1188            .iter()
1189            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1190    }
1191
1192    /// Get all JSX expression byte ranges
1193    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1194        &self.jsx_expression_ranges
1195    }
1196
1197    /// Get all MDX comment byte ranges
1198    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1199        &self.mdx_comment_ranges
1200    }
1201
1202    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1203    /// Only active in Quarto flavor
1204    #[inline]
1205    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1206        self.citation_ranges
1207            .iter()
1208            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1209    }
1210
1211    /// Get all citation byte ranges (Quarto flavor only)
1212    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1213        &self.citation_ranges
1214    }
1215
1216    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1217    #[inline]
1218    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1219        self.shortcode_ranges
1220            .iter()
1221            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222    }
1223
1224    /// Get all shortcode byte ranges
1225    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1226        &self.shortcode_ranges
1227    }
1228
1229    /// Check if a byte position is within a link reference definition title
1230    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1231        self.reference_defs.iter().any(|def| {
1232            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1233                byte_pos >= start && byte_pos < end
1234            } else {
1235                false
1236            }
1237        })
1238    }
1239
1240    /// Check if content has any instances of a specific character (fast)
1241    pub fn has_char(&self, ch: char) -> bool {
1242        match ch {
1243            '#' => self.char_frequency.hash_count > 0,
1244            '*' => self.char_frequency.asterisk_count > 0,
1245            '_' => self.char_frequency.underscore_count > 0,
1246            '-' => self.char_frequency.hyphen_count > 0,
1247            '+' => self.char_frequency.plus_count > 0,
1248            '>' => self.char_frequency.gt_count > 0,
1249            '|' => self.char_frequency.pipe_count > 0,
1250            '[' => self.char_frequency.bracket_count > 0,
1251            '`' => self.char_frequency.backtick_count > 0,
1252            '<' => self.char_frequency.lt_count > 0,
1253            '!' => self.char_frequency.exclamation_count > 0,
1254            '\n' => self.char_frequency.newline_count > 0,
1255            _ => self.content.contains(ch), // Fallback for other characters
1256        }
1257    }
1258
1259    /// Get count of a specific character (fast)
1260    pub fn char_count(&self, ch: char) -> usize {
1261        match ch {
1262            '#' => self.char_frequency.hash_count,
1263            '*' => self.char_frequency.asterisk_count,
1264            '_' => self.char_frequency.underscore_count,
1265            '-' => self.char_frequency.hyphen_count,
1266            '+' => self.char_frequency.plus_count,
1267            '>' => self.char_frequency.gt_count,
1268            '|' => self.char_frequency.pipe_count,
1269            '[' => self.char_frequency.bracket_count,
1270            '`' => self.char_frequency.backtick_count,
1271            '<' => self.char_frequency.lt_count,
1272            '!' => self.char_frequency.exclamation_count,
1273            '\n' => self.char_frequency.newline_count,
1274            _ => self.content.matches(ch).count(), // Fallback for other characters
1275        }
1276    }
1277
1278    /// Check if content likely contains headings (fast)
1279    pub fn likely_has_headings(&self) -> bool {
1280        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1281    }
1282
1283    /// Check if content likely contains lists (fast)
1284    pub fn likely_has_lists(&self) -> bool {
1285        self.char_frequency.asterisk_count > 0
1286            || self.char_frequency.hyphen_count > 0
1287            || self.char_frequency.plus_count > 0
1288    }
1289
1290    /// Check if content likely contains emphasis (fast)
1291    pub fn likely_has_emphasis(&self) -> bool {
1292        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1293    }
1294
1295    /// Check if content likely contains tables (fast)
1296    pub fn likely_has_tables(&self) -> bool {
1297        self.char_frequency.pipe_count > 2
1298    }
1299
1300    /// Check if content likely contains blockquotes (fast)
1301    pub fn likely_has_blockquotes(&self) -> bool {
1302        self.char_frequency.gt_count > 0
1303    }
1304
1305    /// Check if content likely contains code (fast)
1306    pub fn likely_has_code(&self) -> bool {
1307        self.char_frequency.backtick_count > 0
1308    }
1309
1310    /// Check if content likely contains links or images (fast)
1311    pub fn likely_has_links_or_images(&self) -> bool {
1312        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1313    }
1314
1315    /// Check if content likely contains HTML (fast)
1316    pub fn likely_has_html(&self) -> bool {
1317        self.char_frequency.lt_count > 0
1318    }
1319
1320    /// Get the blockquote prefix for inserting a blank line at the given line index.
1321    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1322    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1323    /// Returns an empty string if the line is not inside a blockquote.
1324    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1325        if let Some(line_info) = self.lines.get(line_idx)
1326            && let Some(ref bq) = line_info.blockquote
1327        {
1328            bq.prefix.trim_end().to_string()
1329        } else {
1330            String::new()
1331        }
1332    }
1333
1334    /// Get HTML tags on a specific line
1335    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1336        self.html_tags()
1337            .iter()
1338            .filter(|tag| tag.line == line_num)
1339            .cloned()
1340            .collect()
1341    }
1342
1343    /// Get emphasis spans on a specific line
1344    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1345        self.emphasis_spans()
1346            .iter()
1347            .filter(|span| span.line == line_num)
1348            .cloned()
1349            .collect()
1350    }
1351
1352    /// Get table rows on a specific line
1353    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1354        self.table_rows()
1355            .iter()
1356            .filter(|row| row.line == line_num)
1357            .cloned()
1358            .collect()
1359    }
1360
1361    /// Get bare URLs on a specific line
1362    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1363        self.bare_urls()
1364            .iter()
1365            .filter(|url| url.line == line_num)
1366            .cloned()
1367            .collect()
1368    }
1369
1370    /// Find the line index for a given byte offset using binary search.
1371    /// Returns (line_index, line_number, column) where:
1372    /// - line_index is the 0-based index in the lines array
1373    /// - line_number is the 1-based line number
1374    /// - column is the byte offset within that line
1375    #[inline]
1376    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1377        // Binary search to find the line containing this byte offset
1378        let idx = match lines.binary_search_by(|line| {
1379            if byte_offset < line.byte_offset {
1380                std::cmp::Ordering::Greater
1381            } else if byte_offset > line.byte_offset + line.byte_len {
1382                std::cmp::Ordering::Less
1383            } else {
1384                std::cmp::Ordering::Equal
1385            }
1386        }) {
1387            Ok(idx) => idx,
1388            Err(idx) => idx.saturating_sub(1),
1389        };
1390
1391        let line = &lines[idx];
1392        let line_num = idx + 1;
1393        let col = byte_offset.saturating_sub(line.byte_offset);
1394
1395        (idx, line_num, col)
1396    }
1397
1398    /// Check if a byte offset is within a code span using binary search
1399    #[inline]
1400    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1401        // Since spans are sorted by byte_offset, use partition_point for binary search
1402        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1403
1404        // Check the span that starts at or before our offset
1405        if idx > 0 {
1406            let span = &code_spans[idx - 1];
1407            if offset >= span.byte_offset && offset < span.byte_end {
1408                return true;
1409            }
1410        }
1411
1412        false
1413    }
1414
1415    /// Collect byte ranges of all links using pulldown-cmark
1416    /// This is used to skip heading detection for lines that fall within link syntax
1417    /// (e.g., multiline links like `[text](url\n#fragment)`)
1418    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1419        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1420
1421        let mut link_ranges = Vec::new();
1422        let mut options = Options::empty();
1423        options.insert(Options::ENABLE_WIKILINKS);
1424        options.insert(Options::ENABLE_FOOTNOTES);
1425
1426        let parser = Parser::new_ext(content, options).into_offset_iter();
1427        let mut link_stack: Vec<usize> = Vec::new();
1428
1429        for (event, range) in parser {
1430            match event {
1431                Event::Start(Tag::Link { .. }) => {
1432                    link_stack.push(range.start);
1433                }
1434                Event::End(TagEnd::Link) => {
1435                    if let Some(start_pos) = link_stack.pop() {
1436                        link_ranges.push((start_pos, range.end));
1437                    }
1438                }
1439                _ => {}
1440            }
1441        }
1442
1443        link_ranges
1444    }
1445
1446    /// Parse all links in the content
1447    fn parse_links(
1448        content: &'a str,
1449        lines: &[LineInfo],
1450        code_blocks: &[(usize, usize)],
1451        code_spans: &[CodeSpan],
1452        flavor: MarkdownFlavor,
1453        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1454    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1455        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1456        use std::collections::HashSet;
1457
1458        let mut links = Vec::with_capacity(content.len() / 500);
1459        let mut broken_links = Vec::new();
1460        let mut footnote_refs = Vec::new();
1461
1462        // Track byte positions of links found by pulldown-cmark
1463        let mut found_positions = HashSet::new();
1464
1465        // Use pulldown-cmark's streaming parser with BrokenLink callback
1466        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1467        // This automatically handles:
1468        // - Escaped links (won't generate events)
1469        // - Links in code blocks/spans (won't generate Link events)
1470        // - Images (generates Tag::Image instead)
1471        // - Reference resolution (dest_url is already resolved!)
1472        // - Broken references (callback is invoked)
1473        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1474        let mut options = Options::empty();
1475        options.insert(Options::ENABLE_WIKILINKS);
1476        options.insert(Options::ENABLE_FOOTNOTES);
1477
1478        let parser = Parser::new_with_broken_link_callback(
1479            content,
1480            options,
1481            Some(|link: BrokenLink<'_>| {
1482                broken_links.push(BrokenLinkInfo {
1483                    reference: link.reference.to_string(),
1484                    span: link.span.clone(),
1485                });
1486                None
1487            }),
1488        )
1489        .into_offset_iter();
1490
1491        let mut link_stack: Vec<(
1492            usize,
1493            usize,
1494            pulldown_cmark::CowStr<'a>,
1495            LinkType,
1496            pulldown_cmark::CowStr<'a>,
1497        )> = Vec::new();
1498        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1499
1500        for (event, range) in parser {
1501            match event {
1502                Event::Start(Tag::Link {
1503                    link_type,
1504                    dest_url,
1505                    id,
1506                    ..
1507                }) => {
1508                    // Link start - record position, URL, and reference ID
1509                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1510                    text_chunks.clear();
1511                }
1512                Event::Text(text) if !link_stack.is_empty() => {
1513                    // Track text content with its byte range
1514                    text_chunks.push((text.to_string(), range.start, range.end));
1515                }
1516                Event::Code(code) if !link_stack.is_empty() => {
1517                    // Include inline code in link text (with backticks)
1518                    let code_text = format!("`{code}`");
1519                    text_chunks.push((code_text, range.start, range.end));
1520                }
1521                Event::End(TagEnd::Link) => {
1522                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1523                        // Skip if in HTML comment
1524                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1525                            text_chunks.clear();
1526                            continue;
1527                        }
1528
1529                        // Find line and column information
1530                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1531
1532                        // Skip if this link is on a MkDocs snippet line
1533                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1534                            text_chunks.clear();
1535                            continue;
1536                        }
1537
1538                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1539
1540                        let is_reference = matches!(
1541                            link_type,
1542                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1543                        );
1544
1545                        // Extract link text directly from source bytes to preserve escaping
1546                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1547                        let link_text = if start_pos < content.len() {
1548                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1549
1550                            // Find MATCHING ] by tracking bracket depth for nested brackets
1551                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1552                            // Brackets inside code spans (between backticks) should be ignored
1553                            let mut close_pos = None;
1554                            let mut depth = 0;
1555                            let mut in_code_span = false;
1556
1557                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1558                                // Count preceding backslashes
1559                                let mut backslash_count = 0;
1560                                let mut j = i;
1561                                while j > 0 && link_bytes[j - 1] == b'\\' {
1562                                    backslash_count += 1;
1563                                    j -= 1;
1564                                }
1565                                let is_escaped = backslash_count % 2 != 0;
1566
1567                                // Track code spans - backticks toggle in/out of code
1568                                if byte == b'`' && !is_escaped {
1569                                    in_code_span = !in_code_span;
1570                                }
1571
1572                                // Only count brackets when NOT in a code span
1573                                if !is_escaped && !in_code_span {
1574                                    if byte == b'[' {
1575                                        depth += 1;
1576                                    } else if byte == b']' {
1577                                        if depth == 0 {
1578                                            // Found the matching closing bracket
1579                                            close_pos = Some(i);
1580                                            break;
1581                                        } else {
1582                                            depth -= 1;
1583                                        }
1584                                    }
1585                                }
1586                            }
1587
1588                            if let Some(pos) = close_pos {
1589                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1590                            } else {
1591                                Cow::Borrowed("")
1592                            }
1593                        } else {
1594                            Cow::Borrowed("")
1595                        };
1596
1597                        // For reference links, use the actual reference ID from pulldown-cmark
1598                        let reference_id = if is_reference && !ref_id.is_empty() {
1599                            Some(Cow::Owned(ref_id.to_lowercase()))
1600                        } else if is_reference {
1601                            // For collapsed/shortcut references without explicit ID, use the link text
1602                            Some(Cow::Owned(link_text.to_lowercase()))
1603                        } else {
1604                            None
1605                        };
1606
1607                        // Track this position as found
1608                        found_positions.insert(start_pos);
1609
1610                        links.push(ParsedLink {
1611                            line: line_num,
1612                            start_col: col_start,
1613                            end_col: col_end,
1614                            byte_offset: start_pos,
1615                            byte_end: range.end,
1616                            text: link_text,
1617                            url: Cow::Owned(url.to_string()),
1618                            is_reference,
1619                            reference_id,
1620                            link_type,
1621                        });
1622
1623                        text_chunks.clear();
1624                    }
1625                }
1626                Event::FootnoteReference(footnote_id) => {
1627                    // Capture footnote references like [^1], [^note]
1628                    // Skip if in HTML comment
1629                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1630                        continue;
1631                    }
1632
1633                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1634                    footnote_refs.push(FootnoteRef {
1635                        id: footnote_id.to_string(),
1636                        line: line_num,
1637                        byte_offset: range.start,
1638                        byte_end: range.end,
1639                    });
1640                }
1641                _ => {}
1642            }
1643        }
1644
1645        // Also find undefined references using regex
1646        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1647        // because the reference is undefined
1648        for cap in LINK_PATTERN.captures_iter(content) {
1649            let full_match = cap.get(0).unwrap();
1650            let match_start = full_match.start();
1651            let match_end = full_match.end();
1652
1653            // Skip if this was already found by pulldown-cmark (it's a valid link)
1654            if found_positions.contains(&match_start) {
1655                continue;
1656            }
1657
1658            // Skip if escaped
1659            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1660                continue;
1661            }
1662
1663            // Skip if it's an image
1664            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1665                continue;
1666            }
1667
1668            // Skip if in code block
1669            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1670                continue;
1671            }
1672
1673            // Skip if in code span
1674            if Self::is_offset_in_code_span(code_spans, match_start) {
1675                continue;
1676            }
1677
1678            // Skip if in HTML comment
1679            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1680                continue;
1681            }
1682
1683            // Find line and column information
1684            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1685
1686            // Skip if this link is on a MkDocs snippet line
1687            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1688                continue;
1689            }
1690
1691            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1692
1693            let text = cap.get(1).map_or("", |m| m.as_str());
1694
1695            // Only process reference links (group 6)
1696            if let Some(ref_id) = cap.get(6) {
1697                let ref_id_str = ref_id.as_str();
1698                let normalized_ref = if ref_id_str.is_empty() {
1699                    Cow::Owned(text.to_lowercase()) // Implicit reference
1700                } else {
1701                    Cow::Owned(ref_id_str.to_lowercase())
1702                };
1703
1704                // This is an undefined reference (pulldown-cmark didn't parse it)
1705                links.push(ParsedLink {
1706                    line: line_num,
1707                    start_col: col_start,
1708                    end_col: col_end,
1709                    byte_offset: match_start,
1710                    byte_end: match_end,
1711                    text: Cow::Borrowed(text),
1712                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1713                    is_reference: true,
1714                    reference_id: Some(normalized_ref),
1715                    link_type: LinkType::Reference, // Undefined references are reference-style
1716                });
1717            }
1718        }
1719
1720        (links, broken_links, footnote_refs)
1721    }
1722
1723    /// Parse all images in the content
1724    fn parse_images(
1725        content: &'a str,
1726        lines: &[LineInfo],
1727        code_blocks: &[(usize, usize)],
1728        code_spans: &[CodeSpan],
1729        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1730    ) -> Vec<ParsedImage<'a>> {
1731        use crate::utils::skip_context::is_in_html_comment_ranges;
1732        use std::collections::HashSet;
1733
1734        // Pre-size based on a heuristic: images are less common than links
1735        let mut images = Vec::with_capacity(content.len() / 1000);
1736        let mut found_positions = HashSet::new();
1737
1738        // Use pulldown-cmark for parsing - more accurate and faster
1739        let parser = Parser::new(content).into_offset_iter();
1740        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1741            Vec::new();
1742        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1743
1744        for (event, range) in parser {
1745            match event {
1746                Event::Start(Tag::Image {
1747                    link_type,
1748                    dest_url,
1749                    id,
1750                    ..
1751                }) => {
1752                    image_stack.push((range.start, dest_url, link_type, id));
1753                    text_chunks.clear();
1754                }
1755                Event::Text(text) if !image_stack.is_empty() => {
1756                    text_chunks.push((text.to_string(), range.start, range.end));
1757                }
1758                Event::Code(code) if !image_stack.is_empty() => {
1759                    let code_text = format!("`{code}`");
1760                    text_chunks.push((code_text, range.start, range.end));
1761                }
1762                Event::End(TagEnd::Image) => {
1763                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1764                        // Skip if in code block
1765                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1766                            continue;
1767                        }
1768
1769                        // Skip if in code span
1770                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1771                            continue;
1772                        }
1773
1774                        // Skip if in HTML comment
1775                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1776                            continue;
1777                        }
1778
1779                        // Find line and column using binary search
1780                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1781                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1782
1783                        let is_reference = matches!(
1784                            link_type,
1785                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1786                        );
1787
1788                        // Extract alt text directly from source bytes to preserve escaping
1789                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1790                        let alt_text = if start_pos < content.len() {
1791                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1792
1793                            // Find MATCHING ] by tracking bracket depth for nested brackets
1794                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1795                            let mut close_pos = None;
1796                            let mut depth = 0;
1797
1798                            if image_bytes.len() > 2 {
1799                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1800                                    // Count preceding backslashes
1801                                    let mut backslash_count = 0;
1802                                    let mut j = i;
1803                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1804                                        backslash_count += 1;
1805                                        j -= 1;
1806                                    }
1807                                    let is_escaped = backslash_count % 2 != 0;
1808
1809                                    if !is_escaped {
1810                                        if byte == b'[' {
1811                                            depth += 1;
1812                                        } else if byte == b']' {
1813                                            if depth == 0 {
1814                                                // Found the matching closing bracket
1815                                                close_pos = Some(i);
1816                                                break;
1817                                            } else {
1818                                                depth -= 1;
1819                                            }
1820                                        }
1821                                    }
1822                                }
1823                            }
1824
1825                            if let Some(pos) = close_pos {
1826                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1827                            } else {
1828                                Cow::Borrowed("")
1829                            }
1830                        } else {
1831                            Cow::Borrowed("")
1832                        };
1833
1834                        let reference_id = if is_reference && !ref_id.is_empty() {
1835                            Some(Cow::Owned(ref_id.to_lowercase()))
1836                        } else if is_reference {
1837                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1838                        } else {
1839                            None
1840                        };
1841
1842                        found_positions.insert(start_pos);
1843                        images.push(ParsedImage {
1844                            line: line_num,
1845                            start_col: col_start,
1846                            end_col: col_end,
1847                            byte_offset: start_pos,
1848                            byte_end: range.end,
1849                            alt_text,
1850                            url: Cow::Owned(url.to_string()),
1851                            is_reference,
1852                            reference_id,
1853                            link_type,
1854                        });
1855                    }
1856                }
1857                _ => {}
1858            }
1859        }
1860
1861        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1862        for cap in IMAGE_PATTERN.captures_iter(content) {
1863            let full_match = cap.get(0).unwrap();
1864            let match_start = full_match.start();
1865            let match_end = full_match.end();
1866
1867            // Skip if already found by pulldown-cmark
1868            if found_positions.contains(&match_start) {
1869                continue;
1870            }
1871
1872            // Skip if the ! is escaped
1873            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1874                continue;
1875            }
1876
1877            // Skip if in code block, code span, or HTML comment
1878            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1879                || Self::is_offset_in_code_span(code_spans, match_start)
1880                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1881            {
1882                continue;
1883            }
1884
1885            // Only process reference images (undefined references not found by pulldown-cmark)
1886            if let Some(ref_id) = cap.get(6) {
1887                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1888                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1889                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1890                let ref_id_str = ref_id.as_str();
1891                let normalized_ref = if ref_id_str.is_empty() {
1892                    Cow::Owned(alt_text.to_lowercase())
1893                } else {
1894                    Cow::Owned(ref_id_str.to_lowercase())
1895                };
1896
1897                images.push(ParsedImage {
1898                    line: line_num,
1899                    start_col: col_start,
1900                    end_col: col_end,
1901                    byte_offset: match_start,
1902                    byte_end: match_end,
1903                    alt_text: Cow::Borrowed(alt_text),
1904                    url: Cow::Borrowed(""),
1905                    is_reference: true,
1906                    reference_id: Some(normalized_ref),
1907                    link_type: LinkType::Reference, // Undefined references are reference-style
1908                });
1909            }
1910        }
1911
1912        images
1913    }
1914
1915    /// Parse reference definitions
1916    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1917        // Pre-size based on lines count as reference definitions are line-based
1918        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1919
1920        for (line_idx, line_info) in lines.iter().enumerate() {
1921            // Skip lines in code blocks
1922            if line_info.in_code_block {
1923                continue;
1924            }
1925
1926            let line = line_info.content(content);
1927            let line_num = line_idx + 1;
1928
1929            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1930                let id_raw = cap.get(1).unwrap().as_str();
1931
1932                // Skip footnote definitions - they use [^id]: syntax and are semantically
1933                // different from reference link definitions
1934                if id_raw.starts_with('^') {
1935                    continue;
1936                }
1937
1938                let id = id_raw.to_lowercase();
1939                let url = cap.get(2).unwrap().as_str().to_string();
1940                let title_match = cap.get(3).or_else(|| cap.get(4));
1941                let title = title_match.map(|m| m.as_str().to_string());
1942
1943                // Calculate byte positions
1944                // The match starts at the beginning of the line (0) and extends to the end
1945                let match_obj = cap.get(0).unwrap();
1946                let byte_offset = line_info.byte_offset + match_obj.start();
1947                let byte_end = line_info.byte_offset + match_obj.end();
1948
1949                // Calculate title byte positions (includes the quote character before content)
1950                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1951                    // The match is the content inside quotes, so we include the quote before
1952                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1953                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1954                    (Some(start), Some(end))
1955                } else {
1956                    (None, None)
1957                };
1958
1959                refs.push(ReferenceDef {
1960                    line: line_num,
1961                    id,
1962                    url,
1963                    title,
1964                    byte_offset,
1965                    byte_end,
1966                    title_byte_start,
1967                    title_byte_end,
1968                });
1969            }
1970        }
1971
1972        refs
1973    }
1974
/// Hand-rolled blockquote prefix splitter (used instead of a regex for speed).
///
/// Consumes every leading `>` marker, so nested quotes such as `> > > content`
/// are handled. Each marker may be preceded by whitespace and followed by at
/// most one space or tab, which is counted as part of the prefix.
///
/// Returns `Some((prefix_with_ws, content_after_prefix))`, or `None` when the
/// line does not start with a `>` after leading whitespace.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    // Byte length of the prefix consumed so far
    let mut prefix_len = 0usize;
    let mut saw_marker = false;

    loop {
        let rest = &line[prefix_len..];
        let Some(after_marker) = rest.trim_start().strip_prefix('>') else {
            break;
        };
        saw_marker = true;

        // One optional space or tab right after '>' belongs to the prefix
        let padding = usize::from(after_marker.starts_with(' ') || after_marker.starts_with('\t'));

        // `after_marker` is a suffix of `line`, so its start offset is the
        // difference of the two lengths
        prefix_len = line.len() - after_marker.len() + padding;
    }

    saw_marker.then_some(line.split_at(prefix_len))
}
2015
2016    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2017    ///
2018    /// Returns a HashMap keyed by line byte offset, containing:
2019    /// `(is_ordered, marker, marker_column, content_column, number)`
2020    ///
2021    /// ## Why pulldown-cmark?
2022    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2023    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2024    /// This fixes issue #253 where continuation lines were falsely detected.
2025    ///
2026    /// ## Tab indentation quirk
2027    /// Pulldown-cmark reports nested list items at the newline character position
2028    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2029    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2030    /// We detect this and advance to the correct line.
2031    ///
2032    /// ## HashMap key strategy
2033    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2034    /// that resolve to the same line (after newline adjustment). The first event
2035    /// for each line is authoritative.
2036    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2037    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2038    /// This avoids a separate parse for emphasis detection.
2039    fn detect_list_items_and_emphasis_with_pulldown(
2040        content: &str,
2041        line_offsets: &[usize],
2042        flavor: MarkdownFlavor,
2043        front_matter_end: usize,
2044        code_blocks: &[(usize, usize)],
2045    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2046        use std::collections::HashMap;
2047
2048        let mut list_items = HashMap::new();
2049        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2050
2051        let mut options = Options::empty();
2052        options.insert(Options::ENABLE_TABLES);
2053        options.insert(Options::ENABLE_FOOTNOTES);
2054        options.insert(Options::ENABLE_STRIKETHROUGH);
2055        options.insert(Options::ENABLE_TASKLISTS);
2056        // Always enable GFM features for consistency with existing behavior
2057        options.insert(Options::ENABLE_GFM);
2058
2059        // Suppress unused variable warning
2060        let _ = flavor;
2061
2062        let parser = Parser::new_ext(content, options).into_offset_iter();
2063        let mut list_depth: usize = 0;
2064        let mut list_stack: Vec<bool> = Vec::new();
2065
2066        for (event, range) in parser {
2067            match event {
2068                // Capture emphasis spans (for MD030's emphasis detection)
2069                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2070                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2071                        2
2072                    } else {
2073                        1
2074                    };
2075                    let match_start = range.start;
2076                    let match_end = range.end;
2077
2078                    // Skip if in code block
2079                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2080                        // Determine marker character by looking at the content at the start
2081                        let marker = content[match_start..].chars().next().unwrap_or('*');
2082                        if marker == '*' || marker == '_' {
2083                            // Extract content between markers
2084                            let content_start = match_start + marker_count;
2085                            let content_end = if match_end >= marker_count {
2086                                match_end - marker_count
2087                            } else {
2088                                match_end
2089                            };
2090                            let content_part = if content_start < content_end && content_end <= content.len() {
2091                                &content[content_start..content_end]
2092                            } else {
2093                                ""
2094                            };
2095
2096                            // Find which line this emphasis is on using line_offsets
2097                            let line_idx = match line_offsets.binary_search(&match_start) {
2098                                Ok(idx) => idx,
2099                                Err(idx) => idx.saturating_sub(1),
2100                            };
2101                            let line_num = line_idx + 1;
2102                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2103                            let col_start = match_start - line_start;
2104                            let col_end = match_end - line_start;
2105
2106                            emphasis_spans.push(EmphasisSpan {
2107                                line: line_num,
2108                                start_col: col_start,
2109                                end_col: col_end,
2110                                byte_offset: match_start,
2111                                byte_end: match_end,
2112                                marker,
2113                                marker_count,
2114                                content: content_part.to_string(),
2115                            });
2116                        }
2117                    }
2118                }
2119                Event::Start(Tag::List(start_number)) => {
2120                    list_depth += 1;
2121                    list_stack.push(start_number.is_some());
2122                }
2123                Event::End(TagEnd::List(_)) => {
2124                    list_depth = list_depth.saturating_sub(1);
2125                    list_stack.pop();
2126                }
2127                Event::Start(Tag::Item) if list_depth > 0 => {
2128                    // Get the ordered state for the CURRENT (innermost) list
2129                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2130                    // Find which line this byte offset corresponds to
2131                    let item_start = range.start;
2132
2133                    // Binary search to find the line number
2134                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2135                        Ok(idx) => idx,
2136                        Err(idx) => idx.saturating_sub(1),
2137                    };
2138
2139                    // Pulldown-cmark reports nested list items at the newline before the item
2140                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2141                    // Advance to the actual content line in this case.
2142                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2143                        line_idx += 1;
2144                    }
2145
2146                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2147                    if front_matter_end > 0 && line_idx < front_matter_end {
2148                        continue;
2149                    }
2150
2151                    if line_idx < line_offsets.len() {
2152                        let line_start_byte = line_offsets[line_idx];
2153                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2154                        let line = &content[line_start_byte..line_end.min(content.len())];
2155
2156                        // Strip trailing newline
2157                        let line = line
2158                            .strip_suffix('\n')
2159                            .or_else(|| line.strip_suffix("\r\n"))
2160                            .unwrap_or(line);
2161
2162                        // Strip blockquote prefix if present
2163                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2164                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2165                            (prefix.len(), content)
2166                        } else {
2167                            (0, line)
2168                        };
2169
2170                        // Parse the list marker from the actual line
2171                        if current_list_is_ordered {
2172                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2173                                Self::parse_ordered_list(line_to_parse)
2174                            {
2175                                let marker = format!("{number_str}{delimiter}");
2176                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2177                                let content_column = marker_column + marker.len() + spacing.len();
2178                                let number = number_str.parse().ok();
2179
2180                                list_items.entry(line_start_byte).or_insert((
2181                                    true,
2182                                    marker,
2183                                    marker_column,
2184                                    content_column,
2185                                    number,
2186                                ));
2187                            }
2188                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2189                            Self::parse_unordered_list(line_to_parse)
2190                        {
2191                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2192                            let content_column = marker_column + 1 + spacing.len();
2193
2194                            list_items.entry(line_start_byte).or_insert((
2195                                false,
2196                                marker.to_string(),
2197                                marker_column,
2198                                content_column,
2199                                None,
2200                            ));
2201                        }
2202                    }
2203                }
2204                _ => {}
2205            }
2206        }
2207
2208        (list_items, emphasis_spans)
2209    }
2210
2211    /// Fast unordered list parser - replaces regex for 5-10x speedup
2212    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2213    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2214    #[inline]
2215    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2216        let bytes = line.as_bytes();
2217        let mut i = 0;
2218
2219        // Skip leading whitespace
2220        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2221            i += 1;
2222        }
2223
2224        // Check for marker
2225        if i >= bytes.len() {
2226            return None;
2227        }
2228        let marker = bytes[i] as char;
2229        if marker != '-' && marker != '*' && marker != '+' {
2230            return None;
2231        }
2232        let marker_pos = i;
2233        i += 1;
2234
2235        // Collect spacing after marker (space or tab only)
2236        let spacing_start = i;
2237        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2238            i += 1;
2239        }
2240
2241        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2242    }
2243
2244    /// Fast ordered list parser - replaces regex for 5-10x speedup
2245    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2246    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2247    #[inline]
2248    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2249        let bytes = line.as_bytes();
2250        let mut i = 0;
2251
2252        // Skip leading whitespace
2253        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2254            i += 1;
2255        }
2256
2257        // Collect digits
2258        let number_start = i;
2259        while i < bytes.len() && bytes[i].is_ascii_digit() {
2260            i += 1;
2261        }
2262        if i == number_start {
2263            return None; // No digits found
2264        }
2265
2266        // Check for delimiter
2267        if i >= bytes.len() {
2268            return None;
2269        }
2270        let delimiter = bytes[i] as char;
2271        if delimiter != '.' && delimiter != ')' {
2272            return None;
2273        }
2274        let delimiter_pos = i;
2275        i += 1;
2276
2277        // Collect spacing after delimiter (space or tab only)
2278        let spacing_start = i;
2279        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2280            i += 1;
2281        }
2282
2283        Some((
2284            &line[..number_start],
2285            &line[number_start..delimiter_pos],
2286            delimiter,
2287            &line[spacing_start..i],
2288            &line[i..],
2289        ))
2290    }
2291
2292    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2293    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2294    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2295        let num_lines = line_offsets.len();
2296        let mut in_code_block = vec![false; num_lines];
2297
2298        // For each code block, mark all lines within it
2299        for &(start, end) in code_blocks {
2300            // Ensure we're at valid UTF-8 boundaries
2301            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2302                let mut boundary = start;
2303                while boundary > 0 && !content.is_char_boundary(boundary) {
2304                    boundary -= 1;
2305                }
2306                boundary
2307            } else {
2308                start
2309            };
2310
2311            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2312                let mut boundary = end;
2313                while boundary < content.len() && !content.is_char_boundary(boundary) {
2314                    boundary += 1;
2315                }
2316                boundary
2317            } else {
2318                end.min(content.len())
2319            };
2320
2321            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2322            // That function now has proper list context awareness (see code_block_utils.rs)
2323            // and correctly distinguishes between:
2324            // - Fenced code blocks (``` or ~~~)
2325            // - Indented code blocks at document level (4 spaces + blank line before)
2326            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2327            //
2328            // We no longer need to re-validate here. The original validation logic
2329            // was causing false positives by marking list continuation paragraphs as
2330            // code blocks when they have 4 spaces of indentation.
2331
2332            // Use binary search to find the first and last line indices
2333            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2334            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2335            //
2336            // Find the line that CONTAINS safe_start: the line with the largest
2337            // start offset that is <= safe_start. partition_point gives us the
2338            // first line that starts AFTER safe_start, so we subtract 1.
2339            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2340            let first_line = first_line_after.saturating_sub(1);
2341            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2342
2343            // Mark all lines in the range at once
2344            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2345                *flag = true;
2346            }
2347        }
2348
2349        in_code_block
2350    }
2351
2352    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2353    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2354    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2355        let content_lines: Vec<&str> = content.lines().collect();
2356        let num_lines = content_lines.len();
2357        let mut in_math_block = vec![false; num_lines];
2358
2359        let mut inside_math = false;
2360
2361        for (i, line) in content_lines.iter().enumerate() {
2362            // Skip lines that are in code blocks - math delimiters inside code are literal
2363            if code_block_map.get(i).copied().unwrap_or(false) {
2364                continue;
2365            }
2366
2367            let trimmed = line.trim();
2368
2369            // Check for math block delimiter ($$)
2370            // A line with just $$ toggles the math block state
2371            if trimmed == "$$" {
2372                if inside_math {
2373                    // Closing delimiter - this line is still part of the math block
2374                    in_math_block[i] = true;
2375                    inside_math = false;
2376                } else {
2377                    // Opening delimiter - this line starts the math block
2378                    in_math_block[i] = true;
2379                    inside_math = true;
2380                }
2381            } else if inside_math {
2382                // Content inside math block
2383                in_math_block[i] = true;
2384            }
2385        }
2386
2387        in_math_block
2388    }
2389
2390    /// Pre-compute basic line information (without headings/blockquotes)
2391    /// Also returns emphasis spans detected during the pulldown-cmark parse
2392    fn compute_basic_line_info(
2393        content: &str,
2394        line_offsets: &[usize],
2395        code_blocks: &[(usize, usize)],
2396        flavor: MarkdownFlavor,
2397        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2398        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2399        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2400    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2401        let content_lines: Vec<&str> = content.lines().collect();
2402        let mut lines = Vec::with_capacity(content_lines.len());
2403
2404        // Pre-compute which lines are in code blocks
2405        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2406
2407        // Pre-compute which lines are in math blocks ($$ ... $$)
2408        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2409
2410        // Detect front matter boundaries FIRST, before any other parsing
2411        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2412        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2413
2414        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2415        // (context-aware, eliminates false positives)
2416        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2417            content,
2418            line_offsets,
2419            flavor,
2420            front_matter_end,
2421            code_blocks,
2422        );
2423
2424        for (i, line) in content_lines.iter().enumerate() {
2425            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2426            let indent = line.len() - line.trim_start().len();
2427            // Compute visual indent with proper CommonMark tab expansion
2428            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2429
2430            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2431            let blockquote_parse = Self::parse_blockquote_prefix(line);
2432
2433            // For blank detection, consider blockquote context
2434            let is_blank = if let Some((_, content)) = blockquote_parse {
2435                // In blockquote context, check if content after prefix is blank
2436                content.trim().is_empty()
2437            } else {
2438                line.trim().is_empty()
2439            };
2440
2441            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2442            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2443
2444            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2445            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2446                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2447            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2448            // This ensures content after `-->` on the same line is not incorrectly skipped
2449            let line_end_offset = byte_offset + line.len();
2450            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2451                html_comment_ranges,
2452                byte_offset,
2453                line_end_offset,
2454            );
2455            // Use pulldown-cmark's list detection for context-aware parsing
2456            // This eliminates false positives on continuation lines (issue #253)
2457            let list_item =
2458                list_item_map
2459                    .get(&byte_offset)
2460                    .map(
2461                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2462                            marker: marker.clone(),
2463                            is_ordered: *is_ordered,
2464                            number: *number,
2465                            marker_column: *marker_column,
2466                            content_column: *content_column,
2467                        },
2468                    );
2469
2470            // Detect horizontal rules (only outside code blocks and frontmatter)
2471            // Uses CommonMark-compliant check including leading indentation validation
2472            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2473            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2474
2475            // Get math block status for this line
2476            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2477
2478            // Check if line is inside a Quarto div block
2479            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2480                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2481
2482            lines.push(LineInfo {
2483                byte_offset,
2484                byte_len: line.len(),
2485                indent,
2486                visual_indent,
2487                is_blank,
2488                in_code_block,
2489                in_front_matter,
2490                in_html_block: false, // Will be populated after line creation
2491                in_html_comment,
2492                list_item,
2493                heading: None,    // Will be populated in second pass for Setext headings
2494                blockquote: None, // Will be populated after line creation
2495                in_mkdocstrings,
2496                in_esm_block: false, // Will be populated after line creation for MDX files
2497                in_code_span_continuation: false, // Will be populated after code spans are parsed
2498                is_horizontal_rule: is_hr,
2499                in_math_block,
2500                in_quarto_div,
2501                in_jsx_expression: false,  // Will be populated for MDX files
2502                in_mdx_comment: false,     // Will be populated for MDX files
2503                in_jsx_component: false,   // Will be populated for MDX files
2504                in_jsx_fragment: false,    // Will be populated for MDX files
2505                in_admonition: false,      // Will be populated for MkDocs files
2506                in_content_tab: false,     // Will be populated for MkDocs files
2507                in_definition_list: false, // Will be populated for MkDocs files
2508            });
2509        }
2510
2511        (lines, emphasis_spans)
2512    }
2513
    /// Detect headings and blockquotes (called after HTML block detection)
    ///
    /// Walks every entry of `lines` and, in place:
    /// - sets `blockquote` for lines outside front matter that parse as a blockquote
    ///   (this happens even for lines inside code blocks),
    /// - sets `is_horizontal_rule` when the blockquote content is a horizontal rule,
    /// - sets `heading` for ATX headings and for Setext headings (text line followed
    ///   by an `=`/`-` underline).
    ///
    /// Heading detection is skipped for lines in code blocks, front matter, HTML
    /// blocks, and for blank lines.
    ///
    /// Parameters:
    /// - `content`: the document text; it is split with `str::lines()` and indexed
    ///   in parallel with `lines`.
    /// - `lines`: per-line records, mutated in place.
    /// - `flavor`: only consulted to ignore MkDocs snippet section markers.
    /// - `html_comment_ranges`: headings whose line starts inside one of these
    ///   ranges are not recorded.
    /// - `link_byte_ranges`: `(start, end)` byte ranges of links; a line starting
    ///   strictly inside one is not treated as an ATX heading.
    fn detect_headings_and_blockquotes(
        content: &str,
        lines: &mut [LineInfo],
        flavor: MarkdownFlavor,
        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
        link_byte_ranges: &[(usize, usize)],
    ) {
        // Regex for heading detection
        // ATX groups: 1 = leading whitespace, 2 = one to six `#`,
        // 3 = whitespace after the hashes (may be empty), 4 = rest of the line.
        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
        // Setext underline: optional leading whitespace, a run of only `=` or only `-`,
        // optional trailing whitespace.
        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());

        let content_lines: Vec<&str> = content.lines().collect();

        // Detect front matter boundaries to skip those lines
        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);

        // Detect headings (including Setext which needs look-ahead) and blockquotes
        for i in 0..lines.len() {
            // NOTE(review): direct indexing assumes `lines` has no more entries than
            // `content.lines()` yields — confirm against the caller.
            let line = content_lines[i];

            // Detect blockquotes FIRST, before any skip conditions.
            // A line can be both a blockquote AND contain a code block inside it.
            // We need to know about the blockquote marker regardless of code block status.
            // Skip only frontmatter lines - those are never blockquotes.
            if !(front_matter_end > 0 && i < front_matter_end)
                && let Some(bq) = parse_blockquote_detailed(line)
            {
                let nesting_level = bq.markers.len();
                let marker_column = bq.indent.len();
                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
                // Marker directly followed by content, with no whitespace in between
                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
                // Only literal space characters are counted here (tabs are not)
                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
                // Marker with nothing after it at all: no spacing and no content
                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();

                lines[i].blockquote = Some(BlockquoteInfo {
                    nesting_level,
                    indent: bq.indent.to_string(),
                    marker_column,
                    prefix,
                    content: bq.content.to_string(),
                    has_no_space_after_marker: has_no_space,
                    has_multiple_spaces_after_marker: has_multiple_spaces,
                    needs_md028_fix,
                });

                // Update is_horizontal_rule for blockquote content
                // The original detection doesn't strip blockquote prefix, so we need to check here
                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
                    lines[i].is_horizontal_rule = true;
                }
            }

            // Now apply skip conditions for heading detection
            if lines[i].in_code_block {
                continue;
            }

            // Skip lines in front matter
            if front_matter_end > 0 && i < front_matter_end {
                continue;
            }

            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
            if lines[i].in_html_block {
                continue;
            }

            // Skip heading detection for blank lines
            if lines[i].is_blank {
                continue;
            }

            // Check for ATX headings (but skip MkDocs snippet lines)
            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
            } else {
                false
            };

            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
                    continue;
                }
                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
                // This prevents false positives where `#fragment` is detected as a heading
                // Both bounds are strict: a line that begins exactly at a link's start is still
                // considered for heading detection.
                let line_offset = lines[i].byte_offset;
                if link_byte_ranges
                    .iter()
                    .any(|&(start, end)| line_offset > start && line_offset < end)
                {
                    continue;
                }
                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
                let hashes = caps.get(2).map_or("", |m| m.as_str());
                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
                let rest = caps.get(4).map_or("", |m| m.as_str());

                // The regex caps the hash run at 6, so the cast to u8 cannot truncate
                let level = hashes.len() as u8;
                let marker_column = leading_spaces.len();

                // Check for closing sequence, but handle custom IDs that might come after
                // Yields (heading text, whether a closing `#` run was found, the closing run itself)
                let (text, has_closing, closing_seq) = {
                    // First check if there's a custom ID at the end
                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
                        // Check if this looks like a valid custom ID (ends with })
                        if rest[id_start..].trim_end().ends_with('}') {
                            // Split off the custom ID
                            (&rest[..id_start], &rest[id_start..])
                        } else {
                            (rest, "")
                        }
                    } else {
                        (rest, "")
                    };

                    // Now look for closing hashes in the part before the custom ID
                    let trimmed_rest = rest_without_id.trim_end();
                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
                        // Find the start of the hash sequence by walking backwards
                        // Use char_indices to get byte positions at char boundaries
                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();

                        // Find which char index corresponds to last_hash_byte_pos
                        let last_hash_char_idx = char_positions
                            .iter()
                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);

                        if let Some(mut char_idx) = last_hash_char_idx {
                            // Walk backwards to find start of hash sequence
                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
                                char_idx -= 1;
                            }

                            // Get the byte position of the start of hashes
                            let start_of_hashes = char_positions[char_idx].0;

                            // Check if there's at least one space before the closing hashes
                            // (a hash run at the very start of the text also qualifies)
                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();

                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
                            let potential_closing = &trimmed_rest[start_of_hashes..];
                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');

                            if is_all_hashes && has_space_before {
                                // This is a closing sequence
                                let closing_hashes = potential_closing.to_string();
                                // The text is everything before the closing hashes
                                // Don't include the custom ID here - it will be extracted later
                                let text_part = if !custom_id_part.is_empty() {
                                    // If we have a custom ID, append it back to get the full rest
                                    // This allows the extract_header_id function to handle it properly
                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
                                } else {
                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
                                };
                                (text_part, true, closing_hashes)
                            } else {
                                // Not a valid closing sequence, return the full content
                                (rest.to_string(), false, String::new())
                            }
                        } else {
                            // Couldn't find char boundary, return the full content
                            (rest.to_string(), false, String::new())
                        }
                    } else {
                        // No hashes found, return the full content
                        (rest.to_string(), false, String::new())
                    }
                };

                // Byte column where the heading text starts: indent + hashes + spacing
                let content_column = marker_column + hashes.len() + spaces_after.len();

                // Extract custom header ID if present
                let raw_text = text.trim().to_string();
                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                // If no custom ID was found on the header line, check the next line for standalone attr-list
                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
                    let next_line = content_lines[i + 1];
                    if !lines[i + 1].in_code_block
                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
                        && let Some(next_line_id) =
                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
                    {
                        custom_id = Some(next_line_id);
                    }
                }

                // ATX heading is "valid" for processing by heading rules if:
                // 1. Has space after # (CommonMark compliant): `# Heading`
                // 2. Is empty (just hashes): `#`
                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
                //
                // Invalid patterns (hashtag-like) are skipped by most heading rules:
                // - `#tag` - single # with lowercase (social hashtag)
                // - `#123` - single # with number (GitHub issue ref)
                let is_valid = !spaces_after.is_empty()
                    || rest.is_empty()
                    || level > 1
                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());

                lines[i].heading = Some(HeadingInfo {
                    level,
                    style: HeadingStyle::ATX,
                    marker: hashes.to_string(),
                    marker_column,
                    content_column,
                    text: clean_text,
                    custom_id,
                    raw_text,
                    has_closing_sequence: has_closing,
                    closing_sequence: closing_seq,
                    is_valid,
                });
            }
            // Check for Setext headings (need to look at next line)
            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
                let next_line = content_lines[i + 1];
                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
                    // Skip if the current line is inside front matter.
                    // Front matter lines were already skipped earlier in this iteration,
                    // so this guard is purely defensive and is not expected to trigger.
                    if front_matter_end > 0 && i < front_matter_end {
                        continue;
                    }

                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
                    {
                        continue;
                    }

                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
                    let content_line = line.trim();

                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
                    // Any line starting with one of these characters is rejected, whatever follows
                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
                        continue;
                    }

                    // Skip underscore thematic breaks (___)
                    if content_line.starts_with('_') {
                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
                            continue;
                        }
                    }

                    // Skip numbered lists (1. Item, 2. Item, etc.)
                    if let Some(first_char) = content_line.chars().next()
                        && first_char.is_ascii_digit()
                    {
                        // Counted in chars, but the prefix is ASCII digits only, so it
                        // equals the byte length and can be compared with `len()`
                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
                        if num_end < content_line.len() {
                            let next = content_line.chars().nth(num_end);
                            if next == Some('.') || next == Some(')') {
                                continue;
                            }
                        }
                    }

                    // Skip ATX headings
                    if ATX_HEADING_REGEX.is_match(line) {
                        continue;
                    }

                    // Skip blockquotes
                    if content_line.starts_with('>') {
                        continue;
                    }

                    // Skip code fences
                    let trimmed_start = line.trim_start();
                    if trimmed_start.len() >= 3 {
                        let first_three: String = trimmed_start.chars().take(3).collect();
                        if first_three == "```" || first_three == "~~~" {
                            continue;
                        }
                    }

                    // Skip HTML blocks
                    if content_line.starts_with('<') {
                        continue;
                    }

                    let underline = next_line.trim();

                    // `=` underline means level 1; otherwise the underline is made of `-`, level 2
                    let level = if underline.starts_with('=') { 1 } else { 2 };
                    let style = if level == 1 {
                        HeadingStyle::Setext1
                    } else {
                        HeadingStyle::Setext2
                    };

                    // Extract custom header ID if present
                    let raw_text = line.trim().to_string();
                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);

                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
                        let attr_line = content_lines[i + 2];
                        if !lines[i + 2].in_code_block
                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
                            && let Some(attr_line_id) =
                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
                        {
                            custom_id = Some(attr_line_id);
                        }
                    }

                    // For Setext headings the marker is the underline: `marker` holds its
                    // trimmed text and `marker_column` its indent on the NEXT line, while
                    // `content_column` is the indent of the heading text line itself.
                    lines[i].heading = Some(HeadingInfo {
                        level,
                        style,
                        marker: underline.to_string(),
                        marker_column: next_line.len() - next_line.trim_start().len(),
                        content_column: lines[i].indent,
                        text: clean_text,
                        custom_id,
                        raw_text,
                        has_closing_sequence: false,
                        closing_sequence: String::new(),
                        is_valid: true, // Setext headings are always valid
                    });
                }
            }
        }
    }
2847
2848    /// Detect HTML blocks in the content
2849    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2850        // HTML block elements that trigger block context
2851        // Includes HTML5 media, embedded content, and interactive elements
2852        const BLOCK_ELEMENTS: &[&str] = &[
2853            "address",
2854            "article",
2855            "aside",
2856            "audio",
2857            "blockquote",
2858            "canvas",
2859            "details",
2860            "dialog",
2861            "dd",
2862            "div",
2863            "dl",
2864            "dt",
2865            "embed",
2866            "fieldset",
2867            "figcaption",
2868            "figure",
2869            "footer",
2870            "form",
2871            "h1",
2872            "h2",
2873            "h3",
2874            "h4",
2875            "h5",
2876            "h6",
2877            "header",
2878            "hr",
2879            "iframe",
2880            "li",
2881            "main",
2882            "menu",
2883            "nav",
2884            "noscript",
2885            "object",
2886            "ol",
2887            "p",
2888            "picture",
2889            "pre",
2890            "script",
2891            "search",
2892            "section",
2893            "source",
2894            "style",
2895            "summary",
2896            "svg",
2897            "table",
2898            "tbody",
2899            "td",
2900            "template",
2901            "textarea",
2902            "tfoot",
2903            "th",
2904            "thead",
2905            "tr",
2906            "track",
2907            "ul",
2908            "video",
2909        ];
2910
2911        let mut i = 0;
2912        while i < lines.len() {
2913            // Skip if already in code block or front matter
2914            if lines[i].in_code_block || lines[i].in_front_matter {
2915                i += 1;
2916                continue;
2917            }
2918
2919            let trimmed = lines[i].content(content).trim_start();
2920
2921            // Check if line starts with an HTML tag
2922            if trimmed.starts_with('<') && trimmed.len() > 1 {
2923                // Extract tag name safely
2924                let after_bracket = &trimmed[1..];
2925                let is_closing = after_bracket.starts_with('/');
2926                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2927
2928                // Extract tag name (stop at space, >, /, or end of string)
2929                let tag_name = tag_start
2930                    .chars()
2931                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2932                    .collect::<String>()
2933                    .to_lowercase();
2934
2935                // Check if it's a block element
2936                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2937                    // Mark this line as in HTML block
2938                    lines[i].in_html_block = true;
2939
2940                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2941                    // This avoids complex nesting logic that might cause infinite loops
2942                    // Only search for closing tag on subsequent lines if the opening tag
2943                    // does NOT have its closing tag on the same line
2944                    if !is_closing {
2945                        let closing_tag = format!("</{tag_name}>");
2946
2947                        // Check if closing tag is on the same line as opening tag
2948                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
2949                        let same_line_close = lines[i].content(content).contains(&closing_tag);
2950
2951                        // Only search subsequent lines if the tag isn't self-closed on this line
2952                        if !same_line_close {
2953                            // style and script tags can contain blank lines (CSS/JS formatting)
2954                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
2955                            let mut j = i + 1;
2956                            let mut found_closing_tag = false;
2957                            while j < lines.len() && j < i + 100 {
2958                                // Limit search to 100 lines
2959                                // Stop at blank lines (except for style/script tags)
2960                                if !allow_blank_lines && lines[j].is_blank {
2961                                    break;
2962                                }
2963
2964                                lines[j].in_html_block = true;
2965
2966                                // Check if this line contains the closing tag
2967                                if lines[j].content(content).contains(&closing_tag) {
2968                                    found_closing_tag = true;
2969                                }
2970
2971                                // After finding closing tag, continue marking lines as
2972                                // in_html_block until blank line (per CommonMark spec)
2973                                if found_closing_tag {
2974                                    j += 1;
2975                                    // Continue marking subsequent lines until blank
2976                                    while j < lines.len() && j < i + 100 {
2977                                        if lines[j].is_blank {
2978                                            break;
2979                                        }
2980                                        lines[j].in_html_block = true;
2981                                        j += 1;
2982                                    }
2983                                    break;
2984                                }
2985                                j += 1;
2986                            }
2987                        }
2988                    }
2989                }
2990            }
2991
2992            i += 1;
2993        }
2994    }
2995
2996    /// Detect ESM import/export blocks anywhere in MDX files
2997    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
2998    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2999        // Only process MDX files
3000        if !flavor.supports_esm_blocks() {
3001            return;
3002        }
3003
3004        let mut in_multiline_import = false;
3005
3006        for line in lines.iter_mut() {
3007            // Skip code blocks, front matter, and HTML comments
3008            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3009                in_multiline_import = false;
3010                continue;
3011            }
3012
3013            let line_content = line.content(content);
3014            let trimmed = line_content.trim();
3015
3016            // Handle continuation of multi-line import/export
3017            if in_multiline_import {
3018                line.in_esm_block = true;
3019                // Check if this line completes the statement
3020                // Multi-line import ends when we see the closing quote + optional semicolon
3021                if trimmed.ends_with('\'')
3022                    || trimmed.ends_with('"')
3023                    || trimmed.ends_with("';")
3024                    || trimmed.ends_with("\";")
3025                    || line_content.contains(';')
3026                {
3027                    in_multiline_import = false;
3028                }
3029                continue;
3030            }
3031
3032            // Skip blank lines
3033            if line.is_blank {
3034                continue;
3035            }
3036
3037            // Check if line starts with import or export
3038            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3039                line.in_esm_block = true;
3040
3041                // Determine if this is a complete single-line statement or starts a multi-line one
3042                // Multi-line imports look like:
3043                //   import {
3044                //     Foo,
3045                //     Bar
3046                //   } from 'module'
3047                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3048                let is_import = trimmed.starts_with("import ");
3049
3050                // Check for simple complete statements
3051                let is_complete =
3052                    // Ends with semicolon
3053                    trimmed.ends_with(';')
3054                    // import/export with from clause that ends with quote
3055                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3056                    // Simple export (export const/let/var/function/class without from)
3057                    || (!is_import && !trimmed.contains(" from ") && (
3058                        trimmed.starts_with("export const ")
3059                        || trimmed.starts_with("export let ")
3060                        || trimmed.starts_with("export var ")
3061                        || trimmed.starts_with("export function ")
3062                        || trimmed.starts_with("export class ")
3063                        || trimmed.starts_with("export default ")
3064                    ));
3065
3066                if !is_complete && is_import {
3067                    // Only imports can span multiple lines in the typical case
3068                    // Check if it looks like the start of a multi-line import
3069                    // e.g., "import {" or "import type {"
3070                    if trimmed.contains('{') && !trimmed.contains('}') {
3071                        in_multiline_import = true;
3072                    }
3073                }
3074            }
3075        }
3076    }
3077
3078    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3079    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3080    fn detect_jsx_and_mdx_comments(
3081        content: &str,
3082        lines: &mut [LineInfo],
3083        flavor: MarkdownFlavor,
3084        code_blocks: &[(usize, usize)],
3085    ) -> (ByteRanges, ByteRanges) {
3086        // Only process MDX files
3087        if !flavor.supports_jsx() {
3088            return (Vec::new(), Vec::new());
3089        }
3090
3091        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3092        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3093
3094        // Quick check - if no braces, no JSX expressions or MDX comments
3095        if !content.contains('{') {
3096            return (jsx_expression_ranges, mdx_comment_ranges);
3097        }
3098
3099        let bytes = content.as_bytes();
3100        let mut i = 0;
3101
3102        while i < bytes.len() {
3103            if bytes[i] == b'{' {
3104                // Check if we're in a code block
3105                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3106                    i += 1;
3107                    continue;
3108                }
3109
3110                let start = i;
3111
3112                // Check if it's an MDX comment: {/* ... */}
3113                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3114                    // Find the closing */}
3115                    let mut j = i + 3;
3116                    while j + 2 < bytes.len() {
3117                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3118                            let end = j + 3;
3119                            mdx_comment_ranges.push((start, end));
3120
3121                            // Mark lines as in MDX comment
3122                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3123                                line.in_mdx_comment = true;
3124                            });
3125
3126                            i = end;
3127                            break;
3128                        }
3129                        j += 1;
3130                    }
3131                    if j + 2 >= bytes.len() {
3132                        // Unclosed MDX comment - mark rest as comment
3133                        mdx_comment_ranges.push((start, bytes.len()));
3134                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3135                            line.in_mdx_comment = true;
3136                        });
3137                        break;
3138                    }
3139                } else {
3140                    // Regular JSX expression: { ... }
3141                    // Need to handle nested braces
3142                    let mut brace_depth = 1;
3143                    let mut j = i + 1;
3144                    let mut in_string = false;
3145                    let mut string_char = b'"';
3146
3147                    while j < bytes.len() && brace_depth > 0 {
3148                        let c = bytes[j];
3149
3150                        // Handle strings to avoid counting braces inside them
3151                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3152                            in_string = true;
3153                            string_char = c;
3154                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3155                            in_string = false;
3156                        } else if !in_string {
3157                            if c == b'{' {
3158                                brace_depth += 1;
3159                            } else if c == b'}' {
3160                                brace_depth -= 1;
3161                            }
3162                        }
3163                        j += 1;
3164                    }
3165
3166                    if brace_depth == 0 {
3167                        let end = j;
3168                        jsx_expression_ranges.push((start, end));
3169
3170                        // Mark lines as in JSX expression
3171                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3172                            line.in_jsx_expression = true;
3173                        });
3174
3175                        i = end;
3176                    } else {
3177                        i += 1;
3178                    }
3179                }
3180            } else {
3181                i += 1;
3182            }
3183        }
3184
3185        (jsx_expression_ranges, mdx_comment_ranges)
3186    }
3187
3188    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3189    /// and populate the corresponding fields in LineInfo
3190    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3191        if flavor != MarkdownFlavor::MkDocs {
3192            return;
3193        }
3194
3195        use crate::utils::mkdocs_admonitions;
3196        use crate::utils::mkdocs_definition_lists;
3197        use crate::utils::mkdocs_tabs;
3198
3199        let content_lines: Vec<&str> = content.lines().collect();
3200
3201        // Track admonition context
3202        let mut in_admonition = false;
3203        let mut admonition_indent = 0;
3204
3205        // Track tab context
3206        let mut in_tab = false;
3207        let mut tab_indent = 0;
3208
3209        // Track definition list context
3210        let mut in_definition = false;
3211
3212        for (i, line) in content_lines.iter().enumerate() {
3213            if i >= lines.len() {
3214                break;
3215            }
3216
3217            // Check for admonition markers first - even on lines marked as code blocks
3218            // Pulldown-cmark marks 4-space indented content as indented code blocks,
3219            // but in MkDocs this is admonition/tab content, not code.
3220            if mkdocs_admonitions::is_admonition_start(line) {
3221                in_admonition = true;
3222                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3223                lines[i].in_admonition = true;
3224            } else if in_admonition {
3225                // Check if still in admonition content
3226                if line.trim().is_empty() {
3227                    // Blank lines are part of admonitions
3228                    lines[i].in_admonition = true;
3229                    // Override code block detection for blank lines inside admonitions
3230                    lines[i].in_code_block = false;
3231                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3232                    lines[i].in_admonition = true;
3233                    // Override code block detection - this is admonition content, not code
3234                    lines[i].in_code_block = false;
3235                } else {
3236                    // End of admonition
3237                    in_admonition = false;
3238                    // Check if this line starts a new admonition
3239                    if mkdocs_admonitions::is_admonition_start(line) {
3240                        in_admonition = true;
3241                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3242                        lines[i].in_admonition = true;
3243                    }
3244                }
3245            }
3246
3247            // Check for tab markers - also before the code block skip
3248            // Tab content also uses 4-space indentation which pulldown-cmark treats as code
3249            if mkdocs_tabs::is_tab_marker(line) {
3250                in_tab = true;
3251                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3252                lines[i].in_content_tab = true;
3253            } else if in_tab {
3254                // Check if still in tab content
3255                if line.trim().is_empty() {
3256                    // Blank lines are part of tabs
3257                    lines[i].in_content_tab = true;
3258                    lines[i].in_code_block = false;
3259                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3260                    lines[i].in_content_tab = true;
3261                    // Override code block detection - this is tab content, not code
3262                    lines[i].in_code_block = false;
3263                } else {
3264                    // End of tab content
3265                    in_tab = false;
3266                    // Check if this line starts a new tab
3267                    if mkdocs_tabs::is_tab_marker(line) {
3268                        in_tab = true;
3269                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3270                        lines[i].in_content_tab = true;
3271                    }
3272                }
3273            }
3274
3275            // Skip remaining detection for lines in actual code blocks
3276            if lines[i].in_code_block {
3277                continue;
3278            }
3279
3280            // Check for definition list items
3281            if mkdocs_definition_lists::is_definition_line(line) {
3282                in_definition = true;
3283                lines[i].in_definition_list = true;
3284            } else if in_definition {
3285                // Check if continuation
3286                if mkdocs_definition_lists::is_definition_continuation(line) {
3287                    lines[i].in_definition_list = true;
3288                } else if line.trim().is_empty() {
3289                    // Blank line might continue definition
3290                    lines[i].in_definition_list = true;
3291                } else if mkdocs_definition_lists::could_be_term_line(line) {
3292                    // This could be a new term - check if followed by definition
3293                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3294                    {
3295                        lines[i].in_definition_list = true;
3296                    } else {
3297                        in_definition = false;
3298                    }
3299                } else {
3300                    in_definition = false;
3301                }
3302            } else if mkdocs_definition_lists::could_be_term_line(line) {
3303                // Check if this is a term followed by a definition
3304                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3305                    lines[i].in_definition_list = true;
3306                    in_definition = true;
3307                }
3308            }
3309        }
3310    }
3311
3312    /// Helper to mark lines within a byte range
3313    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3314    where
3315        F: FnMut(&mut LineInfo),
3316    {
3317        // Find lines that overlap with the range
3318        for line in lines.iter_mut() {
3319            let line_start = line.byte_offset;
3320            let line_end = line.byte_offset + line.byte_len;
3321
3322            // Check if this line overlaps with the range
3323            if line_start < end && line_end > start {
3324                f(line);
3325            }
3326        }
3327
3328        // Silence unused warning for content (needed for signature consistency)
3329        let _ = content;
3330    }
3331
3332    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3333    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3334        // Quick check - if no backticks, no code spans
3335        if !content.contains('`') {
3336            return Vec::new();
3337        }
3338
3339        // Use pulldown-cmark's streaming parser with byte offsets
3340        let parser = Parser::new(content).into_offset_iter();
3341        let mut ranges = Vec::new();
3342
3343        for (event, range) in parser {
3344            if let Event::Code(_) = event {
3345                ranges.push((range.start, range.end));
3346            }
3347        }
3348
3349        Self::build_code_spans_from_ranges(content, lines, &ranges)
3350    }
3351
3352    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3353        let mut code_spans = Vec::new();
3354        if ranges.is_empty() {
3355            return code_spans;
3356        }
3357
3358        for &(start_pos, end_pos) in ranges {
3359            // The range includes the backticks, extract the actual content
3360            let full_span = &content[start_pos..end_pos];
3361            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3362
3363            // Extract content between backticks, preserving spaces
3364            let content_start = start_pos + backtick_count;
3365            let content_end = end_pos - backtick_count;
3366            let span_content = if content_start < content_end {
3367                content[content_start..content_end].to_string()
3368            } else {
3369                String::new()
3370            };
3371
3372            // Use binary search to find line number - O(log n) instead of O(n)
3373            // Find the rightmost line whose byte_offset <= start_pos
3374            let line_idx = lines
3375                .partition_point(|line| line.byte_offset <= start_pos)
3376                .saturating_sub(1);
3377            let line_num = line_idx + 1;
3378            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3379
3380            // Find end column using binary search
3381            let end_line_idx = lines
3382                .partition_point(|line| line.byte_offset <= end_pos)
3383                .saturating_sub(1);
3384            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3385
3386            // Convert byte offsets to character positions for correct Unicode handling
3387            // This ensures consistency with warning.column which uses character positions
3388            let line_content = lines[line_idx].content(content);
3389            let col_start = if byte_col_start <= line_content.len() {
3390                line_content[..byte_col_start].chars().count()
3391            } else {
3392                line_content.chars().count()
3393            };
3394
3395            let end_line_content = lines[end_line_idx].content(content);
3396            let col_end = if byte_col_end <= end_line_content.len() {
3397                end_line_content[..byte_col_end].chars().count()
3398            } else {
3399                end_line_content.chars().count()
3400            };
3401
3402            code_spans.push(CodeSpan {
3403                line: line_num,
3404                end_line: end_line_idx + 1,
3405                start_col: col_start,
3406                end_col: col_end,
3407                byte_offset: start_pos,
3408                byte_end: end_pos,
3409                backtick_count,
3410                content: span_content,
3411            });
3412        }
3413
3414        // Sort by position to ensure consistent ordering
3415        code_spans.sort_by_key(|span| span.byte_offset);
3416
3417        code_spans
3418    }
3419
3420    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3421    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3422        let mut math_spans = Vec::new();
3423
3424        // Quick check - if no $ signs, no math spans
3425        if !content.contains('$') {
3426            return math_spans;
3427        }
3428
3429        // Use pulldown-cmark with ENABLE_MATH option
3430        let mut options = Options::empty();
3431        options.insert(Options::ENABLE_MATH);
3432        let parser = Parser::new_ext(content, options).into_offset_iter();
3433
3434        for (event, range) in parser {
3435            let (is_display, math_content) = match &event {
3436                Event::InlineMath(text) => (false, text.as_ref()),
3437                Event::DisplayMath(text) => (true, text.as_ref()),
3438                _ => continue,
3439            };
3440
3441            let start_pos = range.start;
3442            let end_pos = range.end;
3443
3444            // Use binary search to find line number - O(log n) instead of O(n)
3445            let line_idx = lines
3446                .partition_point(|line| line.byte_offset <= start_pos)
3447                .saturating_sub(1);
3448            let line_num = line_idx + 1;
3449            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3450
3451            // Find end column using binary search
3452            let end_line_idx = lines
3453                .partition_point(|line| line.byte_offset <= end_pos)
3454                .saturating_sub(1);
3455            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3456
3457            // Convert byte offsets to character positions for correct Unicode handling
3458            let line_content = lines[line_idx].content(content);
3459            let col_start = if byte_col_start <= line_content.len() {
3460                line_content[..byte_col_start].chars().count()
3461            } else {
3462                line_content.chars().count()
3463            };
3464
3465            let end_line_content = lines[end_line_idx].content(content);
3466            let col_end = if byte_col_end <= end_line_content.len() {
3467                end_line_content[..byte_col_end].chars().count()
3468            } else {
3469                end_line_content.chars().count()
3470            };
3471
3472            math_spans.push(MathSpan {
3473                line: line_num,
3474                end_line: end_line_idx + 1,
3475                start_col: col_start,
3476                end_col: col_end,
3477                byte_offset: start_pos,
3478                byte_end: end_pos,
3479                is_display,
3480                content: math_content.to_string(),
3481            });
3482        }
3483
3484        // Sort by position to ensure consistent ordering
3485        math_spans.sort_by_key(|span| span.byte_offset);
3486
3487        math_spans
3488    }
3489
3490    /// Parse all list blocks in the content (legacy line-by-line approach)
3491    ///
3492    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3493    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3494    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3495    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3496    ///   treated as list continuation (based on the list marker width)
3497    ///
3498    /// When a new list item is encountered, we check if list-breaking content was seen
3499    /// since the last item. If so, we start a new list block.
3500    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3501        // Minimum indentation for unordered list continuation per CommonMark spec
3502        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3503
3504        /// Initialize or reset the forward-scanning tracking state.
3505        /// This helper eliminates code duplication across three initialization sites.
3506        #[inline]
3507        fn reset_tracking_state(
3508            list_item: &ListItemInfo,
3509            has_list_breaking_content: &mut bool,
3510            min_continuation: &mut usize,
3511        ) {
3512            *has_list_breaking_content = false;
3513            let marker_width = if list_item.is_ordered {
3514                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3515            } else {
3516                list_item.marker.len()
3517            };
3518            *min_continuation = if list_item.is_ordered {
3519                marker_width
3520            } else {
3521                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3522            };
3523        }
3524
3525        // Pre-size based on lines that could be list items
3526        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3527        let mut current_block: Option<ListBlock> = None;
3528        let mut last_list_item_line = 0;
3529        let mut current_indent_level = 0;
3530        let mut last_marker_width = 0;
3531
3532        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3533        let mut has_list_breaking_content_since_last_item = false;
3534        let mut min_continuation_for_tracking = 0;
3535
3536        for (line_idx, line_info) in lines.iter().enumerate() {
3537            let line_num = line_idx + 1;
3538
3539            // Enhanced code block handling using Design #3's context analysis
3540            if line_info.in_code_block {
3541                if let Some(ref mut block) = current_block {
3542                    // Calculate minimum indentation for list continuation
3543                    let min_continuation_indent =
3544                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3545
3546                    // Analyze code block context using the three-tier classification
3547                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3548
3549                    match context {
3550                        CodeBlockContext::Indented => {
3551                            // Code block is properly indented - continues the list
3552                            block.end_line = line_num;
3553                            continue;
3554                        }
3555                        CodeBlockContext::Standalone => {
3556                            // Code block separates lists - end current block
3557                            let completed_block = current_block.take().unwrap();
3558                            list_blocks.push(completed_block);
3559                            continue;
3560                        }
3561                        CodeBlockContext::Adjacent => {
3562                            // Edge case - use conservative behavior (continue list)
3563                            block.end_line = line_num;
3564                            continue;
3565                        }
3566                    }
3567                } else {
3568                    // No current list block - skip code block lines
3569                    continue;
3570                }
3571            }
3572
3573            // Extract blockquote prefix if any
3574            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3575                caps.get(0).unwrap().as_str().to_string()
3576            } else {
3577                String::new()
3578            };
3579
3580            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3581            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3582            if let Some(ref block) = current_block
3583                && line_info.list_item.is_none()
3584                && !line_info.is_blank
3585                && !line_info.in_code_span_continuation
3586            {
3587                let line_content = line_info.content(content).trim();
3588
3589                // Check for structural separators that break lists
3590                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3591                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3592                // as they indicate improper indentation rather than lazy continuation.
3593                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3594
3595                // Check if blockquote context changes (different prefix than current block)
3596                // Lines within the SAME blockquote context don't break lists
3597                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3598
3599                let breaks_list = line_info.heading.is_some()
3600                    || line_content.starts_with("---")
3601                    || line_content.starts_with("***")
3602                    || line_content.starts_with("___")
3603                    || crate::utils::skip_context::is_table_line(line_content)
3604                    || blockquote_prefix_changes
3605                    || (line_info.indent > 0
3606                        && line_info.indent < min_continuation_for_tracking
3607                        && !is_lazy_continuation);
3608
3609                if breaks_list {
3610                    has_list_breaking_content_since_last_item = true;
3611                }
3612            }
3613
3614            // If this line is a code span continuation within an active list block,
3615            // extend the block's end_line to include this line (maintains list continuity)
3616            if line_info.in_code_span_continuation
3617                && line_info.list_item.is_none()
3618                && let Some(ref mut block) = current_block
3619            {
3620                block.end_line = line_num;
3621            }
3622
3623            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3624            // properly indented lines within the list). This ensures the workaround at line 2448
3625            // works correctly when there are multiple continuation lines before a nested list item.
3626            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3627            // For blockquote lines, compute effective indent after stripping the prefix
3628            let effective_continuation_indent = if let Some(ref block) = current_block {
3629                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3630                let line_content = line_info.content(content);
3631                let line_bq_level = line_content
3632                    .chars()
3633                    .take_while(|c| *c == '>' || c.is_whitespace())
3634                    .filter(|&c| c == '>')
3635                    .count();
3636                if line_bq_level > 0 && line_bq_level == block_bq_level {
3637                    // Compute indent after blockquote markers
3638                    let mut pos = 0;
3639                    let mut found_markers = 0;
3640                    for c in line_content.chars() {
3641                        pos += c.len_utf8();
3642                        if c == '>' {
3643                            found_markers += 1;
3644                            if found_markers == line_bq_level {
3645                                if line_content.get(pos..pos + 1) == Some(" ") {
3646                                    pos += 1;
3647                                }
3648                                break;
3649                            }
3650                        }
3651                    }
3652                    let after_bq = &line_content[pos..];
3653                    after_bq.len() - after_bq.trim_start().len()
3654                } else {
3655                    line_info.indent
3656                }
3657            } else {
3658                line_info.indent
3659            };
3660            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3661                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3662                if block_bq_level > 0 {
3663                    if block.is_ordered { last_marker_width } else { 2 }
3664                } else {
3665                    min_continuation_for_tracking
3666                }
3667            } else {
3668                min_continuation_for_tracking
3669            };
3670            // Lazy continuation allows unindented text to continue a list item,
3671            // but NOT structural elements like headings, code fences, or horizontal rules
3672            let is_structural_element = line_info.heading.is_some()
3673                || line_info.content(content).trim().starts_with("```")
3674                || line_info.content(content).trim().starts_with("~~~");
3675            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3676                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3677
3678            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3679                eprintln!(
3680                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3681                    line_num,
3682                    effective_continuation_indent,
3683                    adjusted_min_continuation_for_tracking,
3684                    is_valid_continuation,
3685                    line_info.in_code_span_continuation,
3686                    line_info.in_code_block,
3687                    current_block.is_some()
3688                );
3689            }
3690
3691            if !line_info.in_code_span_continuation
3692                && line_info.list_item.is_none()
3693                && !line_info.is_blank
3694                && !line_info.in_code_block
3695                && is_valid_continuation
3696                && let Some(ref mut block) = current_block
3697            {
3698                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3699                    eprintln!(
3700                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3701                        line_num, block.end_line, line_num
3702                    );
3703                }
3704                block.end_line = line_num;
3705            }
3706
3707            // Check if this line is a list item
3708            if let Some(list_item) = &line_info.list_item {
3709                // Calculate nesting level based on indentation
3710                let item_indent = list_item.marker_column;
3711                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3712
3713                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3714                    eprintln!(
3715                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3716                        line_num, list_item.marker, item_indent
3717                    );
3718                }
3719
3720                if let Some(ref mut block) = current_block {
3721                    // Check if this continues the current block
3722                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3723                    // or a continuation at the same or lower level
3724                    let is_nested = nesting > block.nesting_level;
3725                    let same_type =
3726                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3727                    let same_context = block.blockquote_prefix == blockquote_prefix;
3728                    // Allow one blank line after last item, or lines immediately after block content
3729                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3730
3731                    // For unordered lists, also check marker consistency
3732                    let marker_compatible =
3733                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3734
3735                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3736                    // This eliminates the quadratic bottleneck from issue #148
3737                    let has_non_list_content = has_list_breaking_content_since_last_item;
3738
3739                    // A list continues if:
3740                    // 1. It's a nested item (indented more than the parent), OR
3741                    // 2. It's the same type at the same level with reasonable distance
3742                    let mut continues_list = if is_nested {
3743                        // Nested items always continue the list if they're in the same context
3744                        same_context && reasonable_distance && !has_non_list_content
3745                    } else {
3746                        // Same-level items need to match type and markers
3747                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3748                    };
3749
3750                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3751                        eprintln!(
3752                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3753                            line_num,
3754                            continues_list,
3755                            is_nested,
3756                            same_type,
3757                            same_context,
3758                            reasonable_distance,
3759                            marker_compatible,
3760                            has_non_list_content,
3761                            last_list_item_line,
3762                            block.end_line
3763                        );
3764                    }
3765
3766                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3767                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3768                    // Apply for: nested items (different types OK), OR same-level same-type items
3769                    if !continues_list
3770                        && (is_nested || same_type)
3771                        && reasonable_distance
3772                        && line_num > 0
3773                        && block.end_line == line_num - 1
3774                    {
3775                        // Check if the previous line was a list item or a continuation of a list item
3776                        // (including lazy continuation lines)
3777                        if block.item_lines.contains(&(line_num - 1)) {
3778                            // They're consecutive list items - force them to be in the same list
3779                            continues_list = true;
3780                        } else {
3781                            // Previous line is a continuation line within this block
3782                            // (e.g., lazy continuation with indent=0)
3783                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3784                            continues_list = true;
3785                        }
3786                    }
3787
3788                    if continues_list {
3789                        // Extend current block
3790                        block.end_line = line_num;
3791                        block.item_lines.push(line_num);
3792
3793                        // Update max marker width
3794                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3795                            list_item.marker.len() + 1
3796                        } else {
3797                            list_item.marker.len()
3798                        });
3799
3800                        // Update marker consistency for unordered lists
3801                        if !block.is_ordered
3802                            && block.marker.is_some()
3803                            && block.marker.as_ref() != Some(&list_item.marker)
3804                        {
3805                            // Mixed markers, clear the marker field
3806                            block.marker = None;
3807                        }
3808
3809                        // Reset tracked state for issue #148 optimization
3810                        reset_tracking_state(
3811                            list_item,
3812                            &mut has_list_breaking_content_since_last_item,
3813                            &mut min_continuation_for_tracking,
3814                        );
3815                    } else {
3816                        // End current block and start a new one
3817                        // When a different list type starts AT THE SAME LEVEL (not nested),
3818                        // trim back lazy continuation lines (they become part of the gap, not the list)
3819                        // For nested items, different types are fine - they're sub-lists
3820                        if !same_type
3821                            && !is_nested
3822                            && let Some(&last_item) = block.item_lines.last()
3823                        {
3824                            block.end_line = last_item;
3825                        }
3826
3827                        list_blocks.push(block.clone());
3828
3829                        *block = ListBlock {
3830                            start_line: line_num,
3831                            end_line: line_num,
3832                            is_ordered: list_item.is_ordered,
3833                            marker: if list_item.is_ordered {
3834                                None
3835                            } else {
3836                                Some(list_item.marker.clone())
3837                            },
3838                            blockquote_prefix: blockquote_prefix.clone(),
3839                            item_lines: vec![line_num],
3840                            nesting_level: nesting,
3841                            max_marker_width: if list_item.is_ordered {
3842                                list_item.marker.len() + 1
3843                            } else {
3844                                list_item.marker.len()
3845                            },
3846                        };
3847
3848                        // Initialize tracked state for new block (issue #148 optimization)
3849                        reset_tracking_state(
3850                            list_item,
3851                            &mut has_list_breaking_content_since_last_item,
3852                            &mut min_continuation_for_tracking,
3853                        );
3854                    }
3855                } else {
3856                    // Start a new block
3857                    current_block = Some(ListBlock {
3858                        start_line: line_num,
3859                        end_line: line_num,
3860                        is_ordered: list_item.is_ordered,
3861                        marker: if list_item.is_ordered {
3862                            None
3863                        } else {
3864                            Some(list_item.marker.clone())
3865                        },
3866                        blockquote_prefix,
3867                        item_lines: vec![line_num],
3868                        nesting_level: nesting,
3869                        max_marker_width: list_item.marker.len(),
3870                    });
3871
3872                    // Initialize tracked state for new block (issue #148 optimization)
3873                    reset_tracking_state(
3874                        list_item,
3875                        &mut has_list_breaking_content_since_last_item,
3876                        &mut min_continuation_for_tracking,
3877                    );
3878                }
3879
3880                last_list_item_line = line_num;
3881                current_indent_level = item_indent;
3882                last_marker_width = if list_item.is_ordered {
3883                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3884                } else {
3885                    list_item.marker.len()
3886                };
3887            } else if let Some(ref mut block) = current_block {
3888                // Not a list item - check if it continues the current block
3889                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3890                    eprintln!(
3891                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3892                        line_num, line_info.is_blank
3893                    );
3894                }
3895
3896                // For MD032 compatibility, we use a simple approach:
3897                // - Indented lines continue the list
3898                // - Blank lines followed by indented content continue the list
3899                // - Everything else ends the list
3900
3901                // Check if the last line in the list block ended with a backslash (hard line break)
3902                // This handles cases where list items use backslash for hard line breaks
3903                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3904                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3905                } else {
3906                    false
3907                };
3908
3909                // Calculate minimum indentation for list continuation
3910                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3911                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3912                let min_continuation_indent = if block.is_ordered {
3913                    current_indent_level + last_marker_width
3914                } else {
3915                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3916                };
3917
3918                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3919                    // Indented line or backslash continuation continues the list
3920                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3921                        eprintln!(
3922                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3923                            line_num, line_info.indent, min_continuation_indent
3924                        );
3925                    }
3926                    block.end_line = line_num;
3927                } else if line_info.is_blank {
3928                    // Blank line - check if it's internal to the list or ending it
3929                    // We only include blank lines that are followed by more list content
3930                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3931                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3932                    }
3933                    let mut check_idx = line_idx + 1;
3934                    let mut found_continuation = false;
3935
3936                    // Skip additional blank lines
3937                    while check_idx < lines.len() && lines[check_idx].is_blank {
3938                        check_idx += 1;
3939                    }
3940
3941                    if check_idx < lines.len() {
3942                        let next_line = &lines[check_idx];
3943                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3944                        let next_content = next_line.content(content);
3945                        // Use blockquote level (count of >) to compare, not the full prefix
3946                        // This avoids issues where the regex captures extra whitespace
3947                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3948                        let next_bq_level_for_indent = next_content
3949                            .chars()
3950                            .take_while(|c| *c == '>' || c.is_whitespace())
3951                            .filter(|&c| c == '>')
3952                            .count();
3953                        let effective_indent =
3954                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3955                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3956                                // Find position after ">" and one space
3957                                let mut pos = 0;
3958                                let mut found_markers = 0;
3959                                for c in next_content.chars() {
3960                                    pos += c.len_utf8();
3961                                    if c == '>' {
3962                                        found_markers += 1;
3963                                        if found_markers == next_bq_level_for_indent {
3964                                            // Skip optional space after last >
3965                                            if next_content.get(pos..pos + 1) == Some(" ") {
3966                                                pos += 1;
3967                                            }
3968                                            break;
3969                                        }
3970                                    }
3971                                }
3972                                let after_blockquote_marker = &next_content[pos..];
3973                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3974                            } else {
3975                                next_line.indent
3976                            };
3977                        // Also adjust min_continuation_indent for blockquote lists
3978                        // The marker_column includes blockquote prefix, so subtract it
3979                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3980                            // For blockquote lists, the continuation is relative to blockquote content
3981                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3982                            if block.is_ordered { last_marker_width } else { 2 }
3983                        } else {
3984                            min_continuation_indent
3985                        };
3986                        // Check if followed by indented content (list continuation)
3987                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3988                            eprintln!(
3989                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3990                                line_num,
3991                                check_idx + 1,
3992                                effective_indent,
3993                                adjusted_min_continuation,
3994                                next_line.list_item.is_some(),
3995                                next_line.in_code_block
3996                            );
3997                        }
3998                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3999                            found_continuation = true;
4000                        }
4001                        // Check if followed by another list item at the same level
4002                        else if !next_line.in_code_block
4003                            && next_line.list_item.is_some()
4004                            && let Some(item) = &next_line.list_item
4005                        {
4006                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4007                                .find(next_line.content(content))
4008                                .map_or(String::new(), |m| m.as_str().to_string());
4009                            if item.marker_column == current_indent_level
4010                                && item.is_ordered == block.is_ordered
4011                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4012                            {
4013                                // Check if there was meaningful content between the list items (unused now)
4014                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
4015                                // Pre-compute block's blockquote level for use in closures
4016                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4017                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4018                                    if let Some(between_line) = lines.get(idx) {
4019                                        let between_content = between_line.content(content);
4020                                        let trimmed = between_content.trim();
4021                                        // Skip empty lines
4022                                        if trimmed.is_empty() {
4023                                            return false;
4024                                        }
4025                                        // Check for meaningful content
4026                                        let line_indent = between_content.len() - between_content.trim_start().len();
4027
4028                                        // Check if blockquote level changed (not just if line starts with ">")
4029                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4030                                            .find(between_content)
4031                                            .map_or(String::new(), |m| m.as_str().to_string());
4032                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4033                                        let blockquote_level_changed =
4034                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4035
4036                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4037                                        if trimmed.starts_with("```")
4038                                            || trimmed.starts_with("~~~")
4039                                            || trimmed.starts_with("---")
4040                                            || trimmed.starts_with("***")
4041                                            || trimmed.starts_with("___")
4042                                            || blockquote_level_changed
4043                                            || crate::utils::skip_context::is_table_line(trimmed)
4044                                            || between_line.heading.is_some()
4045                                        {
4046                                            return true; // These are structural separators - meaningful content that breaks lists
4047                                        }
4048
4049                                        // Only properly indented content continues the list
4050                                        line_indent >= min_continuation_indent
4051                                    } else {
4052                                        false
4053                                    }
4054                                });
4055
4056                                if block.is_ordered {
4057                                    // For ordered lists: don't continue if there are structural separators
4058                                    // Check if there are structural separators between the list items
4059                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4060                                        if let Some(between_line) = lines.get(idx) {
4061                                            let between_content = between_line.content(content);
4062                                            let trimmed = between_content.trim();
4063                                            if trimmed.is_empty() {
4064                                                return false;
4065                                            }
4066                                            // Check if blockquote level changed (not just if line starts with ">")
4067                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4068                                                .find(between_content)
4069                                                .map_or(String::new(), |m| m.as_str().to_string());
4070                                            let between_bq_level =
4071                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4072                                            let blockquote_level_changed =
4073                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4074                                            // Check for structural separators that break lists
4075                                            trimmed.starts_with("```")
4076                                                || trimmed.starts_with("~~~")
4077                                                || trimmed.starts_with("---")
4078                                                || trimmed.starts_with("***")
4079                                                || trimmed.starts_with("___")
4080                                                || blockquote_level_changed
4081                                                || crate::utils::skip_context::is_table_line(trimmed)
4082                                                || between_line.heading.is_some()
4083                                        } else {
4084                                            false
4085                                        }
4086                                    });
4087                                    found_continuation = !has_structural_separators;
4088                                } else {
4089                                    // For unordered lists: also check for structural separators
4090                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4091                                        if let Some(between_line) = lines.get(idx) {
4092                                            let between_content = between_line.content(content);
4093                                            let trimmed = between_content.trim();
4094                                            if trimmed.is_empty() {
4095                                                return false;
4096                                            }
4097                                            // Check if blockquote level changed (not just if line starts with ">")
4098                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4099                                                .find(between_content)
4100                                                .map_or(String::new(), |m| m.as_str().to_string());
4101                                            let between_bq_level =
4102                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4103                                            let blockquote_level_changed =
4104                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4105                                            // Check for structural separators that break lists
4106                                            trimmed.starts_with("```")
4107                                                || trimmed.starts_with("~~~")
4108                                                || trimmed.starts_with("---")
4109                                                || trimmed.starts_with("***")
4110                                                || trimmed.starts_with("___")
4111                                                || blockquote_level_changed
4112                                                || crate::utils::skip_context::is_table_line(trimmed)
4113                                                || between_line.heading.is_some()
4114                                        } else {
4115                                            false
4116                                        }
4117                                    });
4118                                    found_continuation = !has_structural_separators;
4119                                }
4120                            }
4121                        }
4122                    }
4123
4124                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4125                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4126                    }
4127                    if found_continuation {
4128                        // Include the blank line in the block
4129                        block.end_line = line_num;
4130                    } else {
4131                        // Blank line ends the list - don't include it
4132                        list_blocks.push(block.clone());
4133                        current_block = None;
4134                    }
4135                } else {
4136                    // Check for lazy continuation - non-indented line immediately after a list item
4137                    // But only if the line has sufficient indentation for the list type
4138                    let min_required_indent = if block.is_ordered {
4139                        current_indent_level + last_marker_width
4140                    } else {
4141                        current_indent_level + 2
4142                    };
4143
4144                    // For lazy continuation to apply, the line must either:
4145                    // 1. Have no indentation (true lazy continuation)
4146                    // 2. Have sufficient indentation for the list type
4147                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4148                    let line_content = line_info.content(content).trim();
4149
4150                    // Check for table-like patterns
4151                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4152
4153                    // Check if blockquote level changed (not just if line starts with ">")
4154                    // Lines within the same blockquote level are NOT structural separators
4155                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4156                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4157                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4158
4159                    let is_structural_separator = line_info.heading.is_some()
4160                        || line_content.starts_with("```")
4161                        || line_content.starts_with("~~~")
4162                        || line_content.starts_with("---")
4163                        || line_content.starts_with("***")
4164                        || line_content.starts_with("___")
4165                        || blockquote_level_changed
4166                        || looks_like_table;
4167
4168                    // Allow lazy continuation if we're still within the same list block
4169                    // (not just immediately after a list item)
4170                    // Also treat code span continuations as valid continuations regardless of indent
4171                    let is_lazy_continuation = !is_structural_separator
4172                        && !line_info.is_blank
4173                        && (line_info.indent == 0
4174                            || line_info.indent >= min_required_indent
4175                            || line_info.in_code_span_continuation);
4176
4177                    if is_lazy_continuation {
4178                        // Per CommonMark, lazy continuation continues until a blank line
4179                        // or structural element, regardless of uppercase at line start
4180                        block.end_line = line_num;
4181                    } else {
4182                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4183                        list_blocks.push(block.clone());
4184                        current_block = None;
4185                    }
4186                }
4187            }
4188        }
4189
4190        // Don't forget the last block
4191        if let Some(block) = current_block {
4192            list_blocks.push(block);
4193        }
4194
4195        // Merge adjacent blocks that should be one
4196        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4197
4198        list_blocks
4199    }
4200
4201    /// Compute character frequency for fast content analysis
4202    fn compute_char_frequency(content: &str) -> CharFrequency {
4203        let mut frequency = CharFrequency::default();
4204
4205        for ch in content.chars() {
4206            match ch {
4207                '#' => frequency.hash_count += 1,
4208                '*' => frequency.asterisk_count += 1,
4209                '_' => frequency.underscore_count += 1,
4210                '-' => frequency.hyphen_count += 1,
4211                '+' => frequency.plus_count += 1,
4212                '>' => frequency.gt_count += 1,
4213                '|' => frequency.pipe_count += 1,
4214                '[' => frequency.bracket_count += 1,
4215                '`' => frequency.backtick_count += 1,
4216                '<' => frequency.lt_count += 1,
4217                '!' => frequency.exclamation_count += 1,
4218                '\n' => frequency.newline_count += 1,
4219                _ => {}
4220            }
4221        }
4222
4223        frequency
4224    }
4225
4226    /// Parse HTML tags in the content
4227    fn parse_html_tags(
4228        content: &str,
4229        lines: &[LineInfo],
4230        code_blocks: &[(usize, usize)],
4231        flavor: MarkdownFlavor,
4232    ) -> Vec<HtmlTag> {
4233        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4234            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4235
4236        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4237
4238        for cap in HTML_TAG_REGEX.captures_iter(content) {
4239            let full_match = cap.get(0).unwrap();
4240            let match_start = full_match.start();
4241            let match_end = full_match.end();
4242
4243            // Skip if in code block
4244            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4245                continue;
4246            }
4247
4248            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4249            let tag_name_original = cap.get(2).unwrap().as_str();
4250            let tag_name = tag_name_original.to_lowercase();
4251            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4252
4253            // Skip JSX components in MDX files (tags starting with uppercase letter)
4254            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4255            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4256                continue;
4257            }
4258
4259            // Find which line this tag is on
4260            let mut line_num = 1;
4261            let mut col_start = match_start;
4262            let mut col_end = match_end;
4263            for (idx, line_info) in lines.iter().enumerate() {
4264                if match_start >= line_info.byte_offset {
4265                    line_num = idx + 1;
4266                    col_start = match_start - line_info.byte_offset;
4267                    col_end = match_end - line_info.byte_offset;
4268                } else {
4269                    break;
4270                }
4271            }
4272
4273            html_tags.push(HtmlTag {
4274                line: line_num,
4275                start_col: col_start,
4276                end_col: col_end,
4277                byte_offset: match_start,
4278                byte_end: match_end,
4279                tag_name,
4280                is_closing,
4281                is_self_closing,
4282                raw_content: full_match.as_str().to_string(),
4283            });
4284        }
4285
4286        html_tags
4287    }
4288
4289    /// Parse table rows in the content
4290    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4291        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4292
4293        for (line_idx, line_info) in lines.iter().enumerate() {
4294            // Skip lines in code blocks or blank lines
4295            if line_info.in_code_block || line_info.is_blank {
4296                continue;
4297            }
4298
4299            let line = line_info.content(content);
4300            let line_num = line_idx + 1;
4301
4302            // Check if this line contains pipes (potential table row)
4303            if !line.contains('|') {
4304                continue;
4305            }
4306
4307            // Count columns by splitting on pipes
4308            let parts: Vec<&str> = line.split('|').collect();
4309            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4310
4311            // Check if this is a separator row
4312            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4313            let mut column_alignments = Vec::new();
4314
4315            if is_separator {
4316                for part in &parts[1..parts.len() - 1] {
4317                    // Skip first and last empty parts
4318                    let trimmed = part.trim();
4319                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4320                        "center".to_string()
4321                    } else if trimmed.ends_with(':') {
4322                        "right".to_string()
4323                    } else if trimmed.starts_with(':') {
4324                        "left".to_string()
4325                    } else {
4326                        "none".to_string()
4327                    };
4328                    column_alignments.push(alignment);
4329                }
4330            }
4331
4332            table_rows.push(TableRow {
4333                line: line_num,
4334                is_separator,
4335                column_count,
4336                column_alignments,
4337            });
4338        }
4339
4340        table_rows
4341    }
4342
4343    /// Parse bare URLs and emails in the content
4344    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4345        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4346
4347        // Check for bare URLs (not in angle brackets or markdown links)
4348        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4349            let full_match = cap.get(0).unwrap();
4350            let match_start = full_match.start();
4351            let match_end = full_match.end();
4352
4353            // Skip if in code block
4354            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4355                continue;
4356            }
4357
4358            // Skip if already in angle brackets or markdown links
4359            let preceding_char = if match_start > 0 {
4360                content.chars().nth(match_start - 1)
4361            } else {
4362                None
4363            };
4364            let following_char = content.chars().nth(match_end);
4365
4366            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4367                continue;
4368            }
4369            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4370                continue;
4371            }
4372
4373            let url = full_match.as_str();
4374            let url_type = if url.starts_with("https://") {
4375                "https"
4376            } else if url.starts_with("http://") {
4377                "http"
4378            } else if url.starts_with("ftp://") {
4379                "ftp"
4380            } else {
4381                "other"
4382            };
4383
4384            // Find which line this URL is on
4385            let mut line_num = 1;
4386            let mut col_start = match_start;
4387            let mut col_end = match_end;
4388            for (idx, line_info) in lines.iter().enumerate() {
4389                if match_start >= line_info.byte_offset {
4390                    line_num = idx + 1;
4391                    col_start = match_start - line_info.byte_offset;
4392                    col_end = match_end - line_info.byte_offset;
4393                } else {
4394                    break;
4395                }
4396            }
4397
4398            bare_urls.push(BareUrl {
4399                line: line_num,
4400                start_col: col_start,
4401                end_col: col_end,
4402                byte_offset: match_start,
4403                byte_end: match_end,
4404                url: url.to_string(),
4405                url_type: url_type.to_string(),
4406            });
4407        }
4408
4409        // Check for bare email addresses
4410        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4411            let full_match = cap.get(0).unwrap();
4412            let match_start = full_match.start();
4413            let match_end = full_match.end();
4414
4415            // Skip if in code block
4416            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4417                continue;
4418            }
4419
4420            // Skip if already in angle brackets or markdown links
4421            let preceding_char = if match_start > 0 {
4422                content.chars().nth(match_start - 1)
4423            } else {
4424                None
4425            };
4426            let following_char = content.chars().nth(match_end);
4427
4428            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4429                continue;
4430            }
4431            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4432                continue;
4433            }
4434
4435            let email = full_match.as_str();
4436
4437            // Find which line this email is on
4438            let mut line_num = 1;
4439            let mut col_start = match_start;
4440            let mut col_end = match_end;
4441            for (idx, line_info) in lines.iter().enumerate() {
4442                if match_start >= line_info.byte_offset {
4443                    line_num = idx + 1;
4444                    col_start = match_start - line_info.byte_offset;
4445                    col_end = match_end - line_info.byte_offset;
4446                } else {
4447                    break;
4448                }
4449            }
4450
4451            bare_urls.push(BareUrl {
4452                line: line_num,
4453                start_col: col_start,
4454                end_col: col_end,
4455                byte_offset: match_start,
4456                byte_end: match_end,
4457                url: email.to_string(),
4458                url_type: "email".to_string(),
4459            });
4460        }
4461
4462        bare_urls
4463    }
4464
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is constructed by `ValidHeadingsIter::new` over this context's `lines`
    /// and borrows from `self`, so it cannot outlive the context.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4488
4489    /// Check if the document contains any valid CommonMark headings
4490    ///
4491    /// Returns `true` if there is at least one heading with proper space after `#`.
4492    #[must_use]
4493    pub fn has_valid_headings(&self) -> bool {
4494        self.lines
4495            .iter()
4496            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4497    }
4498}
4499
4500/// Merge adjacent list blocks that should be treated as one
4501fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4502    if list_blocks.len() < 2 {
4503        return;
4504    }
4505
4506    let mut merger = ListBlockMerger::new(content, lines);
4507    *list_blocks = merger.merge(list_blocks);
4508}
4509
4510/// Helper struct to manage the complex logic of merging list blocks
4511struct ListBlockMerger<'a> {
4512    content: &'a str,
4513    lines: &'a [LineInfo],
4514}
4515
4516impl<'a> ListBlockMerger<'a> {
4517    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4518        Self { content, lines }
4519    }
4520
4521    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4522        let mut merged = Vec::with_capacity(list_blocks.len());
4523        let mut current = list_blocks[0].clone();
4524
4525        for next in list_blocks.iter().skip(1) {
4526            if self.should_merge_blocks(&current, next) {
4527                current = self.merge_two_blocks(current, next);
4528            } else {
4529                merged.push(current);
4530                current = next.clone();
4531            }
4532        }
4533
4534        merged.push(current);
4535        merged
4536    }
4537
4538    /// Determine if two adjacent list blocks should be merged
4539    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4540        // Basic compatibility checks
4541        if !self.blocks_are_compatible(current, next) {
4542            return false;
4543        }
4544
4545        // Check spacing and content between blocks
4546        let spacing = self.analyze_spacing_between(current, next);
4547        match spacing {
4548            BlockSpacing::Consecutive => true,
4549            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4550            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4551                self.can_merge_with_content_between(current, next)
4552            }
4553        }
4554    }
4555
4556    /// Check if blocks have compatible structure for merging
4557    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4558        current.is_ordered == next.is_ordered
4559            && current.blockquote_prefix == next.blockquote_prefix
4560            && current.nesting_level == next.nesting_level
4561    }
4562
4563    /// Analyze the spacing between two list blocks
4564    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4565        let gap = next.start_line - current.end_line;
4566
4567        match gap {
4568            1 => BlockSpacing::Consecutive,
4569            2 => BlockSpacing::SingleBlank,
4570            _ if gap > 2 => {
4571                if self.has_only_blank_lines_between(current, next) {
4572                    BlockSpacing::MultipleBlanks
4573                } else {
4574                    BlockSpacing::ContentBetween
4575                }
4576            }
4577            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4578        }
4579    }
4580
4581    /// Check if unordered lists can be merged with a single blank line between
4582    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4583        // Check if there are structural separators between the blocks
4584        // If has_meaningful_content_between returns true, it means there are structural separators
4585        if has_meaningful_content_between(self.content, current, next, self.lines) {
4586            return false; // Structural separators prevent merging
4587        }
4588
4589        // Only merge unordered lists with same marker across single blank
4590        !current.is_ordered && current.marker == next.marker
4591    }
4592
4593    /// Check if ordered lists can be merged when there's content between them
4594    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4595        // Do not merge lists if there are structural separators between them
4596        if has_meaningful_content_between(self.content, current, next, self.lines) {
4597            return false; // Structural separators prevent merging
4598        }
4599
4600        // Only consider merging ordered lists if there's no structural content between
4601        current.is_ordered && next.is_ordered
4602    }
4603
4604    /// Check if there are only blank lines between blocks
4605    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4606        for line_num in (current.end_line + 1)..next.start_line {
4607            if let Some(line_info) = self.lines.get(line_num - 1)
4608                && !line_info.content(self.content).trim().is_empty()
4609            {
4610                return false;
4611            }
4612        }
4613        true
4614    }
4615
4616    /// Merge two compatible list blocks into one
4617    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4618        current.end_line = next.end_line;
4619        current.item_lines.extend_from_slice(&next.item_lines);
4620
4621        // Update max marker width
4622        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4623
4624        // Handle marker consistency for unordered lists
4625        if !current.is_ordered && self.markers_differ(&current, next) {
4626            current.marker = None; // Mixed markers
4627        }
4628
4629        current
4630    }
4631
4632    /// Check if two blocks have different markers
4633    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4634        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4635    }
4636}
4637
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the difference between the
/// next block's `start_line` and the current block's `end_line`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks (line difference of 1; also the fallback for a difference of 0).
    Consecutive,
    /// One blank line between blocks (line difference of 2).
    SingleBlank,
    /// Multiple blank lines but no content (line difference above 2, all lines between blank).
    MultipleBlanks,
    /// Content exists between blocks (line difference above 2, at least one non-blank line).
    ContentBetween,
}
4646
4647/// Check if there's meaningful content (not just blank lines) between two list blocks
4648fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4649    // Check lines between current.end_line and next.start_line
4650    for line_num in (current.end_line + 1)..next.start_line {
4651        if let Some(line_info) = lines.get(line_num - 1) {
4652            // Convert to 0-indexed
4653            let trimmed = line_info.content(content).trim();
4654
4655            // Skip empty lines
4656            if trimmed.is_empty() {
4657                continue;
4658            }
4659
4660            // Check for structural separators that should separate lists (CommonMark compliant)
4661
4662            // Headings separate lists
4663            if line_info.heading.is_some() {
4664                return true; // Has meaningful content - headings separate lists
4665            }
4666
4667            // Horizontal rules separate lists (---, ***, ___)
4668            if is_horizontal_rule(trimmed) {
4669                return true; // Has meaningful content - horizontal rules separate lists
4670            }
4671
4672            // Tables separate lists
4673            if crate::utils::skip_context::is_table_line(trimmed) {
4674                return true; // Has meaningful content - tables separate lists
4675            }
4676
4677            // Blockquotes separate lists
4678            if trimmed.starts_with('>') {
4679                return true; // Has meaningful content - blockquotes separate lists
4680            }
4681
4682            // Code block fences separate lists (unless properly indented as list content)
4683            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4684                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4685
4686                // Check if this code block is properly indented as list continuation
4687                let min_continuation_indent = if current.is_ordered {
4688                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4689                } else {
4690                    current.nesting_level + 2
4691                };
4692
4693                if line_indent < min_continuation_indent {
4694                    // This is a standalone code block that separates lists
4695                    return true; // Has meaningful content - standalone code blocks separate lists
4696                }
4697            }
4698
4699            // Check if this line has proper indentation for list continuation
4700            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4701
4702            // Calculate minimum indentation needed to be list continuation
4703            let min_indent = if current.is_ordered {
4704                current.nesting_level + current.max_marker_width
4705            } else {
4706                current.nesting_level + 2
4707            };
4708
4709            // If the line is not indented enough to be list continuation, it's meaningful content
4710            if line_indent < min_indent {
4711                return true; // Has meaningful content - content not indented as list continuation
4712            }
4713
4714            // If we reach here, the line is properly indented as list continuation
4715            // Continue checking other lines
4716        }
4717    }
4718
4719    // Only blank lines or properly indented list continuation content between blocks
4720    false
4721}
4722
4723/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4724/// CommonMark rules for thematic breaks (horizontal rules):
4725/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4726/// - Must have 3+ of the same character (-, *, or _)
4727/// - May have spaces between characters
4728/// - No other characters allowed
4729pub fn is_horizontal_rule_line(line: &str) -> bool {
4730    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4731    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4732    if leading_spaces > 3 || line.starts_with('\t') {
4733        return false;
4734    }
4735
4736    is_horizontal_rule_content(line.trim())
4737}
4738
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must already have surrounding whitespace removed: the first character decides
/// the rule marker, so leading whitespace makes this return `false`. The content matches
/// when it starts with `-`, `*` or `_`, contains at least three of that marker, and every
/// other character is a space or a tab.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Three markers need at least three bytes.
    if trimmed.len() < 3 {
        return false;
    }

    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass over the characters; no intermediate buffer is needed.
    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            _ => return false, // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}
4763
/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// Forwards `trimmed` unchanged and returns the wrapped function's result; it performs
/// no indentation check of its own.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4768
/// Unit tests for `LintContext` construction and its offset, character and range query helpers.
4770#[cfg(test)]
4771mod tests {
4772    use super::*;
4773
4774    #[test]
4775    fn test_empty_content() {
4776        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4777        assert_eq!(ctx.content, "");
4778        assert_eq!(ctx.line_offsets, vec![0]);
4779        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4780        assert_eq!(ctx.lines.len(), 0);
4781    }
4782
4783    #[test]
4784    fn test_single_line() {
4785        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4786        assert_eq!(ctx.content, "# Hello");
4787        assert_eq!(ctx.line_offsets, vec![0]);
4788        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4789        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4790    }
4791
4792    #[test]
4793    fn test_multi_line() {
4794        let content = "# Title\n\nSecond line\nThird line";
4795        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4796        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4797        // Test offset to line/col
4798        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4799        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4800        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4801        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4802        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4803    }
4804
4805    #[test]
4806    fn test_line_info() {
4807        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4808        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4809
4810        // Test line info
4811        assert_eq!(ctx.lines.len(), 7);
4812
4813        // Line 1: "# Title"
4814        let line1 = &ctx.lines[0];
4815        assert_eq!(line1.content(ctx.content), "# Title");
4816        assert_eq!(line1.byte_offset, 0);
4817        assert_eq!(line1.indent, 0);
4818        assert!(!line1.is_blank);
4819        assert!(!line1.in_code_block);
4820        assert!(line1.list_item.is_none());
4821
4822        // Line 2: "    indented"
4823        let line2 = &ctx.lines[1];
4824        assert_eq!(line2.content(ctx.content), "    indented");
4825        assert_eq!(line2.byte_offset, 8);
4826        assert_eq!(line2.indent, 4);
4827        assert!(!line2.is_blank);
4828
4829        // Line 3: "" (blank)
4830        let line3 = &ctx.lines[2];
4831        assert_eq!(line3.content(ctx.content), "");
4832        assert!(line3.is_blank);
4833
4834        // Test helper methods
4835        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4836        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4837        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4838        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4839    }
4840
4841    #[test]
4842    fn test_list_item_detection() {
4843        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4844        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4845
4846        // Line 1: "- Unordered item"
4847        let line1 = &ctx.lines[0];
4848        assert!(line1.list_item.is_some());
4849        let list1 = line1.list_item.as_ref().unwrap();
4850        assert_eq!(list1.marker, "-");
4851        assert!(!list1.is_ordered);
4852        assert_eq!(list1.marker_column, 0);
4853        assert_eq!(list1.content_column, 2);
4854
4855        // Line 2: "  * Nested item"
4856        let line2 = &ctx.lines[1];
4857        assert!(line2.list_item.is_some());
4858        let list2 = line2.list_item.as_ref().unwrap();
4859        assert_eq!(list2.marker, "*");
4860        assert_eq!(list2.marker_column, 2);
4861
4862        // Line 3: "1. Ordered item"
4863        let line3 = &ctx.lines[2];
4864        assert!(line3.list_item.is_some());
4865        let list3 = line3.list_item.as_ref().unwrap();
4866        assert_eq!(list3.marker, "1.");
4867        assert!(list3.is_ordered);
4868        assert_eq!(list3.number, Some(1));
4869
4870        // Line 6: "Not a list"
4871        let line6 = &ctx.lines[5];
4872        assert!(line6.list_item.is_none());
4873    }
4874
4875    #[test]
4876    fn test_offset_to_line_col_edge_cases() {
4877        let content = "a\nb\nc";
4878        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4879        // line_offsets: [0, 2, 4]
4880        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4881        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4882        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4883        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4884        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4885        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4886    }
4887
4888    #[test]
4889    fn test_mdx_esm_blocks() {
4890        let content = r##"import {Chart} from './snowfall.js'
4891export const year = 2023
4892
4893# Last year's snowfall
4894
4895In {year}, the snowfall was above average.
4896It was followed by a warm spring which caused
4897flood conditions in many of the nearby rivers.
4898
4899<Chart color="#fcb32c" year={year} />
4900"##;
4901
4902        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4903
4904        // Check that lines 1 and 2 are marked as ESM blocks
4905        assert_eq!(ctx.lines.len(), 10);
4906        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4907        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4908        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4909        assert!(
4910            !ctx.lines[3].in_esm_block,
4911            "Line 4 (heading) should NOT be in_esm_block"
4912        );
4913        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4914        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4915    }
4916
4917    #[test]
4918    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4919        let content = r#"import {Chart} from './snowfall.js'
4920export const year = 2023
4921
4922# Last year's snowfall
4923"#;
4924
4925        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4926
4927        // ESM blocks should NOT be detected in Standard flavor
4928        assert!(
4929            !ctx.lines[0].in_esm_block,
4930            "Line 1 should NOT be in_esm_block in Standard flavor"
4931        );
4932        assert!(
4933            !ctx.lines[1].in_esm_block,
4934            "Line 2 should NOT be in_esm_block in Standard flavor"
4935        );
4936    }
4937
4938    #[test]
4939    fn test_blockquote_with_indented_content() {
4940        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4941        // The content inside the blockquote may also be detected as a code block (which is correct),
4942        // but for MD046 purposes, we need to know the line is inside a blockquote.
4943        let content = r#"# Heading
4944
4945>      -S socket-path
4946>                    More text
4947"#;
4948        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4949
4950        // Line 3 (index 2) should be detected as blockquote
4951        assert!(
4952            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4953            "Line 3 should be a blockquote"
4954        );
4955        // Line 4 (index 3) should also be blockquote
4956        assert!(
4957            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4958            "Line 4 should be a blockquote"
4959        );
4960
4961        // Verify blockquote content is correctly parsed
4962        // Note: spaces_after includes the spaces between `>` and content
4963        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4964        assert_eq!(bq3.content, "-S socket-path");
4965        assert_eq!(bq3.nesting_level, 1);
4966        // 6 spaces after the `>` marker
4967        assert!(bq3.has_multiple_spaces_after_marker);
4968
4969        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4970        assert_eq!(bq4.content, "More text");
4971        assert_eq!(bq4.nesting_level, 1);
4972    }
4973
4974    #[test]
4975    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4976        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4977        let content = r#"# Title
4978
4979A footnote[^1].
4980
4981[^1]: This is the footnote content.
4982
4983[^note]: Another footnote with [link](https://example.com).
4984
4985[regular]: ./path.md "A real reference definition"
4986"#;
4987        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4988
4989        // Should only have one reference definition (the regular one)
4990        assert_eq!(
4991            ctx.reference_defs.len(),
4992            1,
4993            "Footnotes should not be parsed as reference definitions"
4994        );
4995
4996        // The only reference def should be the regular one
4997        assert_eq!(ctx.reference_defs[0].id, "regular");
4998        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4999        assert_eq!(
5000            ctx.reference_defs[0].title,
5001            Some("A real reference definition".to_string())
5002        );
5003    }
5004
5005    #[test]
5006    fn test_footnote_with_inline_link_not_misidentified() {
5007        // Regression test for issue #286: footnote containing an inline link
5008        // was incorrectly parsed as a reference definition with URL "[link](url)"
5009        let content = r#"# Title
5010
5011A footnote[^1].
5012
5013[^1]: [link](https://www.google.com).
5014"#;
5015        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5016
5017        // Should have no reference definitions
5018        assert!(
5019            ctx.reference_defs.is_empty(),
5020            "Footnote with inline link should not create a reference definition"
5021        );
5022    }
5023
5024    #[test]
5025    fn test_various_footnote_formats_excluded() {
5026        // Test various footnote ID formats are all excluded
5027        let content = r#"[^1]: Numeric footnote
5028[^note]: Named footnote
5029[^a]: Single char footnote
5030[^long-footnote-name]: Long named footnote
5031[^123abc]: Mixed alphanumeric
5032
5033[ref1]: ./file1.md
5034[ref2]: ./file2.md
5035"#;
5036        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5037
5038        // Should only have the two regular reference definitions
5039        assert_eq!(
5040            ctx.reference_defs.len(),
5041            2,
5042            "Only regular reference definitions should be parsed"
5043        );
5044
5045        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5046        assert!(ids.contains(&"ref1"));
5047        assert!(ids.contains(&"ref2"));
5048        assert!(!ids.iter().any(|id| id.starts_with('^')));
5049    }
5050
5051    // =========================================================================
5052    // Tests for has_char and char_count methods
5053    // =========================================================================
5054
5055    #[test]
5056    fn test_has_char_tracked_characters() {
5057        // Test all 12 tracked characters
5058        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5059        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5060
5061        // All tracked characters should be detected
5062        assert!(ctx.has_char('#'), "Should detect hash");
5063        assert!(ctx.has_char('*'), "Should detect asterisk");
5064        assert!(ctx.has_char('_'), "Should detect underscore");
5065        assert!(ctx.has_char('-'), "Should detect hyphen");
5066        assert!(ctx.has_char('+'), "Should detect plus");
5067        assert!(ctx.has_char('>'), "Should detect gt");
5068        assert!(ctx.has_char('|'), "Should detect pipe");
5069        assert!(ctx.has_char('['), "Should detect bracket");
5070        assert!(ctx.has_char('`'), "Should detect backtick");
5071        assert!(ctx.has_char('<'), "Should detect lt");
5072        assert!(ctx.has_char('!'), "Should detect exclamation");
5073        assert!(ctx.has_char('\n'), "Should detect newline");
5074    }
5075
5076    #[test]
5077    fn test_has_char_absent_characters() {
5078        let content = "Simple text without special chars";
5079        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5080
5081        // None of the tracked characters should be present
5082        assert!(!ctx.has_char('#'), "Should not detect hash");
5083        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5084        assert!(!ctx.has_char('_'), "Should not detect underscore");
5085        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5086        assert!(!ctx.has_char('+'), "Should not detect plus");
5087        assert!(!ctx.has_char('>'), "Should not detect gt");
5088        assert!(!ctx.has_char('|'), "Should not detect pipe");
5089        assert!(!ctx.has_char('['), "Should not detect bracket");
5090        assert!(!ctx.has_char('`'), "Should not detect backtick");
5091        assert!(!ctx.has_char('<'), "Should not detect lt");
5092        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5093        // Note: single line content has no newlines
5094        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5095    }
5096
5097    #[test]
5098    fn test_has_char_fallback_for_untracked() {
5099        let content = "Text with @mention and $dollar and %percent";
5100        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5101
5102        // Untracked characters should fall back to content.contains()
5103        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5104        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5105        assert!(ctx.has_char('%'), "Should detect % via fallback");
5106        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5107    }
5108
5109    #[test]
5110    fn test_char_count_tracked_characters() {
5111        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5112        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5113
5114        // Count each tracked character
5115        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5116        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5117        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5118        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5119        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5120        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5121        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5122        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5123        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5124        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5125        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5126        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5127    }
5128
5129    #[test]
5130    fn test_char_count_zero_for_absent() {
5131        let content = "Plain text";
5132        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5133
5134        assert_eq!(ctx.char_count('#'), 0);
5135        assert_eq!(ctx.char_count('*'), 0);
5136        assert_eq!(ctx.char_count('_'), 0);
5137        assert_eq!(ctx.char_count('\n'), 0);
5138    }
5139
5140    #[test]
5141    fn test_char_count_fallback_for_untracked() {
5142        let content = "@@@ $$ %%%";
5143        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5144
5145        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5146        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5147        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5148        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5149    }
5150
5151    #[test]
5152    fn test_char_count_empty_content() {
5153        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5154
5155        assert_eq!(ctx.char_count('#'), 0);
5156        assert_eq!(ctx.char_count('*'), 0);
5157        assert_eq!(ctx.char_count('@'), 0);
5158        assert!(!ctx.has_char('#'));
5159        assert!(!ctx.has_char('@'));
5160    }
5161
5162    // =========================================================================
5163    // Tests for is_in_html_tag method
5164    // =========================================================================
5165
5166    #[test]
5167    fn test_is_in_html_tag_simple() {
5168        let content = "<div>content</div>";
5169        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5170
5171        // Inside opening tag
5172        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5173        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5174        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5175
5176        // Outside tag (in content)
5177        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5178        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5179
5180        // Inside closing tag
5181        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5182        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5183    }
5184
5185    #[test]
5186    fn test_is_in_html_tag_self_closing() {
5187        let content = "Text <br/> more text";
5188        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5189
5190        // Before tag
5191        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5192        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5193
5194        // Inside self-closing tag
5195        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5196        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5197        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5198
5199        // After tag
5200        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5201    }
5202
5203    #[test]
5204    fn test_is_in_html_tag_with_attributes() {
5205        let content = r#"<a href="url" class="link">text</a>"#;
5206        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5207
5208        // All positions inside opening tag with attributes
5209        assert!(ctx.is_in_html_tag(0), "Start of tag");
5210        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5211        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5212        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5213
5214        // Content between tags
5215        assert!(!ctx.is_in_html_tag(27), "Start of content");
5216        assert!(!ctx.is_in_html_tag(30), "End of content");
5217
5218        // Closing tag
5219        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5220    }
5221
5222    #[test]
5223    fn test_is_in_html_tag_multiline() {
5224        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5225        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5226
5227        // Opening tag spans multiple lines
5228        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5229        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5230        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5231
5232        // After closing > of opening tag
5233        let closing_bracket_pos = content.find(">\n").unwrap();
5234        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5235    }
5236
5237    #[test]
5238    fn test_is_in_html_tag_no_tags() {
5239        let content = "Plain text without any HTML";
5240        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5241
5242        // No position should be in an HTML tag
5243        for i in 0..content.len() {
5244            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5245        }
5246    }
5247
5248    // =========================================================================
5249    // Tests for is_in_jinja_range method
5250    // =========================================================================
5251
5252    #[test]
5253    fn test_is_in_jinja_range_expression() {
5254        let content = "Hello {{ name }}!";
5255        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5256
5257        // Before Jinja
5258        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5259        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5260
5261        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5262        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5263        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5264        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5265        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5266        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5267
5268        // After Jinja
5269        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5270    }
5271
5272    #[test]
5273    fn test_is_in_jinja_range_statement() {
5274        let content = "{% if condition %}content{% endif %}";
5275        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5276
5277        // Inside opening statement
5278        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5279        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5280        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5281
5282        // Content between
5283        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5284
5285        // Inside closing statement
5286        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5287        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5288    }
5289
5290    #[test]
5291    fn test_is_in_jinja_range_multiple() {
5292        let content = "{{ a }} and {{ b }}";
5293        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5294
5295        // First Jinja expression
5296        assert!(ctx.is_in_jinja_range(0));
5297        assert!(ctx.is_in_jinja_range(3));
5298        assert!(ctx.is_in_jinja_range(6));
5299
5300        // Between expressions
5301        assert!(!ctx.is_in_jinja_range(8));
5302        assert!(!ctx.is_in_jinja_range(11));
5303
5304        // Second Jinja expression
5305        assert!(ctx.is_in_jinja_range(12));
5306        assert!(ctx.is_in_jinja_range(15));
5307        assert!(ctx.is_in_jinja_range(18));
5308    }
5309
5310    #[test]
5311    fn test_is_in_jinja_range_no_jinja() {
5312        let content = "Plain text with single braces but not Jinja";
5313        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5314
5315        // No position should be in Jinja
5316        for i in 0..content.len() {
5317            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5318        }
5319    }
5320
5321    // =========================================================================
5322    // Tests for is_in_link_title method
5323    // =========================================================================
5324
5325    #[test]
5326    fn test_is_in_link_title_with_title() {
5327        let content = r#"[ref]: https://example.com "Title text"
5328
5329Some content."#;
5330        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5331
5332        // Verify we have a reference def with title
5333        assert_eq!(ctx.reference_defs.len(), 1);
5334        let def = &ctx.reference_defs[0];
5335        assert!(def.title_byte_start.is_some());
5336        assert!(def.title_byte_end.is_some());
5337
5338        let title_start = def.title_byte_start.unwrap();
5339        let title_end = def.title_byte_end.unwrap();
5340
5341        // Before title (in URL)
5342        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5343
5344        // Inside title
5345        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5346        assert!(
5347            ctx.is_in_link_title(title_start + 5),
5348            "Middle of title should be in title"
5349        );
5350        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5351
5352        // After title
5353        assert!(
5354            !ctx.is_in_link_title(title_end),
5355            "After title end should not be in title"
5356        );
5357    }
5358
5359    #[test]
5360    fn test_is_in_link_title_without_title() {
5361        let content = "[ref]: https://example.com\n\nSome content.";
5362        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5363
5364        // Reference def without title
5365        assert_eq!(ctx.reference_defs.len(), 1);
5366        let def = &ctx.reference_defs[0];
5367        assert!(def.title_byte_start.is_none());
5368        assert!(def.title_byte_end.is_none());
5369
5370        // No position should be in a title
5371        for i in 0..content.len() {
5372            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5373        }
5374    }
5375
5376    #[test]
5377    fn test_is_in_link_title_multiple_refs() {
5378        let content = r#"[ref1]: /url1 "Title One"
5379[ref2]: /url2
5380[ref3]: /url3 "Title Three"
5381"#;
5382        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5383
5384        // Should have 3 reference defs
5385        assert_eq!(ctx.reference_defs.len(), 3);
5386
5387        // ref1 has title
5388        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5389        assert!(ref1.title_byte_start.is_some());
5390
5391        // ref2 has no title
5392        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5393        assert!(ref2.title_byte_start.is_none());
5394
5395        // ref3 has title
5396        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5397        assert!(ref3.title_byte_start.is_some());
5398
5399        // Check positions in ref1's title
5400        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5401            assert!(ctx.is_in_link_title(start + 1));
5402            assert!(!ctx.is_in_link_title(end + 5));
5403        }
5404
5405        // Check positions in ref3's title
5406        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5407            assert!(ctx.is_in_link_title(start + 1));
5408        }
5409    }
5410
5411    #[test]
5412    fn test_is_in_link_title_single_quotes() {
5413        let content = "[ref]: /url 'Single quoted title'\n";
5414        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5415
5416        assert_eq!(ctx.reference_defs.len(), 1);
5417        let def = &ctx.reference_defs[0];
5418
5419        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5420            assert!(ctx.is_in_link_title(start));
5421            assert!(ctx.is_in_link_title(start + 5));
5422            assert!(!ctx.is_in_link_title(end));
5423        }
5424    }
5425
5426    #[test]
5427    fn test_is_in_link_title_parentheses() {
5428        // Note: The reference def parser may not support parenthesized titles
5429        // This test verifies the is_in_link_title method works when titles exist
5430        let content = "[ref]: /url (Parenthesized title)\n";
5431        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5432
5433        // Parser behavior: may or may not parse parenthesized titles
5434        // We test that is_in_link_title correctly reflects whatever was parsed
5435        if ctx.reference_defs.is_empty() {
5436            // Parser didn't recognize this as a reference def
5437            for i in 0..content.len() {
5438                assert!(!ctx.is_in_link_title(i));
5439            }
5440        } else {
5441            let def = &ctx.reference_defs[0];
5442            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5443                assert!(ctx.is_in_link_title(start));
5444                assert!(ctx.is_in_link_title(start + 5));
5445                assert!(!ctx.is_in_link_title(end));
5446            } else {
5447                // Title wasn't parsed, so no position should be in title
5448                for i in 0..content.len() {
5449                    assert!(!ctx.is_in_link_title(i));
5450                }
5451            }
5452        }
5453    }
5454
5455    #[test]
5456    fn test_is_in_link_title_no_refs() {
5457        let content = "Just plain text without any reference definitions.";
5458        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5459
5460        assert!(ctx.reference_defs.is_empty());
5461
5462        for i in 0..content.len() {
5463            assert!(!ctx.is_in_link_title(i));
5464        }
5465    }
5466
5467    // =========================================================================
5468    // Math span tests (Issue #289)
5469    // =========================================================================
5470
5471    #[test]
5472    fn test_math_spans_inline() {
5473        let content = "Text with inline math $[f](x)$ in it.";
5474        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5475
5476        let math_spans = ctx.math_spans();
5477        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5478
5479        let span = &math_spans[0];
5480        assert!(!span.is_display, "Should be inline math, not display");
5481        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5482    }
5483
5484    #[test]
5485    fn test_math_spans_display_single_line() {
5486        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5487        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5488
5489        let math_spans = ctx.math_spans();
5490        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5491
5492        let span = &math_spans[0];
5493        assert!(span.is_display, "Should be display math");
5494        assert!(
5495            span.content.contains("[x](\\zeta)"),
5496            "Content should contain the link-like pattern"
5497        );
5498    }
5499
#[test]
fn test_math_spans_display_multiline() {
    // A `$$` block spread over several lines, surrounded by ordinary paragraphs,
    // must still be reported as a single display math span.
    let source = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let ctx = LintContext::new(source, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");
    assert!(spans[0].is_display, "Should be display math");
}
5511
#[test]
fn test_is_in_math_span() {
    // Offsets between the two `$` delimiters must count as "in math"; offsets
    // before the opening delimiter and after the closing one must not.
    let content = "Text $[f](x)$ more text";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // `math_end` is one past the closing `$`, i.e. the first byte after the span.
    let math_start = content.find('$').unwrap();
    let math_end = content.rfind('$').unwrap() + 1;

    // Positions inside the math span
    for inside in [math_start + 1, math_start + 3] {
        assert!(
            ctx.is_in_math_span(inside),
            "Position inside math span should return true"
        );
    }

    // Positions outside the math span
    assert!(!ctx.is_in_math_span(0), "Position before math span should return false");

    // The byte directly after the closing `$` must already be outside. This is the
    // same exclusive-end behaviour that test_is_in_math_span_boundary_positions pins
    // via `byte_end`; probing only `math_end + 1` would let an off-by-one slip through.
    assert!(
        !ctx.is_in_math_span(math_end),
        "Position immediately after math span should return false"
    );
    assert!(
        !ctx.is_in_math_span(math_end + 1),
        "Position after math span should return false"
    );
}
5537
#[test]
fn test_math_spans_mixed_with_code() {
    // One math span and one code span on the same line: each must be tracked
    // in its own collection, with its own delimiter-free content.
    let ctx = LintContext::new("Math $[f](x)$ and code `[g](y)` mixed", MarkdownFlavor::Standard, None);

    let math = ctx.math_spans();
    let code = ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // The text between the `$` delimiters
    assert_eq!(math[0].content, "[f](x)");
    // The text between the backticks
    assert_eq!(code[0].content, "[g](y)");
}
5554
#[test]
fn test_math_spans_no_math() {
    // Plain prose without any `$` must yield an empty span list.
    let ctx = LintContext::new("Regular text without any math at all.", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert!(spans.is_empty(), "Should have no math spans");
}
5563
#[test]
fn test_math_spans_multiple() {
    // Two `$...$` spans followed by one `$$...$$` span in a single paragraph.
    let ctx = LintContext::new(
        "First $a$ and second $b$ and display $$c$$",
        MarkdownFlavor::Standard,
        None,
    );

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Split the spans by kind in one pass: display on the left, inline on the right.
    let (display, inline): (Vec<_>, Vec<_>) = spans.iter().partition(|span| span.is_display);

    assert_eq!(inline.len(), 2, "Should have two inline math spans");
    assert_eq!(display.len(), 1, "Should have one display math span");
}
5579
#[test]
fn test_is_in_math_span_boundary_positions() {
    // The whole document is a single math span:
    //   text:  $[f](x)$
    //   bytes: 01234567
    let content = "$[f](x)$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let (start, end) = (spans[0].byte_offset, spans[0].byte_end);

    // Inclusive start: the opening `$` itself belongs to the span.
    assert!(ctx.is_in_math_span(start), "Start position should be in span");

    // The first byte after the opening delimiter is inside as well.
    assert!(ctx.is_in_math_span(start + 1), "Position after start should be in span");

    // The closing `$` is the last byte of the span (`byte_end - 1`).
    assert!(ctx.is_in_math_span(end - 1), "Position at end-1 should be in span");

    // Exclusive end: `byte_end` itself is already outside.
    assert!(
        !ctx.is_in_math_span(end),
        "Position at byte_end should NOT be in span (exclusive)"
    );
}
5616
#[test]
fn test_math_spans_at_document_start() {
    // A span that opens on the very first byte must report offset 0.
    let ctx = LintContext::new("$x$ text", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let start = spans[0].byte_offset;
    assert_eq!(start, 0, "Math should start at byte 0");
}
5626
#[test]
fn test_math_spans_at_document_end() {
    // A span whose closing `$` is the last byte must end exactly at the document length.
    let text = "text $x$";
    let ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let end = spans[0].byte_end;
    assert_eq!(end, text.len(), "Math should end at document end");
}
5636
#[test]
fn test_math_spans_consecutive() {
    // Two inline spans back to back, with no text between them.
    let content = "$a$$b$";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // pulldown-cmark should parse these as separate spans; the test only
    // requires that at least one span was found.
    let found_any = !ctx.math_spans().is_empty();
    assert!(found_any, "Should detect at least one math span");

    // However the input was split, every byte must be covered by some math span.
    for i in 0..content.len() {
        assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
    }
}
5651
#[test]
fn test_math_spans_currency_not_math() {
    // A lone `$` (as in a price) has no closing delimiter, so the amount after
    // it must not end up inside any math span.
    let ctx = LintContext::new("Price is $100", MarkdownFlavor::Standard, None);

    let spans = ctx.math_spans();
    // `all` is vacuously true for an empty list, which covers the
    // "no math spans at all" outcome as well.
    let amount_is_not_math = spans.iter().all(|span| !span.content.contains("100"));
    assert!(
        amount_is_not_math,
        "Unbalanced $ should not create math span containing 100"
    );
}
5666
5667    // =========================================================================
5668    // Tests for O(1) reference definition lookups via HashMap
5669    // =========================================================================
5670
#[test]
fn test_reference_lookup_o1_basic() {
    let content = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // All three definitions must have been collected.
    assert_eq!(ctx.reference_defs.len(), 3);

    // URL lookups ignore the case of the id; an unknown id yields `None`.
    let expectations = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];
    for (id, expected_url) in expectations {
        assert_eq!(ctx.get_reference_url(id), expected_url, "lookup of {id:?}");
    }
}
5692
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let content = r#"[myref]: https://example.com "My Title"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Lookup with the id exactly as written exposes both URL and title.
    let exact = ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // An upper-cased id resolves to the same definition.
    let upper = ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    // An id that was never defined yields nothing.
    let missing = ctx.get_reference_def("nonexistent");
    assert!(missing.is_none());
}
5711
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let content = r#"[foo]: /foo
[BAR]: /bar
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Both definitions are found regardless of the case used in the query.
    for known in ["foo", "FOO", "bar", "Bar"] {
        assert!(ctx.has_reference_def(known), "expected a definition for {known:?}");
    }

    // An id without a definition is reported as absent.
    assert!(!ctx.has_reference_def("baz"));
}
5726
#[test]
fn test_reference_lookup_o1_empty_content() {
    // Without any definitions in the document, all three lookup flavours must come back empty.
    let ctx = LintContext::new("No references here.", MarkdownFlavor::Standard, None);
    let id = "anything";

    assert!(ctx.reference_defs.is_empty());
    assert_eq!(ctx.get_reference_url(id), None);
    assert!(ctx.get_reference_def(id).is_none());
    assert!(!ctx.has_reference_def(id));
}
5737
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let content = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Dashes, underscores and dots in an id must not interfere with the lookup.
    let expectations = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];
    for (id, url) in expectations {
        assert_eq!(ctx.get_reference_url(id), Some(url), "lookup of {id:?}");
    }
}
5750
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let content = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Non-ASCII ids resolve as written, and the accented id also resolves
    // when queried in upper case.
    let expectations = [
        ("日本語", "/japanese"),
        ("émoji", "/emoji"),
        ("ÉMOJI", "/emoji"),
    ];
    for (id, url) in expectations {
        assert_eq!(ctx.get_reference_url(id), Some(url), "lookup of {id:?}");
    }
}
5762}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs