rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Evaluates `$code` and yields its value, timing the evaluation along the way.
///
/// Arguments: a display label, a `bool` expression that enables reporting, and
/// the expression to run. When reporting is enabled the elapsed time is written
/// to stderr as `[PROFILE] <label>: <duration>`.
/// This variant is compiled for every target except WASM.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

// WASM variant: no timing at all, the macro expands to the code expression alone.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
// Comprehensive link pattern that captures both inline links (`[text](url "title")`)
// and reference links (`[text][ref]`).
// Flags: `s` makes `.` match newlines; `x` enables verbose mode, which is what allows
// the whitespace and trailing `#` comments inside the pattern below.
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 / 5 = title in double / single quotes, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
43
// Image pattern: same shape as the link pattern, but the match must start with `!`.
// Flags: `s` makes `.` match newlines; `x` enables verbose mode, which is what allows
// the whitespace and trailing `#` comments inside the pattern below.
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 / 5 = title in double / single quotes, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
57
// Reference definition pattern: `[id]: url "optional title"` on a single line.
// `(?m)` makes `^` / `$` match at line boundaries; up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (no whitespace),
// 3 / 4 = title in double / single quotes.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs: no pattern is defined in this file; the shared `URL_SIMPLE_REGEX` imported from `regex_cache` is used instead
63
// Pattern for email addresses: local part, `@`, domain, then a dot followed by
// an alphabetic top-level label of at least two letters.
// The pattern is unanchored, so it can match anywhere inside a larger string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the input; group 1 captures optional leading whitespace,
// one or more `>` markers, and any whitespace that follows them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
/// Pre-computed information about a line
///
/// Entries are stored in `LintContext::lines`. The line text itself is not copied;
/// it is recovered from the source document with [`LineInfo::content`].
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line.
    /// Set by `LintContext::new` on every line after the first line of a code span
    /// whose `end_line` is greater than its `line`.
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
}
112
113impl LineInfo {
114    /// Get the line content as a string slice from the source document
115    pub fn content<'a>(&self, source: &'a str) -> &'a str {
116        &source[self.byte_offset..self.byte_offset + self.byte_len]
117    }
118}
119
/// Information about a list item
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// (presumably `None` for unordered items — confirm against the parser)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
134
/// Heading style type
///
/// A plain field-less enum, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets styles be compared totally and used as keys in
/// `HashSet` / `HashMap`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
145
/// Parsed link information
///
/// Entries are collected in `LintContext::links`. The text fields are `Cow`s tied
/// to the source lifetime `'a`, so they can borrow from the document instead of
/// allocating.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
170
/// Information about a broken link reported by pulldown-cmark
///
/// Entries are collected in `LintContext::broken_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (a `Range`, so the end is exclusive)
    pub span: std::ops::Range<usize>,
}
179
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Entries are collected in `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
192
/// Parsed image information
///
/// Entries are collected in `LintContext::images`. The text fields are `Cow`s tied
/// to the source lifetime `'a`, so they can borrow from the document instead of
/// allocating.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
217
/// Reference definition [ref]: url "title"
///
/// Entries are collected in `LintContext::reference_defs`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
238
/// Parsed code span information
///
/// A span whose `end_line` is greater than `line` covers several lines;
/// `LintContext::new` marks every line after the first one through
/// `LineInfo::in_code_span_continuation`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
259
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// `LintContext::is_in_math_span` treats `byte_offset..byte_end` as a half-open
/// range: `byte_offset` is inside the span, `byte_end` is not.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
280
/// Information about a heading
///
/// Stored in `LineInfo::heading`. `ValidHeadingsIter` only yields headings whose
/// `is_valid` flag is set.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
308
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; both references borrow
/// from the slice of `LineInfo` being iterated.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document (slice index + 1)
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
322
323/// Iterator over valid CommonMark headings in a document
324///
325/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
326/// but should not be processed by other heading rules.
327pub struct ValidHeadingsIter<'a> {
328    lines: &'a [LineInfo],
329    current_index: usize,
330}
331
332impl<'a> ValidHeadingsIter<'a> {
333    fn new(lines: &'a [LineInfo]) -> Self {
334        Self {
335            lines,
336            current_index: 0,
337        }
338    }
339}
340
341impl<'a> Iterator for ValidHeadingsIter<'a> {
342    type Item = ValidHeading<'a>;
343
344    fn next(&mut self) -> Option<Self::Item> {
345        while self.current_index < self.lines.len() {
346            let idx = self.current_index;
347            self.current_index += 1;
348
349            let line_info = &self.lines[idx];
350            if let Some(heading) = &line_info.heading
351                && heading.is_valid
352            {
353                return Some(ValidHeading {
354                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
355                    heading,
356                    line_info,
357                });
358            }
359        }
360        None
361    }
362}
363
/// Information about a blockquote line
///
/// Stored in `LineInfo::blockquote` for lines that are part of a blockquote.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
384
/// Information about a list block
///
/// Entries are collected in `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// (presumably 1-indexed like `start_line` — confirm against the parser)
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
405
406use std::sync::{Arc, OnceLock};
407
/// Map from line byte offset to list item data.
///
/// The tuple fields are, in order:
/// `(is_ordered, marker, marker_column, content_column, number)`.
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
410
/// Character frequency data for fast content analysis
///
/// Stored in `LintContext::char_frequency`. The derived `Default` yields all
/// counts at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
439
/// Pre-parsed HTML tag information
///
/// Returned (behind an `Arc`) by `LintContext::html_tags`.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
462
/// Pre-parsed emphasis span information
///
/// Returned (behind an `Arc`) by `LintContext::emphasis_spans`; the spans are
/// produced during construction alongside the basic line info.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
483
/// Pre-parsed table row information
///
/// Returned (behind an `Arc`) by `LintContext::table_rows`.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row; each entry is one of
    /// "left", "center", "right", "none"
    pub column_alignments: Vec<String>,
}
496
/// Pre-parsed bare URL information (not in links)
///
/// Returned (behind an `Arc`) by `LintContext::bare_urls`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
515
/// Shared, pre-computed view of one Markdown document.
///
/// Built once by `LintContext::new`, which parses `content` and stores the
/// results here. Public fields hold data computed during construction; the
/// private `*_cache` fields are `OnceLock`s read through accessor methods.
/// Some caches are seeded in `new`, the others are filled on first access.
pub struct LintContext<'a> {
    pub content: &'a str,                 // The source document this context borrows from
    pub line_offsets: Vec<usize>,         // Byte offset at which each line starts (first entry is 0)
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (seeded in `new`; read via `code_spans()`)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Emphasis spans (seeded in `new`; read via `emphasis_spans()`)
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
}
542
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Split a line into indent, `>` markers, the whitespace after them, and the rest.
///
/// Returns `None` when the first non-space/tab byte is not `>` (including empty
/// or whitespace-only lines). Only a contiguous run of `>` counts as markers, so
/// for `"> > x"` the markers are `">"` and the content is `"> x"`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Length of the leading run of bytes accepted by `accept`.
    fn run_len(bytes: &[u8], accept: fn(u8) -> bool) -> usize {
        bytes.iter().take_while(|&&b| accept(b)).count()
    }
    fn is_space_or_tab(b: u8) -> bool {
        b == b' ' || b == b'\t'
    }
    fn is_marker(b: u8) -> bool {
        b == b'>'
    }

    let raw = line.as_bytes();

    let indent_len = run_len(raw, is_space_or_tab);

    // At least one '>' is required right after the indent.
    let marker_len = run_len(&raw[indent_len..], is_marker);
    if marker_len == 0 {
        return None;
    }

    let gap_len = run_len(&raw[indent_len + marker_len..], is_space_or_tab);

    // Every split point follows an ASCII byte, so it is always a char boundary.
    let (indent, rest) = line.split_at(indent_len);
    let (markers, rest) = rest.split_at(marker_len);
    let (spaces_after, content) = rest.split_at(gap_len);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
587
588impl<'a> LintContext<'a> {
    /// Build a `LintContext` for `content`, running every pre-computation pass once.
    ///
    /// * `content` - the Markdown source; the context borrows it for `'a`.
    /// * `flavor` - Markdown flavor, forwarded to the flavor-sensitive passes.
    /// * `source_file` - optional path of the file being linted, stored as-is.
    ///
    /// The passes run in a fixed order because later passes consume the results
    /// of earlier ones (e.g. code blocks feed the line info, code spans feed
    /// link/image parsing). On non-WASM targets, setting the
    /// `RUMDL_PROFILE_QUADRATIC` environment variable (to any value) makes each
    /// pass print its elapsed time to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        // Profiling is opt-in via the environment and always off on WASM.
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset of the start of every line: 0, then the byte after each '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Detect code blocks once and cache them
        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        // Pre-compute HTML comment ranges ONCE for all operations
        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
        // Other flavors get an empty list.
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
        let (mut lines, emphasis_spans) = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // Detect HTML blocks BEFORE heading detection
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        // Detect ESM import/export blocks in MDX files BEFORE heading detection
        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        // Now detect headings and blockquotes
        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        // Parse code spans early so we can exclude them from link/image parsing
        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark lines that are continuations of multi-line code spans
        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
        for span in &code_spans {
            if span.end_line > span.line {
                // Mark lines after the first line as continuations
                // (span line numbers are 1-indexed, hence the `- 1` when indexing `lines`)
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        // Parse links, images, references, and list blocks
        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        // Compute character frequency for fast content analysis
        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Already parsed above, so the cache starts out filled.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
            list_blocks,
            char_frequency,
            html_tags_cache: OnceLock::new(),
            // Produced by compute_basic_line_info, so the cache starts out filled.
            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
748
749    /// Get code spans - computed lazily on first access
750    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
751        Arc::clone(
752            self.code_spans_cache
753                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
754        )
755    }
756
757    /// Get math spans - computed lazily on first access
758    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
759        Arc::clone(
760            self.math_spans_cache
761                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
762        )
763    }
764
765    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
766    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
767        let math_spans = self.math_spans();
768        math_spans
769            .iter()
770            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
771    }
772
773    /// Get HTML comment ranges - pre-computed during LintContext construction
774    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
775        &self.html_comment_ranges
776    }
777
778    /// Get HTML tags - computed lazily on first access
779    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
780        Arc::clone(self.html_tags_cache.get_or_init(|| {
781            Arc::new(Self::parse_html_tags(
782                self.content,
783                &self.lines,
784                &self.code_blocks,
785                self.flavor,
786            ))
787        }))
788    }
789
790    /// Get emphasis spans - pre-computed during construction
791    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
792        Arc::clone(
793            self.emphasis_spans_cache
794                .get()
795                .expect("emphasis_spans_cache initialized during construction"),
796        )
797    }
798
799    /// Get table rows - computed lazily on first access
800    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
801        Arc::clone(
802            self.table_rows_cache
803                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
804        )
805    }
806
807    /// Get bare URLs - computed lazily on first access
808    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
809        Arc::clone(
810            self.bare_urls_cache
811                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
812        )
813    }
814
815    /// Check if document has mixed ordered/unordered list nesting.
816    /// Result is cached after first computation (document-level invariant).
817    /// This is used by MD007 for smart style auto-detection.
818    pub fn has_mixed_list_nesting(&self) -> bool {
819        *self
820            .has_mixed_list_nesting_cache
821            .get_or_init(|| self.compute_mixed_list_nesting())
822    }
823
824    /// Internal computation for mixed list nesting (only called once per LintContext).
825    fn compute_mixed_list_nesting(&self) -> bool {
826        // Track parent list items by their marker position and type
827        // Using marker_column instead of indent because it works correctly
828        // for blockquoted content where indent doesn't account for the prefix
829        // Stack stores: (marker_column, is_ordered)
830        let mut stack: Vec<(usize, bool)> = Vec::new();
831        let mut last_was_blank = false;
832
833        for line_info in &self.lines {
834            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
835            if line_info.in_code_block
836                || line_info.in_front_matter
837                || line_info.in_mkdocstrings
838                || line_info.in_html_comment
839                || line_info.in_esm_block
840            {
841                continue;
842            }
843
844            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
845            if line_info.is_blank {
846                last_was_blank = true;
847                continue;
848            }
849
850            if let Some(list_item) = &line_info.list_item {
851                // Normalize column 1 to column 0 (consistent with MD007 check function)
852                let current_pos = if list_item.marker_column == 1 {
853                    0
854                } else {
855                    list_item.marker_column
856                };
857
858                // If there was a blank line and this item is at root level, reset stack
859                if last_was_blank && current_pos == 0 {
860                    stack.clear();
861                }
862                last_was_blank = false;
863
864                // Pop items at same or greater position (they're siblings or deeper, not parents)
865                while let Some(&(pos, _)) = stack.last() {
866                    if pos >= current_pos {
867                        stack.pop();
868                    } else {
869                        break;
870                    }
871                }
872
873                // Check if immediate parent has different type - this is mixed nesting
874                if let Some(&(_, parent_is_ordered)) = stack.last()
875                    && parent_is_ordered != list_item.is_ordered
876                {
877                    return true; // Found mixed nesting - early exit
878                }
879
880                stack.push((current_pos, list_item.is_ordered));
881            } else {
882                // Non-list line (but not blank) - could be paragraph or other content
883                last_was_blank = false;
884            }
885        }
886
887        false
888    }
889
890    /// Map a byte offset to (line, column)
891    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
892        match self.line_offsets.binary_search(&offset) {
893            Ok(line) => (line + 1, 1),
894            Err(line) => {
895                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
896                (line, offset - line_start + 1)
897            }
898        }
899    }
900
901    /// Check if a position is within a code block or code span
902    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
903        // Check code blocks first
904        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
905            return true;
906        }
907
908        // Check inline code spans (lazy load if needed)
909        self.code_spans()
910            .iter()
911            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
912    }
913
914    /// Get line information by line number (1-indexed)
915    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
916        if line_num > 0 {
917            self.lines.get(line_num - 1)
918        } else {
919            None
920        }
921    }
922
923    /// Get byte offset for a line number (1-indexed)
924    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
925        self.line_info(line_num).map(|info| info.byte_offset)
926    }
927
928    /// Get URL for a reference link/image by its ID
929    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
930        let normalized_id = ref_id.to_lowercase();
931        self.reference_defs
932            .iter()
933            .find(|def| def.id == normalized_id)
934            .map(|def| def.url.as_str())
935    }
936
937    /// Check if a line is part of a list block
938    pub fn is_in_list_block(&self, line_num: usize) -> bool {
939        self.list_blocks
940            .iter()
941            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
942    }
943
944    /// Get the list block containing a specific line
945    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
946        self.list_blocks
947            .iter()
948            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
949    }
950
951    // Compatibility methods for DocumentStructure migration
952
953    /// Check if a line is within a code block
954    pub fn is_in_code_block(&self, line_num: usize) -> bool {
955        if line_num == 0 || line_num > self.lines.len() {
956            return false;
957        }
958        self.lines[line_num - 1].in_code_block
959    }
960
961    /// Check if a line is within front matter
962    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
963        if line_num == 0 || line_num > self.lines.len() {
964            return false;
965        }
966        self.lines[line_num - 1].in_front_matter
967    }
968
969    /// Check if a line is within an HTML block
970    pub fn is_in_html_block(&self, line_num: usize) -> bool {
971        if line_num == 0 || line_num > self.lines.len() {
972            return false;
973        }
974        self.lines[line_num - 1].in_html_block
975    }
976
977    /// Check if a line and column is within a code span
978    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
979        if line_num == 0 || line_num > self.lines.len() {
980            return false;
981        }
982
983        // Use the code spans cache to check
984        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
985        // Convert col to 0-indexed for comparison
986        let col_0indexed = if col > 0 { col - 1 } else { 0 };
987        let code_spans = self.code_spans();
988        code_spans.iter().any(|span| {
989            // Check if line is within the span's line range
990            if line_num < span.line || line_num > span.end_line {
991                return false;
992            }
993
994            if span.line == span.end_line {
995                // Single-line span: check column bounds
996                col_0indexed >= span.start_col && col_0indexed < span.end_col
997            } else if line_num == span.line {
998                // First line of multi-line span: anything after start_col is in span
999                col_0indexed >= span.start_col
1000            } else if line_num == span.end_line {
1001                // Last line of multi-line span: anything before end_col is in span
1002                col_0indexed < span.end_col
1003            } else {
1004                // Middle line of multi-line span: entire line is in span
1005                true
1006            }
1007        })
1008    }
1009
1010    /// Check if a byte offset is within a code span
1011    #[inline]
1012    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1013        let code_spans = self.code_spans();
1014        code_spans
1015            .iter()
1016            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1017    }
1018
1019    /// Check if a byte position is within a reference definition
1020    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1021    #[inline]
1022    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1023        self.reference_defs
1024            .iter()
1025            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1026    }
1027
1028    /// Check if a byte position is within an HTML comment
1029    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1030    /// where k is the number of HTML comments (typically very small)
1031    #[inline]
1032    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1033        self.html_comment_ranges
1034            .iter()
1035            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1036    }
1037
1038    /// Check if a byte position is within an HTML tag (including multiline tags)
1039    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1040    #[inline]
1041    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1042        self.html_tags()
1043            .iter()
1044            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1045    }
1046
1047    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1048    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1049        self.jinja_ranges
1050            .iter()
1051            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1052    }
1053
1054    /// Check if a byte position is within a link reference definition title
1055    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1056        self.reference_defs.iter().any(|def| {
1057            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1058                byte_pos >= start && byte_pos < end
1059            } else {
1060                false
1061            }
1062        })
1063    }
1064
1065    /// Check if content has any instances of a specific character (fast)
1066    pub fn has_char(&self, ch: char) -> bool {
1067        match ch {
1068            '#' => self.char_frequency.hash_count > 0,
1069            '*' => self.char_frequency.asterisk_count > 0,
1070            '_' => self.char_frequency.underscore_count > 0,
1071            '-' => self.char_frequency.hyphen_count > 0,
1072            '+' => self.char_frequency.plus_count > 0,
1073            '>' => self.char_frequency.gt_count > 0,
1074            '|' => self.char_frequency.pipe_count > 0,
1075            '[' => self.char_frequency.bracket_count > 0,
1076            '`' => self.char_frequency.backtick_count > 0,
1077            '<' => self.char_frequency.lt_count > 0,
1078            '!' => self.char_frequency.exclamation_count > 0,
1079            '\n' => self.char_frequency.newline_count > 0,
1080            _ => self.content.contains(ch), // Fallback for other characters
1081        }
1082    }
1083
1084    /// Get count of a specific character (fast)
1085    pub fn char_count(&self, ch: char) -> usize {
1086        match ch {
1087            '#' => self.char_frequency.hash_count,
1088            '*' => self.char_frequency.asterisk_count,
1089            '_' => self.char_frequency.underscore_count,
1090            '-' => self.char_frequency.hyphen_count,
1091            '+' => self.char_frequency.plus_count,
1092            '>' => self.char_frequency.gt_count,
1093            '|' => self.char_frequency.pipe_count,
1094            '[' => self.char_frequency.bracket_count,
1095            '`' => self.char_frequency.backtick_count,
1096            '<' => self.char_frequency.lt_count,
1097            '!' => self.char_frequency.exclamation_count,
1098            '\n' => self.char_frequency.newline_count,
1099            _ => self.content.matches(ch).count(), // Fallback for other characters
1100        }
1101    }
1102
1103    /// Check if content likely contains headings (fast)
1104    pub fn likely_has_headings(&self) -> bool {
1105        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1106    }
1107
1108    /// Check if content likely contains lists (fast)
1109    pub fn likely_has_lists(&self) -> bool {
1110        self.char_frequency.asterisk_count > 0
1111            || self.char_frequency.hyphen_count > 0
1112            || self.char_frequency.plus_count > 0
1113    }
1114
1115    /// Check if content likely contains emphasis (fast)
1116    pub fn likely_has_emphasis(&self) -> bool {
1117        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1118    }
1119
1120    /// Check if content likely contains tables (fast)
1121    pub fn likely_has_tables(&self) -> bool {
1122        self.char_frequency.pipe_count > 2
1123    }
1124
1125    /// Check if content likely contains blockquotes (fast)
1126    pub fn likely_has_blockquotes(&self) -> bool {
1127        self.char_frequency.gt_count > 0
1128    }
1129
1130    /// Check if content likely contains code (fast)
1131    pub fn likely_has_code(&self) -> bool {
1132        self.char_frequency.backtick_count > 0
1133    }
1134
1135    /// Check if content likely contains links or images (fast)
1136    pub fn likely_has_links_or_images(&self) -> bool {
1137        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1138    }
1139
1140    /// Check if content likely contains HTML (fast)
1141    pub fn likely_has_html(&self) -> bool {
1142        self.char_frequency.lt_count > 0
1143    }
1144
1145    /// Get the blockquote prefix for inserting a blank line at the given line index.
1146    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1147    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1148    /// Returns an empty string if the line is not inside a blockquote.
1149    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1150        if let Some(line_info) = self.lines.get(line_idx)
1151            && let Some(ref bq) = line_info.blockquote
1152        {
1153            bq.prefix.trim_end().to_string()
1154        } else {
1155            String::new()
1156        }
1157    }
1158
1159    /// Get HTML tags on a specific line
1160    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1161        self.html_tags()
1162            .iter()
1163            .filter(|tag| tag.line == line_num)
1164            .cloned()
1165            .collect()
1166    }
1167
1168    /// Get emphasis spans on a specific line
1169    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1170        self.emphasis_spans()
1171            .iter()
1172            .filter(|span| span.line == line_num)
1173            .cloned()
1174            .collect()
1175    }
1176
1177    /// Get table rows on a specific line
1178    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1179        self.table_rows()
1180            .iter()
1181            .filter(|row| row.line == line_num)
1182            .cloned()
1183            .collect()
1184    }
1185
1186    /// Get bare URLs on a specific line
1187    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1188        self.bare_urls()
1189            .iter()
1190            .filter(|url| url.line == line_num)
1191            .cloned()
1192            .collect()
1193    }
1194
1195    /// Find the line index for a given byte offset using binary search.
1196    /// Returns (line_index, line_number, column) where:
1197    /// - line_index is the 0-based index in the lines array
1198    /// - line_number is the 1-based line number
1199    /// - column is the byte offset within that line
1200    #[inline]
1201    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1202        // Binary search to find the line containing this byte offset
1203        let idx = match lines.binary_search_by(|line| {
1204            if byte_offset < line.byte_offset {
1205                std::cmp::Ordering::Greater
1206            } else if byte_offset > line.byte_offset + line.byte_len {
1207                std::cmp::Ordering::Less
1208            } else {
1209                std::cmp::Ordering::Equal
1210            }
1211        }) {
1212            Ok(idx) => idx,
1213            Err(idx) => idx.saturating_sub(1),
1214        };
1215
1216        let line = &lines[idx];
1217        let line_num = idx + 1;
1218        let col = byte_offset.saturating_sub(line.byte_offset);
1219
1220        (idx, line_num, col)
1221    }
1222
1223    /// Check if a byte offset is within a code span using binary search
1224    #[inline]
1225    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1226        // Since spans are sorted by byte_offset, use partition_point for binary search
1227        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1228
1229        // Check the span that starts at or before our offset
1230        if idx > 0 {
1231            let span = &code_spans[idx - 1];
1232            if offset >= span.byte_offset && offset < span.byte_end {
1233                return true;
1234            }
1235        }
1236
1237        false
1238    }
1239
1240    /// Collect byte ranges of all links using pulldown-cmark
1241    /// This is used to skip heading detection for lines that fall within link syntax
1242    /// (e.g., multiline links like `[text](url\n#fragment)`)
1243    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1244        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1245
1246        let mut link_ranges = Vec::new();
1247        let mut options = Options::empty();
1248        options.insert(Options::ENABLE_WIKILINKS);
1249        options.insert(Options::ENABLE_FOOTNOTES);
1250
1251        let parser = Parser::new_ext(content, options).into_offset_iter();
1252        let mut link_stack: Vec<usize> = Vec::new();
1253
1254        for (event, range) in parser {
1255            match event {
1256                Event::Start(Tag::Link { .. }) => {
1257                    link_stack.push(range.start);
1258                }
1259                Event::End(TagEnd::Link) => {
1260                    if let Some(start_pos) = link_stack.pop() {
1261                        link_ranges.push((start_pos, range.end));
1262                    }
1263                }
1264                _ => {}
1265            }
1266        }
1267
1268        link_ranges
1269    }
1270
1271    /// Parse all links in the content
1272    fn parse_links(
1273        content: &'a str,
1274        lines: &[LineInfo],
1275        code_blocks: &[(usize, usize)],
1276        code_spans: &[CodeSpan],
1277        flavor: MarkdownFlavor,
1278        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1279    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1280        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1281        use std::collections::HashSet;
1282
1283        let mut links = Vec::with_capacity(content.len() / 500);
1284        let mut broken_links = Vec::new();
1285        let mut footnote_refs = Vec::new();
1286
1287        // Track byte positions of links found by pulldown-cmark
1288        let mut found_positions = HashSet::new();
1289
1290        // Use pulldown-cmark's streaming parser with BrokenLink callback
1291        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1292        // This automatically handles:
1293        // - Escaped links (won't generate events)
1294        // - Links in code blocks/spans (won't generate Link events)
1295        // - Images (generates Tag::Image instead)
1296        // - Reference resolution (dest_url is already resolved!)
1297        // - Broken references (callback is invoked)
1298        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1299        let mut options = Options::empty();
1300        options.insert(Options::ENABLE_WIKILINKS);
1301        options.insert(Options::ENABLE_FOOTNOTES);
1302
1303        let parser = Parser::new_with_broken_link_callback(
1304            content,
1305            options,
1306            Some(|link: BrokenLink<'_>| {
1307                broken_links.push(BrokenLinkInfo {
1308                    reference: link.reference.to_string(),
1309                    span: link.span.clone(),
1310                });
1311                None
1312            }),
1313        )
1314        .into_offset_iter();
1315
1316        let mut link_stack: Vec<(
1317            usize,
1318            usize,
1319            pulldown_cmark::CowStr<'a>,
1320            LinkType,
1321            pulldown_cmark::CowStr<'a>,
1322        )> = Vec::new();
1323        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1324
1325        for (event, range) in parser {
1326            match event {
1327                Event::Start(Tag::Link {
1328                    link_type,
1329                    dest_url,
1330                    id,
1331                    ..
1332                }) => {
1333                    // Link start - record position, URL, and reference ID
1334                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1335                    text_chunks.clear();
1336                }
1337                Event::Text(text) if !link_stack.is_empty() => {
1338                    // Track text content with its byte range
1339                    text_chunks.push((text.to_string(), range.start, range.end));
1340                }
1341                Event::Code(code) if !link_stack.is_empty() => {
1342                    // Include inline code in link text (with backticks)
1343                    let code_text = format!("`{code}`");
1344                    text_chunks.push((code_text, range.start, range.end));
1345                }
1346                Event::End(TagEnd::Link) => {
1347                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1348                        // Skip if in HTML comment
1349                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1350                            text_chunks.clear();
1351                            continue;
1352                        }
1353
1354                        // Find line and column information
1355                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1356
1357                        // Skip if this link is on a MkDocs snippet line
1358                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1359                            text_chunks.clear();
1360                            continue;
1361                        }
1362
1363                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1364
1365                        let is_reference = matches!(
1366                            link_type,
1367                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1368                        );
1369
1370                        // Extract link text directly from source bytes to preserve escaping
1371                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1372                        let link_text = if start_pos < content.len() {
1373                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1374
1375                            // Find MATCHING ] by tracking bracket depth for nested brackets
1376                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1377                            // Brackets inside code spans (between backticks) should be ignored
1378                            let mut close_pos = None;
1379                            let mut depth = 0;
1380                            let mut in_code_span = false;
1381
1382                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1383                                // Count preceding backslashes
1384                                let mut backslash_count = 0;
1385                                let mut j = i;
1386                                while j > 0 && link_bytes[j - 1] == b'\\' {
1387                                    backslash_count += 1;
1388                                    j -= 1;
1389                                }
1390                                let is_escaped = backslash_count % 2 != 0;
1391
1392                                // Track code spans - backticks toggle in/out of code
1393                                if byte == b'`' && !is_escaped {
1394                                    in_code_span = !in_code_span;
1395                                }
1396
1397                                // Only count brackets when NOT in a code span
1398                                if !is_escaped && !in_code_span {
1399                                    if byte == b'[' {
1400                                        depth += 1;
1401                                    } else if byte == b']' {
1402                                        if depth == 0 {
1403                                            // Found the matching closing bracket
1404                                            close_pos = Some(i);
1405                                            break;
1406                                        } else {
1407                                            depth -= 1;
1408                                        }
1409                                    }
1410                                }
1411                            }
1412
1413                            if let Some(pos) = close_pos {
1414                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1415                            } else {
1416                                Cow::Borrowed("")
1417                            }
1418                        } else {
1419                            Cow::Borrowed("")
1420                        };
1421
1422                        // For reference links, use the actual reference ID from pulldown-cmark
1423                        let reference_id = if is_reference && !ref_id.is_empty() {
1424                            Some(Cow::Owned(ref_id.to_lowercase()))
1425                        } else if is_reference {
1426                            // For collapsed/shortcut references without explicit ID, use the link text
1427                            Some(Cow::Owned(link_text.to_lowercase()))
1428                        } else {
1429                            None
1430                        };
1431
1432                        // Track this position as found
1433                        found_positions.insert(start_pos);
1434
1435                        links.push(ParsedLink {
1436                            line: line_num,
1437                            start_col: col_start,
1438                            end_col: col_end,
1439                            byte_offset: start_pos,
1440                            byte_end: range.end,
1441                            text: link_text,
1442                            url: Cow::Owned(url.to_string()),
1443                            is_reference,
1444                            reference_id,
1445                            link_type,
1446                        });
1447
1448                        text_chunks.clear();
1449                    }
1450                }
1451                Event::FootnoteReference(footnote_id) => {
1452                    // Capture footnote references like [^1], [^note]
1453                    // Skip if in HTML comment
1454                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1455                        continue;
1456                    }
1457
1458                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1459                    footnote_refs.push(FootnoteRef {
1460                        id: footnote_id.to_string(),
1461                        line: line_num,
1462                        byte_offset: range.start,
1463                        byte_end: range.end,
1464                    });
1465                }
1466                _ => {}
1467            }
1468        }
1469
1470        // Also find undefined references using regex
1471        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1472        // because the reference is undefined
1473        for cap in LINK_PATTERN.captures_iter(content) {
1474            let full_match = cap.get(0).unwrap();
1475            let match_start = full_match.start();
1476            let match_end = full_match.end();
1477
1478            // Skip if this was already found by pulldown-cmark (it's a valid link)
1479            if found_positions.contains(&match_start) {
1480                continue;
1481            }
1482
1483            // Skip if escaped
1484            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1485                continue;
1486            }
1487
1488            // Skip if it's an image
1489            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1490                continue;
1491            }
1492
1493            // Skip if in code block
1494            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1495                continue;
1496            }
1497
1498            // Skip if in code span
1499            if Self::is_offset_in_code_span(code_spans, match_start) {
1500                continue;
1501            }
1502
1503            // Skip if in HTML comment
1504            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1505                continue;
1506            }
1507
1508            // Find line and column information
1509            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1510
1511            // Skip if this link is on a MkDocs snippet line
1512            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1513                continue;
1514            }
1515
1516            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1517
1518            let text = cap.get(1).map_or("", |m| m.as_str());
1519
1520            // Only process reference links (group 6)
1521            if let Some(ref_id) = cap.get(6) {
1522                let ref_id_str = ref_id.as_str();
1523                let normalized_ref = if ref_id_str.is_empty() {
1524                    Cow::Owned(text.to_lowercase()) // Implicit reference
1525                } else {
1526                    Cow::Owned(ref_id_str.to_lowercase())
1527                };
1528
1529                // This is an undefined reference (pulldown-cmark didn't parse it)
1530                links.push(ParsedLink {
1531                    line: line_num,
1532                    start_col: col_start,
1533                    end_col: col_end,
1534                    byte_offset: match_start,
1535                    byte_end: match_end,
1536                    text: Cow::Borrowed(text),
1537                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1538                    is_reference: true,
1539                    reference_id: Some(normalized_ref),
1540                    link_type: LinkType::Reference, // Undefined references are reference-style
1541                });
1542            }
1543        }
1544
1545        (links, broken_links, footnote_refs)
1546    }
1547
1548    /// Parse all images in the content
1549    fn parse_images(
1550        content: &'a str,
1551        lines: &[LineInfo],
1552        code_blocks: &[(usize, usize)],
1553        code_spans: &[CodeSpan],
1554        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1555    ) -> Vec<ParsedImage<'a>> {
1556        use crate::utils::skip_context::is_in_html_comment_ranges;
1557        use std::collections::HashSet;
1558
1559        // Pre-size based on a heuristic: images are less common than links
1560        let mut images = Vec::with_capacity(content.len() / 1000);
1561        let mut found_positions = HashSet::new();
1562
1563        // Use pulldown-cmark for parsing - more accurate and faster
1564        let parser = Parser::new(content).into_offset_iter();
1565        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1566            Vec::new();
1567        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1568
1569        for (event, range) in parser {
1570            match event {
1571                Event::Start(Tag::Image {
1572                    link_type,
1573                    dest_url,
1574                    id,
1575                    ..
1576                }) => {
1577                    image_stack.push((range.start, dest_url, link_type, id));
1578                    text_chunks.clear();
1579                }
1580                Event::Text(text) if !image_stack.is_empty() => {
1581                    text_chunks.push((text.to_string(), range.start, range.end));
1582                }
1583                Event::Code(code) if !image_stack.is_empty() => {
1584                    let code_text = format!("`{code}`");
1585                    text_chunks.push((code_text, range.start, range.end));
1586                }
1587                Event::End(TagEnd::Image) => {
1588                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1589                        // Skip if in code block
1590                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1591                            continue;
1592                        }
1593
1594                        // Skip if in code span
1595                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1596                            continue;
1597                        }
1598
1599                        // Skip if in HTML comment
1600                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1601                            continue;
1602                        }
1603
1604                        // Find line and column using binary search
1605                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1606                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1607
1608                        let is_reference = matches!(
1609                            link_type,
1610                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1611                        );
1612
1613                        // Extract alt text directly from source bytes to preserve escaping
1614                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1615                        let alt_text = if start_pos < content.len() {
1616                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1617
1618                            // Find MATCHING ] by tracking bracket depth for nested brackets
1619                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1620                            let mut close_pos = None;
1621                            let mut depth = 0;
1622
1623                            if image_bytes.len() > 2 {
1624                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1625                                    // Count preceding backslashes
1626                                    let mut backslash_count = 0;
1627                                    let mut j = i;
1628                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1629                                        backslash_count += 1;
1630                                        j -= 1;
1631                                    }
1632                                    let is_escaped = backslash_count % 2 != 0;
1633
1634                                    if !is_escaped {
1635                                        if byte == b'[' {
1636                                            depth += 1;
1637                                        } else if byte == b']' {
1638                                            if depth == 0 {
1639                                                // Found the matching closing bracket
1640                                                close_pos = Some(i);
1641                                                break;
1642                                            } else {
1643                                                depth -= 1;
1644                                            }
1645                                        }
1646                                    }
1647                                }
1648                            }
1649
1650                            if let Some(pos) = close_pos {
1651                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1652                            } else {
1653                                Cow::Borrowed("")
1654                            }
1655                        } else {
1656                            Cow::Borrowed("")
1657                        };
1658
1659                        let reference_id = if is_reference && !ref_id.is_empty() {
1660                            Some(Cow::Owned(ref_id.to_lowercase()))
1661                        } else if is_reference {
1662                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1663                        } else {
1664                            None
1665                        };
1666
1667                        found_positions.insert(start_pos);
1668                        images.push(ParsedImage {
1669                            line: line_num,
1670                            start_col: col_start,
1671                            end_col: col_end,
1672                            byte_offset: start_pos,
1673                            byte_end: range.end,
1674                            alt_text,
1675                            url: Cow::Owned(url.to_string()),
1676                            is_reference,
1677                            reference_id,
1678                            link_type,
1679                        });
1680                    }
1681                }
1682                _ => {}
1683            }
1684        }
1685
1686        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1687        for cap in IMAGE_PATTERN.captures_iter(content) {
1688            let full_match = cap.get(0).unwrap();
1689            let match_start = full_match.start();
1690            let match_end = full_match.end();
1691
1692            // Skip if already found by pulldown-cmark
1693            if found_positions.contains(&match_start) {
1694                continue;
1695            }
1696
1697            // Skip if the ! is escaped
1698            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1699                continue;
1700            }
1701
1702            // Skip if in code block, code span, or HTML comment
1703            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1704                || Self::is_offset_in_code_span(code_spans, match_start)
1705                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1706            {
1707                continue;
1708            }
1709
1710            // Only process reference images (undefined references not found by pulldown-cmark)
1711            if let Some(ref_id) = cap.get(6) {
1712                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1713                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1714                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1715                let ref_id_str = ref_id.as_str();
1716                let normalized_ref = if ref_id_str.is_empty() {
1717                    Cow::Owned(alt_text.to_lowercase())
1718                } else {
1719                    Cow::Owned(ref_id_str.to_lowercase())
1720                };
1721
1722                images.push(ParsedImage {
1723                    line: line_num,
1724                    start_col: col_start,
1725                    end_col: col_end,
1726                    byte_offset: match_start,
1727                    byte_end: match_end,
1728                    alt_text: Cow::Borrowed(alt_text),
1729                    url: Cow::Borrowed(""),
1730                    is_reference: true,
1731                    reference_id: Some(normalized_ref),
1732                    link_type: LinkType::Reference, // Undefined references are reference-style
1733                });
1734            }
1735        }
1736
1737        images
1738    }
1739
1740    /// Parse reference definitions
1741    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1742        // Pre-size based on lines count as reference definitions are line-based
1743        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1744
1745        for (line_idx, line_info) in lines.iter().enumerate() {
1746            // Skip lines in code blocks
1747            if line_info.in_code_block {
1748                continue;
1749            }
1750
1751            let line = line_info.content(content);
1752            let line_num = line_idx + 1;
1753
1754            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1755                let id_raw = cap.get(1).unwrap().as_str();
1756
1757                // Skip footnote definitions - they use [^id]: syntax and are semantically
1758                // different from reference link definitions
1759                if id_raw.starts_with('^') {
1760                    continue;
1761                }
1762
1763                let id = id_raw.to_lowercase();
1764                let url = cap.get(2).unwrap().as_str().to_string();
1765                let title_match = cap.get(3).or_else(|| cap.get(4));
1766                let title = title_match.map(|m| m.as_str().to_string());
1767
1768                // Calculate byte positions
1769                // The match starts at the beginning of the line (0) and extends to the end
1770                let match_obj = cap.get(0).unwrap();
1771                let byte_offset = line_info.byte_offset + match_obj.start();
1772                let byte_end = line_info.byte_offset + match_obj.end();
1773
1774                // Calculate title byte positions (includes the quote character before content)
1775                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1776                    // The match is the content inside quotes, so we include the quote before
1777                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1778                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1779                    (Some(start), Some(end))
1780                } else {
1781                    (None, None)
1782                };
1783
1784                refs.push(ReferenceDef {
1785                    line: line_num,
1786                    id,
1787                    url,
1788                    title,
1789                    byte_offset,
1790                    byte_end,
1791                    title_byte_start,
1792                    title_byte_end,
1793                });
1794            }
1795        }
1796
1797        refs
1798    }
1799
1800    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1801    /// Handles nested blockquotes like `> > > content`
1802    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1803    #[inline]
1804    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1805        let trimmed_start = line.trim_start();
1806        if !trimmed_start.starts_with('>') {
1807            return None;
1808        }
1809
1810        // Track total prefix length to handle nested blockquotes
1811        let mut remaining = line;
1812        let mut total_prefix_len = 0;
1813
1814        loop {
1815            let trimmed = remaining.trim_start();
1816            if !trimmed.starts_with('>') {
1817                break;
1818            }
1819
1820            // Add leading whitespace + '>' to prefix
1821            let leading_ws_len = remaining.len() - trimmed.len();
1822            total_prefix_len += leading_ws_len + 1;
1823
1824            let after_gt = &trimmed[1..];
1825
1826            // Handle optional whitespace after '>' (space or tab)
1827            if let Some(stripped) = after_gt.strip_prefix(' ') {
1828                total_prefix_len += 1;
1829                remaining = stripped;
1830            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1831                total_prefix_len += 1;
1832                remaining = stripped;
1833            } else {
1834                remaining = after_gt;
1835            }
1836        }
1837
1838        Some((&line[..total_prefix_len], remaining))
1839    }
1840
1841    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
1842    ///
1843    /// Returns a HashMap keyed by line byte offset, containing:
1844    /// `(is_ordered, marker, marker_column, content_column, number)`
1845    ///
1846    /// ## Why pulldown-cmark?
1847    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
1848    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
1849    /// This fixes issue #253 where continuation lines were falsely detected.
1850    ///
1851    /// ## Tab indentation quirk
1852    /// Pulldown-cmark reports nested list items at the newline character position
1853    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
1854    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
1855    /// We detect this and advance to the correct line.
1856    ///
1857    /// ## HashMap key strategy
1858    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
1859    /// that resolve to the same line (after newline adjustment). The first event
1860    /// for each line is authoritative.
1861    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
1862    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
1863    /// This avoids a separate parse for emphasis detection.
1864    fn detect_list_items_and_emphasis_with_pulldown(
1865        content: &str,
1866        line_offsets: &[usize],
1867        flavor: MarkdownFlavor,
1868        front_matter_end: usize,
1869        code_blocks: &[(usize, usize)],
1870    ) -> (ListItemMap, Vec<EmphasisSpan>) {
1871        use std::collections::HashMap;
1872
1873        let mut list_items = HashMap::new();
1874        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
1875
1876        let mut options = Options::empty();
1877        options.insert(Options::ENABLE_TABLES);
1878        options.insert(Options::ENABLE_FOOTNOTES);
1879        options.insert(Options::ENABLE_STRIKETHROUGH);
1880        options.insert(Options::ENABLE_TASKLISTS);
1881        // Always enable GFM features for consistency with existing behavior
1882        options.insert(Options::ENABLE_GFM);
1883
1884        // Suppress unused variable warning
1885        let _ = flavor;
1886
1887        let parser = Parser::new_ext(content, options).into_offset_iter();
1888        let mut list_depth: usize = 0;
1889        let mut list_stack: Vec<bool> = Vec::new();
1890
1891        for (event, range) in parser {
1892            match event {
1893                // Capture emphasis spans (for MD030's emphasis detection)
1894                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
1895                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
1896                        2
1897                    } else {
1898                        1
1899                    };
1900                    let match_start = range.start;
1901                    let match_end = range.end;
1902
1903                    // Skip if in code block
1904                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
1905                        // Determine marker character by looking at the content at the start
1906                        let marker = content[match_start..].chars().next().unwrap_or('*');
1907                        if marker == '*' || marker == '_' {
1908                            // Extract content between markers
1909                            let content_start = match_start + marker_count;
1910                            let content_end = if match_end >= marker_count {
1911                                match_end - marker_count
1912                            } else {
1913                                match_end
1914                            };
1915                            let content_part = if content_start < content_end && content_end <= content.len() {
1916                                &content[content_start..content_end]
1917                            } else {
1918                                ""
1919                            };
1920
1921                            // Find which line this emphasis is on using line_offsets
1922                            let line_idx = match line_offsets.binary_search(&match_start) {
1923                                Ok(idx) => idx,
1924                                Err(idx) => idx.saturating_sub(1),
1925                            };
1926                            let line_num = line_idx + 1;
1927                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
1928                            let col_start = match_start - line_start;
1929                            let col_end = match_end - line_start;
1930
1931                            emphasis_spans.push(EmphasisSpan {
1932                                line: line_num,
1933                                start_col: col_start,
1934                                end_col: col_end,
1935                                byte_offset: match_start,
1936                                byte_end: match_end,
1937                                marker,
1938                                marker_count,
1939                                content: content_part.to_string(),
1940                            });
1941                        }
1942                    }
1943                }
1944                Event::Start(Tag::List(start_number)) => {
1945                    list_depth += 1;
1946                    list_stack.push(start_number.is_some());
1947                }
1948                Event::End(TagEnd::List(_)) => {
1949                    list_depth = list_depth.saturating_sub(1);
1950                    list_stack.pop();
1951                }
1952                Event::Start(Tag::Item) if list_depth > 0 => {
1953                    // Get the ordered state for the CURRENT (innermost) list
1954                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1955                    // Find which line this byte offset corresponds to
1956                    let item_start = range.start;
1957
1958                    // Binary search to find the line number
1959                    let mut line_idx = match line_offsets.binary_search(&item_start) {
1960                        Ok(idx) => idx,
1961                        Err(idx) => idx.saturating_sub(1),
1962                    };
1963
1964                    // Pulldown-cmark reports nested list items at the newline before the item
1965                    // when using tab indentation (e.g., "* Item\n\t- Nested").
1966                    // Advance to the actual content line in this case.
1967                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1968                        line_idx += 1;
1969                    }
1970
1971                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
1972                    if front_matter_end > 0 && line_idx < front_matter_end {
1973                        continue;
1974                    }
1975
1976                    if line_idx < line_offsets.len() {
1977                        let line_start_byte = line_offsets[line_idx];
1978                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1979                        let line = &content[line_start_byte..line_end.min(content.len())];
1980
1981                        // Strip trailing newline
1982                        let line = line
1983                            .strip_suffix('\n')
1984                            .or_else(|| line.strip_suffix("\r\n"))
1985                            .unwrap_or(line);
1986
1987                        // Strip blockquote prefix if present
1988                        let blockquote_parse = Self::parse_blockquote_prefix(line);
1989                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1990                            (prefix.len(), content)
1991                        } else {
1992                            (0, line)
1993                        };
1994
1995                        // Parse the list marker from the actual line
1996                        if current_list_is_ordered {
1997                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1998                                Self::parse_ordered_list(line_to_parse)
1999                            {
2000                                let marker = format!("{number_str}{delimiter}");
2001                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2002                                let content_column = marker_column + marker.len() + spacing.len();
2003                                let number = number_str.parse().ok();
2004
2005                                list_items.entry(line_start_byte).or_insert((
2006                                    true,
2007                                    marker,
2008                                    marker_column,
2009                                    content_column,
2010                                    number,
2011                                ));
2012                            }
2013                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2014                            Self::parse_unordered_list(line_to_parse)
2015                        {
2016                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2017                            let content_column = marker_column + 1 + spacing.len();
2018
2019                            list_items.entry(line_start_byte).or_insert((
2020                                false,
2021                                marker.to_string(),
2022                                marker_column,
2023                                content_column,
2024                                None,
2025                            ));
2026                        }
2027                    }
2028                }
2029                _ => {}
2030            }
2031        }
2032
2033        (list_items, emphasis_spans)
2034    }
2035
2036    /// Fast unordered list parser - replaces regex for 5-10x speedup
2037    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2038    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2039    #[inline]
2040    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2041        let bytes = line.as_bytes();
2042        let mut i = 0;
2043
2044        // Skip leading whitespace
2045        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2046            i += 1;
2047        }
2048
2049        // Check for marker
2050        if i >= bytes.len() {
2051            return None;
2052        }
2053        let marker = bytes[i] as char;
2054        if marker != '-' && marker != '*' && marker != '+' {
2055            return None;
2056        }
2057        let marker_pos = i;
2058        i += 1;
2059
2060        // Collect spacing after marker (space or tab only)
2061        let spacing_start = i;
2062        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2063            i += 1;
2064        }
2065
2066        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2067    }
2068
2069    /// Fast ordered list parser - replaces regex for 5-10x speedup
2070    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2071    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2072    #[inline]
2073    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2074        let bytes = line.as_bytes();
2075        let mut i = 0;
2076
2077        // Skip leading whitespace
2078        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2079            i += 1;
2080        }
2081
2082        // Collect digits
2083        let number_start = i;
2084        while i < bytes.len() && bytes[i].is_ascii_digit() {
2085            i += 1;
2086        }
2087        if i == number_start {
2088            return None; // No digits found
2089        }
2090
2091        // Check for delimiter
2092        if i >= bytes.len() {
2093            return None;
2094        }
2095        let delimiter = bytes[i] as char;
2096        if delimiter != '.' && delimiter != ')' {
2097            return None;
2098        }
2099        let delimiter_pos = i;
2100        i += 1;
2101
2102        // Collect spacing after delimiter (space or tab only)
2103        let spacing_start = i;
2104        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2105            i += 1;
2106        }
2107
2108        Some((
2109            &line[..number_start],
2110            &line[number_start..delimiter_pos],
2111            delimiter,
2112            &line[spacing_start..i],
2113            &line[i..],
2114        ))
2115    }
2116
2117    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2118    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2119    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2120        let num_lines = line_offsets.len();
2121        let mut in_code_block = vec![false; num_lines];
2122
2123        // For each code block, mark all lines within it
2124        for &(start, end) in code_blocks {
2125            // Ensure we're at valid UTF-8 boundaries
2126            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2127                let mut boundary = start;
2128                while boundary > 0 && !content.is_char_boundary(boundary) {
2129                    boundary -= 1;
2130                }
2131                boundary
2132            } else {
2133                start
2134            };
2135
2136            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2137                let mut boundary = end;
2138                while boundary < content.len() && !content.is_char_boundary(boundary) {
2139                    boundary += 1;
2140                }
2141                boundary
2142            } else {
2143                end.min(content.len())
2144            };
2145
2146            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2147            // That function now has proper list context awareness (see code_block_utils.rs)
2148            // and correctly distinguishes between:
2149            // - Fenced code blocks (``` or ~~~)
2150            // - Indented code blocks at document level (4 spaces + blank line before)
2151            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2152            //
2153            // We no longer need to re-validate here. The original validation logic
2154            // was causing false positives by marking list continuation paragraphs as
2155            // code blocks when they have 4 spaces of indentation.
2156
2157            // Use binary search to find the first and last line indices
2158            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2159            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2160            //
2161            // Find the line that CONTAINS safe_start: the line with the largest
2162            // start offset that is <= safe_start. partition_point gives us the
2163            // first line that starts AFTER safe_start, so we subtract 1.
2164            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2165            let first_line = first_line_after.saturating_sub(1);
2166            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2167
2168            // Mark all lines in the range at once
2169            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2170                *flag = true;
2171            }
2172        }
2173
2174        in_code_block
2175    }
2176
2177    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2178    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2179    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2180        let content_lines: Vec<&str> = content.lines().collect();
2181        let num_lines = content_lines.len();
2182        let mut in_math_block = vec![false; num_lines];
2183
2184        let mut inside_math = false;
2185
2186        for (i, line) in content_lines.iter().enumerate() {
2187            // Skip lines that are in code blocks - math delimiters inside code are literal
2188            if code_block_map.get(i).copied().unwrap_or(false) {
2189                continue;
2190            }
2191
2192            let trimmed = line.trim();
2193
2194            // Check for math block delimiter ($$)
2195            // A line with just $$ toggles the math block state
2196            if trimmed == "$$" {
2197                if inside_math {
2198                    // Closing delimiter - this line is still part of the math block
2199                    in_math_block[i] = true;
2200                    inside_math = false;
2201                } else {
2202                    // Opening delimiter - this line starts the math block
2203                    in_math_block[i] = true;
2204                    inside_math = true;
2205                }
2206            } else if inside_math {
2207                // Content inside math block
2208                in_math_block[i] = true;
2209            }
2210        }
2211
2212        in_math_block
2213    }
2214
2215    /// Pre-compute basic line information (without headings/blockquotes)
2216    /// Also returns emphasis spans detected during the pulldown-cmark parse
2217    fn compute_basic_line_info(
2218        content: &str,
2219        line_offsets: &[usize],
2220        code_blocks: &[(usize, usize)],
2221        flavor: MarkdownFlavor,
2222        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2223        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2224    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2225        let content_lines: Vec<&str> = content.lines().collect();
2226        let mut lines = Vec::with_capacity(content_lines.len());
2227
2228        // Pre-compute which lines are in code blocks
2229        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2230
2231        // Pre-compute which lines are in math blocks ($$ ... $$)
2232        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2233
2234        // Detect front matter boundaries FIRST, before any other parsing
2235        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2236        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2237
2238        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2239        // (context-aware, eliminates false positives)
2240        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2241            content,
2242            line_offsets,
2243            flavor,
2244            front_matter_end,
2245            code_blocks,
2246        );
2247
2248        for (i, line) in content_lines.iter().enumerate() {
2249            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2250            let indent = line.len() - line.trim_start().len();
2251            // Compute visual indent with proper CommonMark tab expansion
2252            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2253
2254            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2255            let blockquote_parse = Self::parse_blockquote_prefix(line);
2256
2257            // For blank detection, consider blockquote context
2258            let is_blank = if let Some((_, content)) = blockquote_parse {
2259                // In blockquote context, check if content after prefix is blank
2260                content.trim().is_empty()
2261            } else {
2262                line.trim().is_empty()
2263            };
2264
2265            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2266            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2267
2268            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2269            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2270                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2271            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2272            // This ensures content after `-->` on the same line is not incorrectly skipped
2273            let line_end_offset = byte_offset + line.len();
2274            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2275                html_comment_ranges,
2276                byte_offset,
2277                line_end_offset,
2278            );
2279            // Use pulldown-cmark's list detection for context-aware parsing
2280            // This eliminates false positives on continuation lines (issue #253)
2281            let list_item =
2282                list_item_map
2283                    .get(&byte_offset)
2284                    .map(
2285                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2286                            marker: marker.clone(),
2287                            is_ordered: *is_ordered,
2288                            number: *number,
2289                            marker_column: *marker_column,
2290                            content_column: *content_column,
2291                        },
2292                    );
2293
2294            // Detect horizontal rules (only outside code blocks and frontmatter)
2295            // Uses CommonMark-compliant check including leading indentation validation
2296            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2297            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2298
2299            // Get math block status for this line
2300            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2301
2302            lines.push(LineInfo {
2303                byte_offset,
2304                byte_len: line.len(),
2305                indent,
2306                visual_indent,
2307                is_blank,
2308                in_code_block,
2309                in_front_matter,
2310                in_html_block: false, // Will be populated after line creation
2311                in_html_comment,
2312                list_item,
2313                heading: None,    // Will be populated in second pass for Setext headings
2314                blockquote: None, // Will be populated after line creation
2315                in_mkdocstrings,
2316                in_esm_block: false, // Will be populated after line creation for MDX files
2317                in_code_span_continuation: false, // Will be populated after code spans are parsed
2318                is_horizontal_rule: is_hr,
2319                in_math_block,
2320            });
2321        }
2322
2323        (lines, emphasis_spans)
2324    }
2325
2326    /// Detect headings and blockquotes (called after HTML block detection)
2327    fn detect_headings_and_blockquotes(
2328        content: &str,
2329        lines: &mut [LineInfo],
2330        flavor: MarkdownFlavor,
2331        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2332        link_byte_ranges: &[(usize, usize)],
2333    ) {
2334        // Regex for heading detection
2335        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2336            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2337        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2338            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2339
2340        let content_lines: Vec<&str> = content.lines().collect();
2341
2342        // Detect front matter boundaries to skip those lines
2343        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2344
2345        // Detect headings (including Setext which needs look-ahead) and blockquotes
2346        for i in 0..lines.len() {
2347            let line = content_lines[i];
2348
2349            // Detect blockquotes FIRST, before any skip conditions.
2350            // A line can be both a blockquote AND contain a code block inside it.
2351            // We need to know about the blockquote marker regardless of code block status.
2352            // Skip only frontmatter lines - those are never blockquotes.
2353            if !(front_matter_end > 0 && i < front_matter_end)
2354                && let Some(bq) = parse_blockquote_detailed(line)
2355            {
2356                let nesting_level = bq.markers.len();
2357                let marker_column = bq.indent.len();
2358                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2359                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2360                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2361                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2362
2363                lines[i].blockquote = Some(BlockquoteInfo {
2364                    nesting_level,
2365                    indent: bq.indent.to_string(),
2366                    marker_column,
2367                    prefix,
2368                    content: bq.content.to_string(),
2369                    has_no_space_after_marker: has_no_space,
2370                    has_multiple_spaces_after_marker: has_multiple_spaces,
2371                    needs_md028_fix,
2372                });
2373
2374                // Update is_horizontal_rule for blockquote content
2375                // The original detection doesn't strip blockquote prefix, so we need to check here
2376                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2377                    lines[i].is_horizontal_rule = true;
2378                }
2379            }
2380
2381            // Now apply skip conditions for heading detection
2382            if lines[i].in_code_block {
2383                continue;
2384            }
2385
2386            // Skip lines in front matter
2387            if front_matter_end > 0 && i < front_matter_end {
2388                continue;
2389            }
2390
2391            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2392            if lines[i].in_html_block {
2393                continue;
2394            }
2395
2396            // Skip heading detection for blank lines
2397            if lines[i].is_blank {
2398                continue;
2399            }
2400
2401            // Check for ATX headings (but skip MkDocs snippet lines)
2402            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2403            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2404                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2405                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2406            } else {
2407                false
2408            };
2409
2410            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2411                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2412                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2413                    continue;
2414                }
2415                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2416                // This prevents false positives where `#fragment` is detected as a heading
2417                let line_offset = lines[i].byte_offset;
2418                if link_byte_ranges
2419                    .iter()
2420                    .any(|&(start, end)| line_offset > start && line_offset < end)
2421                {
2422                    continue;
2423                }
2424                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2425                let hashes = caps.get(2).map_or("", |m| m.as_str());
2426                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2427                let rest = caps.get(4).map_or("", |m| m.as_str());
2428
2429                let level = hashes.len() as u8;
2430                let marker_column = leading_spaces.len();
2431
2432                // Check for closing sequence, but handle custom IDs that might come after
2433                let (text, has_closing, closing_seq) = {
2434                    // First check if there's a custom ID at the end
2435                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2436                        // Check if this looks like a valid custom ID (ends with })
2437                        if rest[id_start..].trim_end().ends_with('}') {
2438                            // Split off the custom ID
2439                            (&rest[..id_start], &rest[id_start..])
2440                        } else {
2441                            (rest, "")
2442                        }
2443                    } else {
2444                        (rest, "")
2445                    };
2446
2447                    // Now look for closing hashes in the part before the custom ID
2448                    let trimmed_rest = rest_without_id.trim_end();
2449                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2450                        // Find the start of the hash sequence by walking backwards
2451                        // Use char_indices to get byte positions at char boundaries
2452                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2453
2454                        // Find which char index corresponds to last_hash_byte_pos
2455                        let last_hash_char_idx = char_positions
2456                            .iter()
2457                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2458
2459                        if let Some(mut char_idx) = last_hash_char_idx {
2460                            // Walk backwards to find start of hash sequence
2461                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2462                                char_idx -= 1;
2463                            }
2464
2465                            // Get the byte position of the start of hashes
2466                            let start_of_hashes = char_positions[char_idx].0;
2467
2468                            // Check if there's at least one space before the closing hashes
2469                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2470
2471                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2472                            let potential_closing = &trimmed_rest[start_of_hashes..];
2473                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2474
2475                            if is_all_hashes && has_space_before {
2476                                // This is a closing sequence
2477                                let closing_hashes = potential_closing.to_string();
2478                                // The text is everything before the closing hashes
2479                                // Don't include the custom ID here - it will be extracted later
2480                                let text_part = if !custom_id_part.is_empty() {
2481                                    // If we have a custom ID, append it back to get the full rest
2482                                    // This allows the extract_header_id function to handle it properly
2483                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2484                                } else {
2485                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2486                                };
2487                                (text_part, true, closing_hashes)
2488                            } else {
2489                                // Not a valid closing sequence, return the full content
2490                                (rest.to_string(), false, String::new())
2491                            }
2492                        } else {
2493                            // Couldn't find char boundary, return the full content
2494                            (rest.to_string(), false, String::new())
2495                        }
2496                    } else {
2497                        // No hashes found, return the full content
2498                        (rest.to_string(), false, String::new())
2499                    }
2500                };
2501
2502                let content_column = marker_column + hashes.len() + spaces_after.len();
2503
2504                // Extract custom header ID if present
2505                let raw_text = text.trim().to_string();
2506                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2507
2508                // If no custom ID was found on the header line, check the next line for standalone attr-list
2509                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2510                    let next_line = content_lines[i + 1];
2511                    if !lines[i + 1].in_code_block
2512                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2513                        && let Some(next_line_id) =
2514                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2515                    {
2516                        custom_id = Some(next_line_id);
2517                    }
2518                }
2519
2520                // ATX heading is "valid" for processing by heading rules if:
2521                // 1. Has space after # (CommonMark compliant): `# Heading`
2522                // 2. Is empty (just hashes): `#`
2523                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2524                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2525                //
2526                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2527                // - `#tag` - single # with lowercase (social hashtag)
2528                // - `#123` - single # with number (GitHub issue ref)
2529                let is_valid = !spaces_after.is_empty()
2530                    || rest.is_empty()
2531                    || level > 1
2532                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2533
2534                lines[i].heading = Some(HeadingInfo {
2535                    level,
2536                    style: HeadingStyle::ATX,
2537                    marker: hashes.to_string(),
2538                    marker_column,
2539                    content_column,
2540                    text: clean_text,
2541                    custom_id,
2542                    raw_text,
2543                    has_closing_sequence: has_closing,
2544                    closing_sequence: closing_seq,
2545                    is_valid,
2546                });
2547            }
2548            // Check for Setext headings (need to look at next line)
2549            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2550                let next_line = content_lines[i + 1];
2551                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2552                    // Skip if next line is front matter delimiter
2553                    if front_matter_end > 0 && i < front_matter_end {
2554                        continue;
2555                    }
2556
2557                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2558                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2559                    {
2560                        continue;
2561                    }
2562
2563                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2564                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2565                    let content_line = line.trim();
2566
2567                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2568                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2569                        continue;
2570                    }
2571
2572                    // Skip underscore thematic breaks (___)
2573                    if content_line.starts_with('_') {
2574                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2575                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2576                            continue;
2577                        }
2578                    }
2579
2580                    // Skip numbered lists (1. Item, 2. Item, etc.)
2581                    if let Some(first_char) = content_line.chars().next()
2582                        && first_char.is_ascii_digit()
2583                    {
2584                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2585                        if num_end < content_line.len() {
2586                            let next = content_line.chars().nth(num_end);
2587                            if next == Some('.') || next == Some(')') {
2588                                continue;
2589                            }
2590                        }
2591                    }
2592
2593                    // Skip ATX headings
2594                    if ATX_HEADING_REGEX.is_match(line) {
2595                        continue;
2596                    }
2597
2598                    // Skip blockquotes
2599                    if content_line.starts_with('>') {
2600                        continue;
2601                    }
2602
2603                    // Skip code fences
2604                    let trimmed_start = line.trim_start();
2605                    if trimmed_start.len() >= 3 {
2606                        let first_three: String = trimmed_start.chars().take(3).collect();
2607                        if first_three == "```" || first_three == "~~~" {
2608                            continue;
2609                        }
2610                    }
2611
2612                    // Skip HTML blocks
2613                    if content_line.starts_with('<') {
2614                        continue;
2615                    }
2616
2617                    let underline = next_line.trim();
2618
2619                    let level = if underline.starts_with('=') { 1 } else { 2 };
2620                    let style = if level == 1 {
2621                        HeadingStyle::Setext1
2622                    } else {
2623                        HeadingStyle::Setext2
2624                    };
2625
2626                    // Extract custom header ID if present
2627                    let raw_text = line.trim().to_string();
2628                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2629
2630                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2631                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2632                        let attr_line = content_lines[i + 2];
2633                        if !lines[i + 2].in_code_block
2634                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2635                            && let Some(attr_line_id) =
2636                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2637                        {
2638                            custom_id = Some(attr_line_id);
2639                        }
2640                    }
2641
2642                    lines[i].heading = Some(HeadingInfo {
2643                        level,
2644                        style,
2645                        marker: underline.to_string(),
2646                        marker_column: next_line.len() - next_line.trim_start().len(),
2647                        content_column: lines[i].indent,
2648                        text: clean_text,
2649                        custom_id,
2650                        raw_text,
2651                        has_closing_sequence: false,
2652                        closing_sequence: String::new(),
2653                        is_valid: true, // Setext headings are always valid
2654                    });
2655                }
2656            }
2657        }
2658    }
2659
2660    /// Detect HTML blocks in the content
2661    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2662        // HTML block elements that trigger block context
2663        // Includes HTML5 media, embedded content, and interactive elements
2664        const BLOCK_ELEMENTS: &[&str] = &[
2665            "address",
2666            "article",
2667            "aside",
2668            "audio",
2669            "blockquote",
2670            "canvas",
2671            "details",
2672            "dialog",
2673            "dd",
2674            "div",
2675            "dl",
2676            "dt",
2677            "embed",
2678            "fieldset",
2679            "figcaption",
2680            "figure",
2681            "footer",
2682            "form",
2683            "h1",
2684            "h2",
2685            "h3",
2686            "h4",
2687            "h5",
2688            "h6",
2689            "header",
2690            "hr",
2691            "iframe",
2692            "li",
2693            "main",
2694            "menu",
2695            "nav",
2696            "noscript",
2697            "object",
2698            "ol",
2699            "p",
2700            "picture",
2701            "pre",
2702            "script",
2703            "search",
2704            "section",
2705            "source",
2706            "style",
2707            "summary",
2708            "svg",
2709            "table",
2710            "tbody",
2711            "td",
2712            "template",
2713            "textarea",
2714            "tfoot",
2715            "th",
2716            "thead",
2717            "tr",
2718            "track",
2719            "ul",
2720            "video",
2721        ];
2722
2723        let mut i = 0;
2724        while i < lines.len() {
2725            // Skip if already in code block or front matter
2726            if lines[i].in_code_block || lines[i].in_front_matter {
2727                i += 1;
2728                continue;
2729            }
2730
2731            let trimmed = lines[i].content(content).trim_start();
2732
2733            // Check if line starts with an HTML tag
2734            if trimmed.starts_with('<') && trimmed.len() > 1 {
2735                // Extract tag name safely
2736                let after_bracket = &trimmed[1..];
2737                let is_closing = after_bracket.starts_with('/');
2738                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2739
2740                // Extract tag name (stop at space, >, /, or end of string)
2741                let tag_name = tag_start
2742                    .chars()
2743                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2744                    .collect::<String>()
2745                    .to_lowercase();
2746
2747                // Check if it's a block element
2748                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2749                    // Mark this line as in HTML block
2750                    lines[i].in_html_block = true;
2751
2752                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2753                    // This avoids complex nesting logic that might cause infinite loops
2754                    if !is_closing {
2755                        let closing_tag = format!("</{tag_name}>");
2756                        // style and script tags can contain blank lines (CSS/JS formatting)
2757                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2758                        let mut j = i + 1;
2759                        let mut found_closing_tag = false;
2760                        while j < lines.len() && j < i + 100 {
2761                            // Limit search to 100 lines
2762                            // Stop at blank lines (except for style/script tags)
2763                            if !allow_blank_lines && lines[j].is_blank {
2764                                break;
2765                            }
2766
2767                            lines[j].in_html_block = true;
2768
2769                            // Check if this line contains the closing tag
2770                            if lines[j].content(content).contains(&closing_tag) {
2771                                found_closing_tag = true;
2772                            }
2773
2774                            // After finding closing tag, continue marking lines as
2775                            // in_html_block until blank line (per CommonMark spec)
2776                            if found_closing_tag {
2777                                j += 1;
2778                                // Continue marking subsequent lines until blank
2779                                while j < lines.len() && j < i + 100 {
2780                                    if lines[j].is_blank {
2781                                        break;
2782                                    }
2783                                    lines[j].in_html_block = true;
2784                                    j += 1;
2785                                }
2786                                break;
2787                            }
2788                            j += 1;
2789                        }
2790                    }
2791                }
2792            }
2793
2794            i += 1;
2795        }
2796    }
2797
2798    /// Detect ESM import/export blocks in MDX files
2799    /// ESM blocks consist of contiguous import/export statements at the top of the file
2800    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2801        // Only process MDX files
2802        if !flavor.supports_esm_blocks() {
2803            return;
2804        }
2805
2806        let mut in_multiline_comment = false;
2807
2808        for line in lines.iter_mut() {
2809            // Skip blank lines and HTML comments
2810            if line.is_blank || line.in_html_comment {
2811                continue;
2812            }
2813
2814            let trimmed = line.content(content).trim_start();
2815
2816            // Handle continuation of multi-line JS comments
2817            if in_multiline_comment {
2818                if trimmed.contains("*/") {
2819                    in_multiline_comment = false;
2820                }
2821                continue;
2822            }
2823
2824            // Skip single-line JS comments (// and ///)
2825            if trimmed.starts_with("//") {
2826                continue;
2827            }
2828
2829            // Handle start of multi-line JS comment
2830            if trimmed.starts_with("/*") {
2831                if !trimmed.contains("*/") {
2832                    in_multiline_comment = true;
2833                }
2834                continue;
2835            }
2836
2837            // Check if line starts with import or export
2838            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2839                line.in_esm_block = true;
2840            } else {
2841                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2842                break;
2843            }
2844        }
2845    }
2846
2847    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2848    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2849        let mut code_spans = Vec::new();
2850
2851        // Quick check - if no backticks, no code spans
2852        if !content.contains('`') {
2853            return code_spans;
2854        }
2855
2856        // Use pulldown-cmark's streaming parser with byte offsets
2857        let parser = Parser::new(content).into_offset_iter();
2858
2859        for (event, range) in parser {
2860            if let Event::Code(_) = event {
2861                let start_pos = range.start;
2862                let end_pos = range.end;
2863
2864                // The range includes the backticks, extract the actual content
2865                let full_span = &content[start_pos..end_pos];
2866                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2867
2868                // Extract content between backticks, preserving spaces
2869                let content_start = start_pos + backtick_count;
2870                let content_end = end_pos - backtick_count;
2871                let span_content = if content_start < content_end {
2872                    content[content_start..content_end].to_string()
2873                } else {
2874                    String::new()
2875                };
2876
2877                // Use binary search to find line number - O(log n) instead of O(n)
2878                // Find the rightmost line whose byte_offset <= start_pos
2879                let line_idx = lines
2880                    .partition_point(|line| line.byte_offset <= start_pos)
2881                    .saturating_sub(1);
2882                let line_num = line_idx + 1;
2883                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2884
2885                // Find end column using binary search
2886                let end_line_idx = lines
2887                    .partition_point(|line| line.byte_offset <= end_pos)
2888                    .saturating_sub(1);
2889                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2890
2891                // Convert byte offsets to character positions for correct Unicode handling
2892                // This ensures consistency with warning.column which uses character positions
2893                let line_content = lines[line_idx].content(content);
2894                let col_start = if byte_col_start <= line_content.len() {
2895                    line_content[..byte_col_start].chars().count()
2896                } else {
2897                    line_content.chars().count()
2898                };
2899
2900                let end_line_content = lines[end_line_idx].content(content);
2901                let col_end = if byte_col_end <= end_line_content.len() {
2902                    end_line_content[..byte_col_end].chars().count()
2903                } else {
2904                    end_line_content.chars().count()
2905                };
2906
2907                code_spans.push(CodeSpan {
2908                    line: line_num,
2909                    end_line: end_line_idx + 1,
2910                    start_col: col_start,
2911                    end_col: col_end,
2912                    byte_offset: start_pos,
2913                    byte_end: end_pos,
2914                    backtick_count,
2915                    content: span_content,
2916                });
2917            }
2918        }
2919
2920        // Sort by position to ensure consistent ordering
2921        code_spans.sort_by_key(|span| span.byte_offset);
2922
2923        code_spans
2924    }
2925
2926    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
2927    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
2928        let mut math_spans = Vec::new();
2929
2930        // Quick check - if no $ signs, no math spans
2931        if !content.contains('$') {
2932            return math_spans;
2933        }
2934
2935        // Use pulldown-cmark with ENABLE_MATH option
2936        let mut options = Options::empty();
2937        options.insert(Options::ENABLE_MATH);
2938        let parser = Parser::new_ext(content, options).into_offset_iter();
2939
2940        for (event, range) in parser {
2941            let (is_display, math_content) = match &event {
2942                Event::InlineMath(text) => (false, text.as_ref()),
2943                Event::DisplayMath(text) => (true, text.as_ref()),
2944                _ => continue,
2945            };
2946
2947            let start_pos = range.start;
2948            let end_pos = range.end;
2949
2950            // Use binary search to find line number - O(log n) instead of O(n)
2951            let line_idx = lines
2952                .partition_point(|line| line.byte_offset <= start_pos)
2953                .saturating_sub(1);
2954            let line_num = line_idx + 1;
2955            let byte_col_start = start_pos - lines[line_idx].byte_offset;
2956
2957            // Find end column using binary search
2958            let end_line_idx = lines
2959                .partition_point(|line| line.byte_offset <= end_pos)
2960                .saturating_sub(1);
2961            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2962
2963            // Convert byte offsets to character positions for correct Unicode handling
2964            let line_content = lines[line_idx].content(content);
2965            let col_start = if byte_col_start <= line_content.len() {
2966                line_content[..byte_col_start].chars().count()
2967            } else {
2968                line_content.chars().count()
2969            };
2970
2971            let end_line_content = lines[end_line_idx].content(content);
2972            let col_end = if byte_col_end <= end_line_content.len() {
2973                end_line_content[..byte_col_end].chars().count()
2974            } else {
2975                end_line_content.chars().count()
2976            };
2977
2978            math_spans.push(MathSpan {
2979                line: line_num,
2980                end_line: end_line_idx + 1,
2981                start_col: col_start,
2982                end_col: col_end,
2983                byte_offset: start_pos,
2984                byte_end: end_pos,
2985                is_display,
2986                content: math_content.to_string(),
2987            });
2988        }
2989
2990        // Sort by position to ensure consistent ordering
2991        math_spans.sort_by_key(|span| span.byte_offset);
2992
2993        math_spans
2994    }
2995
2996    /// Parse all list blocks in the content (legacy line-by-line approach)
2997    ///
2998    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2999    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3000    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3001    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3002    ///   treated as list continuation (based on the list marker width)
3003    ///
3004    /// When a new list item is encountered, we check if list-breaking content was seen
3005    /// since the last item. If so, we start a new list block.
3006    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3007        // Minimum indentation for unordered list continuation per CommonMark spec
3008        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3009
3010        /// Initialize or reset the forward-scanning tracking state.
3011        /// This helper eliminates code duplication across three initialization sites.
3012        #[inline]
3013        fn reset_tracking_state(
3014            list_item: &ListItemInfo,
3015            has_list_breaking_content: &mut bool,
3016            min_continuation: &mut usize,
3017        ) {
3018            *has_list_breaking_content = false;
3019            let marker_width = if list_item.is_ordered {
3020                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3021            } else {
3022                list_item.marker.len()
3023            };
3024            *min_continuation = if list_item.is_ordered {
3025                marker_width
3026            } else {
3027                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3028            };
3029        }
3030
3031        // Pre-size based on lines that could be list items
3032        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3033        let mut current_block: Option<ListBlock> = None;
3034        let mut last_list_item_line = 0;
3035        let mut current_indent_level = 0;
3036        let mut last_marker_width = 0;
3037
3038        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3039        let mut has_list_breaking_content_since_last_item = false;
3040        let mut min_continuation_for_tracking = 0;
3041
3042        for (line_idx, line_info) in lines.iter().enumerate() {
3043            let line_num = line_idx + 1;
3044
3045            // Enhanced code block handling using Design #3's context analysis
3046            if line_info.in_code_block {
3047                if let Some(ref mut block) = current_block {
3048                    // Calculate minimum indentation for list continuation
3049                    let min_continuation_indent =
3050                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3051
3052                    // Analyze code block context using the three-tier classification
3053                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3054
3055                    match context {
3056                        CodeBlockContext::Indented => {
3057                            // Code block is properly indented - continues the list
3058                            block.end_line = line_num;
3059                            continue;
3060                        }
3061                        CodeBlockContext::Standalone => {
3062                            // Code block separates lists - end current block
3063                            let completed_block = current_block.take().unwrap();
3064                            list_blocks.push(completed_block);
3065                            continue;
3066                        }
3067                        CodeBlockContext::Adjacent => {
3068                            // Edge case - use conservative behavior (continue list)
3069                            block.end_line = line_num;
3070                            continue;
3071                        }
3072                    }
3073                } else {
3074                    // No current list block - skip code block lines
3075                    continue;
3076                }
3077            }
3078
3079            // Extract blockquote prefix if any
3080            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3081                caps.get(0).unwrap().as_str().to_string()
3082            } else {
3083                String::new()
3084            };
3085
3086            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3087            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3088            if let Some(ref block) = current_block
3089                && line_info.list_item.is_none()
3090                && !line_info.is_blank
3091                && !line_info.in_code_span_continuation
3092            {
3093                let line_content = line_info.content(content).trim();
3094
3095                // Check for structural separators that break lists
3096                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3097                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3098                // as they indicate improper indentation rather than lazy continuation.
3099                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3100
3101                // Check if blockquote context changes (different prefix than current block)
3102                // Lines within the SAME blockquote context don't break lists
3103                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3104
3105                let breaks_list = line_info.heading.is_some()
3106                    || line_content.starts_with("---")
3107                    || line_content.starts_with("***")
3108                    || line_content.starts_with("___")
3109                    || crate::utils::skip_context::is_table_line(line_content)
3110                    || blockquote_prefix_changes
3111                    || (line_info.indent > 0
3112                        && line_info.indent < min_continuation_for_tracking
3113                        && !is_lazy_continuation);
3114
3115                if breaks_list {
3116                    has_list_breaking_content_since_last_item = true;
3117                }
3118            }
3119
3120            // If this line is a code span continuation within an active list block,
3121            // extend the block's end_line to include this line (maintains list continuity)
3122            if line_info.in_code_span_continuation
3123                && line_info.list_item.is_none()
3124                && let Some(ref mut block) = current_block
3125            {
3126                block.end_line = line_num;
3127            }
3128
3129            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3130            // properly indented lines within the list). This ensures the workaround at line 2448
3131            // works correctly when there are multiple continuation lines before a nested list item.
3132            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3133            // For blockquote lines, compute effective indent after stripping the prefix
3134            let effective_continuation_indent = if let Some(ref block) = current_block {
3135                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3136                let line_content = line_info.content(content);
3137                let line_bq_level = line_content
3138                    .chars()
3139                    .take_while(|c| *c == '>' || c.is_whitespace())
3140                    .filter(|&c| c == '>')
3141                    .count();
3142                if line_bq_level > 0 && line_bq_level == block_bq_level {
3143                    // Compute indent after blockquote markers
3144                    let mut pos = 0;
3145                    let mut found_markers = 0;
3146                    for c in line_content.chars() {
3147                        pos += c.len_utf8();
3148                        if c == '>' {
3149                            found_markers += 1;
3150                            if found_markers == line_bq_level {
3151                                if line_content.get(pos..pos + 1) == Some(" ") {
3152                                    pos += 1;
3153                                }
3154                                break;
3155                            }
3156                        }
3157                    }
3158                    let after_bq = &line_content[pos..];
3159                    after_bq.len() - after_bq.trim_start().len()
3160                } else {
3161                    line_info.indent
3162                }
3163            } else {
3164                line_info.indent
3165            };
3166            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3167                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3168                if block_bq_level > 0 {
3169                    if block.is_ordered { last_marker_width } else { 2 }
3170                } else {
3171                    min_continuation_for_tracking
3172                }
3173            } else {
3174                min_continuation_for_tracking
3175            };
3176            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3177                || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
3178
3179            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3180                eprintln!(
3181                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3182                    line_num,
3183                    effective_continuation_indent,
3184                    adjusted_min_continuation_for_tracking,
3185                    is_valid_continuation,
3186                    line_info.in_code_span_continuation,
3187                    line_info.in_code_block,
3188                    current_block.is_some()
3189                );
3190            }
3191
3192            if !line_info.in_code_span_continuation
3193                && line_info.list_item.is_none()
3194                && !line_info.is_blank
3195                && !line_info.in_code_block
3196                && is_valid_continuation
3197                && let Some(ref mut block) = current_block
3198            {
3199                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3200                    eprintln!(
3201                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3202                        line_num, block.end_line, line_num
3203                    );
3204                }
3205                block.end_line = line_num;
3206            }
3207
3208            // Check if this line is a list item
3209            if let Some(list_item) = &line_info.list_item {
3210                // Calculate nesting level based on indentation
3211                let item_indent = list_item.marker_column;
3212                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3213
3214                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3215                    eprintln!(
3216                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3217                        line_num, list_item.marker, item_indent
3218                    );
3219                }
3220
3221                if let Some(ref mut block) = current_block {
3222                    // Check if this continues the current block
3223                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3224                    // or a continuation at the same or lower level
3225                    let is_nested = nesting > block.nesting_level;
3226                    let same_type =
3227                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3228                    let same_context = block.blockquote_prefix == blockquote_prefix;
3229                    // Allow one blank line after last item, or lines immediately after block content
3230                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3231
3232                    // For unordered lists, also check marker consistency
3233                    let marker_compatible =
3234                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3235
3236                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3237                    // This eliminates the quadratic bottleneck from issue #148
3238                    let has_non_list_content = has_list_breaking_content_since_last_item;
3239
3240                    // A list continues if:
3241                    // 1. It's a nested item (indented more than the parent), OR
3242                    // 2. It's the same type at the same level with reasonable distance
3243                    let mut continues_list = if is_nested {
3244                        // Nested items always continue the list if they're in the same context
3245                        same_context && reasonable_distance && !has_non_list_content
3246                    } else {
3247                        // Same-level items need to match type and markers
3248                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3249                    };
3250
3251                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3252                        eprintln!(
3253                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3254                            line_num,
3255                            continues_list,
3256                            is_nested,
3257                            same_type,
3258                            same_context,
3259                            reasonable_distance,
3260                            marker_compatible,
3261                            has_non_list_content,
3262                            last_list_item_line,
3263                            block.end_line
3264                        );
3265                    }
3266
3267                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3268                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3269                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
3270                        // Check if the previous line was a list item or a continuation of a list item
3271                        // (including lazy continuation lines)
3272                        if block.item_lines.contains(&(line_num - 1)) {
3273                            // They're consecutive list items - force them to be in the same list
3274                            continues_list = true;
3275                        } else {
3276                            // Previous line is a continuation line within this block
3277                            // (e.g., lazy continuation with indent=0)
3278                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3279                            continues_list = true;
3280                        }
3281                    }
3282
3283                    if continues_list {
3284                        // Extend current block
3285                        block.end_line = line_num;
3286                        block.item_lines.push(line_num);
3287
3288                        // Update max marker width
3289                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3290                            list_item.marker.len() + 1
3291                        } else {
3292                            list_item.marker.len()
3293                        });
3294
3295                        // Update marker consistency for unordered lists
3296                        if !block.is_ordered
3297                            && block.marker.is_some()
3298                            && block.marker.as_ref() != Some(&list_item.marker)
3299                        {
3300                            // Mixed markers, clear the marker field
3301                            block.marker = None;
3302                        }
3303
3304                        // Reset tracked state for issue #148 optimization
3305                        reset_tracking_state(
3306                            list_item,
3307                            &mut has_list_breaking_content_since_last_item,
3308                            &mut min_continuation_for_tracking,
3309                        );
3310                    } else {
3311                        // End current block and start a new one
3312
3313                        list_blocks.push(block.clone());
3314
3315                        *block = ListBlock {
3316                            start_line: line_num,
3317                            end_line: line_num,
3318                            is_ordered: list_item.is_ordered,
3319                            marker: if list_item.is_ordered {
3320                                None
3321                            } else {
3322                                Some(list_item.marker.clone())
3323                            },
3324                            blockquote_prefix: blockquote_prefix.clone(),
3325                            item_lines: vec![line_num],
3326                            nesting_level: nesting,
3327                            max_marker_width: if list_item.is_ordered {
3328                                list_item.marker.len() + 1
3329                            } else {
3330                                list_item.marker.len()
3331                            },
3332                        };
3333
3334                        // Initialize tracked state for new block (issue #148 optimization)
3335                        reset_tracking_state(
3336                            list_item,
3337                            &mut has_list_breaking_content_since_last_item,
3338                            &mut min_continuation_for_tracking,
3339                        );
3340                    }
3341                } else {
3342                    // Start a new block
3343                    current_block = Some(ListBlock {
3344                        start_line: line_num,
3345                        end_line: line_num,
3346                        is_ordered: list_item.is_ordered,
3347                        marker: if list_item.is_ordered {
3348                            None
3349                        } else {
3350                            Some(list_item.marker.clone())
3351                        },
3352                        blockquote_prefix,
3353                        item_lines: vec![line_num],
3354                        nesting_level: nesting,
3355                        max_marker_width: list_item.marker.len(),
3356                    });
3357
3358                    // Initialize tracked state for new block (issue #148 optimization)
3359                    reset_tracking_state(
3360                        list_item,
3361                        &mut has_list_breaking_content_since_last_item,
3362                        &mut min_continuation_for_tracking,
3363                    );
3364                }
3365
3366                last_list_item_line = line_num;
3367                current_indent_level = item_indent;
3368                last_marker_width = if list_item.is_ordered {
3369                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3370                } else {
3371                    list_item.marker.len()
3372                };
3373            } else if let Some(ref mut block) = current_block {
3374                // Not a list item - check if it continues the current block
3375                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3376                    eprintln!(
3377                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
3378                        line_num, line_info.is_blank
3379                    );
3380                }
3381
3382                // For MD032 compatibility, we use a simple approach:
3383                // - Indented lines continue the list
3384                // - Blank lines followed by indented content continue the list
3385                // - Everything else ends the list
3386
3387                // Check if the last line in the list block ended with a backslash (hard line break)
3388                // This handles cases where list items use backslash for hard line breaks
3389                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3390                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3391                } else {
3392                    false
3393                };
3394
3395                // Calculate minimum indentation for list continuation
3396                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3397                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3398                let min_continuation_indent = if block.is_ordered {
3399                    current_indent_level + last_marker_width
3400                } else {
3401                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3402                };
3403
3404                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3405                    // Indented line or backslash continuation continues the list
3406                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3407                        eprintln!(
3408                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
3409                            line_num, line_info.indent, min_continuation_indent
3410                        );
3411                    }
3412                    block.end_line = line_num;
3413                } else if line_info.is_blank {
3414                    // Blank line - check if it's internal to the list or ending it
3415                    // We only include blank lines that are followed by more list content
3416                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3417                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
3418                    }
3419                    let mut check_idx = line_idx + 1;
3420                    let mut found_continuation = false;
3421
3422                    // Skip additional blank lines
3423                    while check_idx < lines.len() && lines[check_idx].is_blank {
3424                        check_idx += 1;
3425                    }
3426
3427                    if check_idx < lines.len() {
3428                        let next_line = &lines[check_idx];
3429                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
3430                        let next_content = next_line.content(content);
3431                        // Use blockquote level (count of >) to compare, not the full prefix
3432                        // This avoids issues where the regex captures extra whitespace
3433                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3434                        let next_bq_level_for_indent = next_content
3435                            .chars()
3436                            .take_while(|c| *c == '>' || c.is_whitespace())
3437                            .filter(|&c| c == '>')
3438                            .count();
3439                        let effective_indent =
3440                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
3441                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
3442                                // Find position after ">" and one space
3443                                let mut pos = 0;
3444                                let mut found_markers = 0;
3445                                for c in next_content.chars() {
3446                                    pos += c.len_utf8();
3447                                    if c == '>' {
3448                                        found_markers += 1;
3449                                        if found_markers == next_bq_level_for_indent {
3450                                            // Skip optional space after last >
3451                                            if next_content.get(pos..pos + 1) == Some(" ") {
3452                                                pos += 1;
3453                                            }
3454                                            break;
3455                                        }
3456                                    }
3457                                }
3458                                let after_blockquote_marker = &next_content[pos..];
3459                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
3460                            } else {
3461                                next_line.indent
3462                            };
3463                        // Also adjust min_continuation_indent for blockquote lists
3464                        // The marker_column includes blockquote prefix, so subtract it
3465                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
3466                            // For blockquote lists, the continuation is relative to blockquote content
3467                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
3468                            if block.is_ordered { last_marker_width } else { 2 }
3469                        } else {
3470                            min_continuation_indent
3471                        };
3472                        // Check if followed by indented content (list continuation)
3473                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3474                            eprintln!(
3475                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
3476                                line_num,
3477                                check_idx + 1,
3478                                effective_indent,
3479                                adjusted_min_continuation,
3480                                next_line.list_item.is_some(),
3481                                next_line.in_code_block
3482                            );
3483                        }
3484                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
3485                            found_continuation = true;
3486                        }
3487                        // Check if followed by another list item at the same level
3488                        else if !next_line.in_code_block
3489                            && next_line.list_item.is_some()
3490                            && let Some(item) = &next_line.list_item
3491                        {
3492                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3493                                .find(next_line.content(content))
3494                                .map_or(String::new(), |m| m.as_str().to_string());
3495                            if item.marker_column == current_indent_level
3496                                && item.is_ordered == block.is_ordered
3497                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3498                            {
3499                                // Check if there was meaningful content between the list items (unused now)
3500                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3501                                // Pre-compute block's blockquote level for use in closures
3502                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3503                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3504                                    if let Some(between_line) = lines.get(idx) {
3505                                        let between_content = between_line.content(content);
3506                                        let trimmed = between_content.trim();
3507                                        // Skip empty lines
3508                                        if trimmed.is_empty() {
3509                                            return false;
3510                                        }
3511                                        // Check for meaningful content
3512                                        let line_indent = between_content.len() - between_content.trim_start().len();
3513
3514                                        // Check if blockquote level changed (not just if line starts with ">")
3515                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3516                                            .find(between_content)
3517                                            .map_or(String::new(), |m| m.as_str().to_string());
3518                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
3519                                        let blockquote_level_changed =
3520                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
3521
3522                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
3523                                        if trimmed.starts_with("```")
3524                                            || trimmed.starts_with("~~~")
3525                                            || trimmed.starts_with("---")
3526                                            || trimmed.starts_with("***")
3527                                            || trimmed.starts_with("___")
3528                                            || blockquote_level_changed
3529                                            || crate::utils::skip_context::is_table_line(trimmed)
3530                                            || between_line.heading.is_some()
3531                                        {
3532                                            return true; // These are structural separators - meaningful content that breaks lists
3533                                        }
3534
3535                                        // Only properly indented content continues the list
3536                                        line_indent >= min_continuation_indent
3537                                    } else {
3538                                        false
3539                                    }
3540                                });
3541
3542                                if block.is_ordered {
3543                                    // For ordered lists: don't continue if there are structural separators
3544                                    // Check if there are structural separators between the list items
3545                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3546                                        if let Some(between_line) = lines.get(idx) {
3547                                            let between_content = between_line.content(content);
3548                                            let trimmed = between_content.trim();
3549                                            if trimmed.is_empty() {
3550                                                return false;
3551                                            }
3552                                            // Check if blockquote level changed (not just if line starts with ">")
3553                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3554                                                .find(between_content)
3555                                                .map_or(String::new(), |m| m.as_str().to_string());
3556                                            let between_bq_level =
3557                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
3558                                            let blockquote_level_changed =
3559                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
3560                                            // Check for structural separators that break lists
3561                                            trimmed.starts_with("```")
3562                                                || trimmed.starts_with("~~~")
3563                                                || trimmed.starts_with("---")
3564                                                || trimmed.starts_with("***")
3565                                                || trimmed.starts_with("___")
3566                                                || blockquote_level_changed
3567                                                || crate::utils::skip_context::is_table_line(trimmed)
3568                                                || between_line.heading.is_some()
3569                                        } else {
3570                                            false
3571                                        }
3572                                    });
3573                                    found_continuation = !has_structural_separators;
3574                                } else {
3575                                    // For unordered lists: also check for structural separators
3576                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3577                                        if let Some(between_line) = lines.get(idx) {
3578                                            let between_content = between_line.content(content);
3579                                            let trimmed = between_content.trim();
3580                                            if trimmed.is_empty() {
3581                                                return false;
3582                                            }
3583                                            // Check if blockquote level changed (not just if line starts with ">")
3584                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
3585                                                .find(between_content)
3586                                                .map_or(String::new(), |m| m.as_str().to_string());
3587                                            let between_bq_level =
3588                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
3589                                            let blockquote_level_changed =
3590                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
3591                                            // Check for structural separators that break lists
3592                                            trimmed.starts_with("```")
3593                                                || trimmed.starts_with("~~~")
3594                                                || trimmed.starts_with("---")
3595                                                || trimmed.starts_with("***")
3596                                                || trimmed.starts_with("___")
3597                                                || blockquote_level_changed
3598                                                || crate::utils::skip_context::is_table_line(trimmed)
3599                                                || between_line.heading.is_some()
3600                                        } else {
3601                                            false
3602                                        }
3603                                    });
3604                                    found_continuation = !has_structural_separators;
3605                                }
3606                            }
3607                        }
3608                    }
3609
3610                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3611                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
3612                    }
3613                    if found_continuation {
3614                        // Include the blank line in the block
3615                        block.end_line = line_num;
3616                    } else {
3617                        // Blank line ends the list - don't include it
3618                        list_blocks.push(block.clone());
3619                        current_block = None;
3620                    }
3621                } else {
3622                    // Check for lazy continuation - non-indented line immediately after a list item
3623                    // But only if the line has sufficient indentation for the list type
3624                    let min_required_indent = if block.is_ordered {
3625                        current_indent_level + last_marker_width
3626                    } else {
3627                        current_indent_level + 2
3628                    };
3629
3630                    // For lazy continuation to apply, the line must either:
3631                    // 1. Have no indentation (true lazy continuation)
3632                    // 2. Have sufficient indentation for the list type
3633                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3634                    let line_content = line_info.content(content).trim();
3635
3636                    // Check for table-like patterns
3637                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3638
3639                    // Check if blockquote level changed (not just if line starts with ">")
3640                    // Lines within the same blockquote level are NOT structural separators
3641                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3642                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3643                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3644
3645                    let is_structural_separator = line_info.heading.is_some()
3646                        || line_content.starts_with("```")
3647                        || line_content.starts_with("~~~")
3648                        || line_content.starts_with("---")
3649                        || line_content.starts_with("***")
3650                        || line_content.starts_with("___")
3651                        || blockquote_level_changed
3652                        || looks_like_table;
3653
3654                    // Allow lazy continuation if we're still within the same list block
3655                    // (not just immediately after a list item)
3656                    let is_lazy_continuation = !is_structural_separator
3657                        && !line_info.is_blank
3658                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3659
3660                    if is_lazy_continuation {
3661                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3662                        // it's probably not a continuation
3663                        // BUT: for blockquote lines with sufficient effective indent, always treat as continuation
3664                        let line_content_raw = line_info.content(content);
3665                        let block_bq_level_lazy = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3666                        let line_bq_level_lazy = line_content_raw
3667                            .chars()
3668                            .take_while(|c| *c == '>' || c.is_whitespace())
3669                            .filter(|&c| c == '>')
3670                            .count();
3671                        let has_proper_blockquote_indent =
3672                            if line_bq_level_lazy > 0 && line_bq_level_lazy == block_bq_level_lazy {
3673                                // Compute effective indent after blockquote markers
3674                                let mut pos = 0;
3675                                let mut found_markers = 0;
3676                                for c in line_content_raw.chars() {
3677                                    pos += c.len_utf8();
3678                                    if c == '>' {
3679                                        found_markers += 1;
3680                                        if found_markers == line_bq_level_lazy {
3681                                            if line_content_raw.get(pos..pos + 1) == Some(" ") {
3682                                                pos += 1;
3683                                            }
3684                                            break;
3685                                        }
3686                                    }
3687                                }
3688                                let after_bq = &line_content_raw[pos..];
3689                                let effective_indent_lazy = after_bq.len() - after_bq.trim_start().len();
3690                                let min_required_for_bq = if block.is_ordered { last_marker_width } else { 2 };
3691                                effective_indent_lazy >= min_required_for_bq
3692                            } else {
3693                                false
3694                            };
3695
3696                        // If it has proper blockquote indent, it's a continuation regardless of uppercase
3697                        if has_proper_blockquote_indent {
3698                            block.end_line = line_num;
3699                        } else {
3700                            let content_to_check = if !blockquote_prefix.is_empty() {
3701                                // Strip blockquote prefix to check the actual content
3702                                line_info
3703                                    .content(content)
3704                                    .strip_prefix(&blockquote_prefix)
3705                                    .unwrap_or(line_info.content(content))
3706                                    .trim()
3707                            } else {
3708                                line_info.content(content).trim()
3709                            };
3710
3711                            let starts_with_uppercase =
3712                                content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3713
3714                            // If it starts with uppercase and the previous line ended with punctuation,
3715                            // it's likely a new paragraph, not a continuation
3716                            if starts_with_uppercase && last_list_item_line > 0 {
3717                                // This looks like a new paragraph
3718                                list_blocks.push(block.clone());
3719                                current_block = None;
3720                            } else {
3721                                // This is a lazy continuation line
3722                                block.end_line = line_num;
3723                            }
3724                        }
3725                    } else {
3726                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3727                        list_blocks.push(block.clone());
3728                        current_block = None;
3729                    }
3730                }
3731            }
3732        }
3733
3734        // Don't forget the last block
3735        if let Some(block) = current_block {
3736            list_blocks.push(block);
3737        }
3738
3739        // Merge adjacent blocks that should be one
3740        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3741
3742        list_blocks
3743    }
3744
3745    /// Compute character frequency for fast content analysis
3746    fn compute_char_frequency(content: &str) -> CharFrequency {
3747        let mut frequency = CharFrequency::default();
3748
3749        for ch in content.chars() {
3750            match ch {
3751                '#' => frequency.hash_count += 1,
3752                '*' => frequency.asterisk_count += 1,
3753                '_' => frequency.underscore_count += 1,
3754                '-' => frequency.hyphen_count += 1,
3755                '+' => frequency.plus_count += 1,
3756                '>' => frequency.gt_count += 1,
3757                '|' => frequency.pipe_count += 1,
3758                '[' => frequency.bracket_count += 1,
3759                '`' => frequency.backtick_count += 1,
3760                '<' => frequency.lt_count += 1,
3761                '!' => frequency.exclamation_count += 1,
3762                '\n' => frequency.newline_count += 1,
3763                _ => {}
3764            }
3765        }
3766
3767        frequency
3768    }
3769
3770    /// Parse HTML tags in the content
3771    fn parse_html_tags(
3772        content: &str,
3773        lines: &[LineInfo],
3774        code_blocks: &[(usize, usize)],
3775        flavor: MarkdownFlavor,
3776    ) -> Vec<HtmlTag> {
3777        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3778            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3779
3780        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3781
3782        for cap in HTML_TAG_REGEX.captures_iter(content) {
3783            let full_match = cap.get(0).unwrap();
3784            let match_start = full_match.start();
3785            let match_end = full_match.end();
3786
3787            // Skip if in code block
3788            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3789                continue;
3790            }
3791
3792            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3793            let tag_name_original = cap.get(2).unwrap().as_str();
3794            let tag_name = tag_name_original.to_lowercase();
3795            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3796
3797            // Skip JSX components in MDX files (tags starting with uppercase letter)
3798            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3799            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3800                continue;
3801            }
3802
3803            // Find which line this tag is on
3804            let mut line_num = 1;
3805            let mut col_start = match_start;
3806            let mut col_end = match_end;
3807            for (idx, line_info) in lines.iter().enumerate() {
3808                if match_start >= line_info.byte_offset {
3809                    line_num = idx + 1;
3810                    col_start = match_start - line_info.byte_offset;
3811                    col_end = match_end - line_info.byte_offset;
3812                } else {
3813                    break;
3814                }
3815            }
3816
3817            html_tags.push(HtmlTag {
3818                line: line_num,
3819                start_col: col_start,
3820                end_col: col_end,
3821                byte_offset: match_start,
3822                byte_end: match_end,
3823                tag_name,
3824                is_closing,
3825                is_self_closing,
3826                raw_content: full_match.as_str().to_string(),
3827            });
3828        }
3829
3830        html_tags
3831    }
3832
3833    /// Parse table rows in the content
3834    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3835        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3836
3837        for (line_idx, line_info) in lines.iter().enumerate() {
3838            // Skip lines in code blocks or blank lines
3839            if line_info.in_code_block || line_info.is_blank {
3840                continue;
3841            }
3842
3843            let line = line_info.content(content);
3844            let line_num = line_idx + 1;
3845
3846            // Check if this line contains pipes (potential table row)
3847            if !line.contains('|') {
3848                continue;
3849            }
3850
3851            // Count columns by splitting on pipes
3852            let parts: Vec<&str> = line.split('|').collect();
3853            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3854
3855            // Check if this is a separator row
3856            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3857            let mut column_alignments = Vec::new();
3858
3859            if is_separator {
3860                for part in &parts[1..parts.len() - 1] {
3861                    // Skip first and last empty parts
3862                    let trimmed = part.trim();
3863                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3864                        "center".to_string()
3865                    } else if trimmed.ends_with(':') {
3866                        "right".to_string()
3867                    } else if trimmed.starts_with(':') {
3868                        "left".to_string()
3869                    } else {
3870                        "none".to_string()
3871                    };
3872                    column_alignments.push(alignment);
3873                }
3874            }
3875
3876            table_rows.push(TableRow {
3877                line: line_num,
3878                is_separator,
3879                column_count,
3880                column_alignments,
3881            });
3882        }
3883
3884        table_rows
3885    }
3886
3887    /// Parse bare URLs and emails in the content
3888    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3889        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3890
3891        // Check for bare URLs (not in angle brackets or markdown links)
3892        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3893            let full_match = cap.get(0).unwrap();
3894            let match_start = full_match.start();
3895            let match_end = full_match.end();
3896
3897            // Skip if in code block
3898            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3899                continue;
3900            }
3901
3902            // Skip if already in angle brackets or markdown links
3903            let preceding_char = if match_start > 0 {
3904                content.chars().nth(match_start - 1)
3905            } else {
3906                None
3907            };
3908            let following_char = content.chars().nth(match_end);
3909
3910            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3911                continue;
3912            }
3913            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3914                continue;
3915            }
3916
3917            let url = full_match.as_str();
3918            let url_type = if url.starts_with("https://") {
3919                "https"
3920            } else if url.starts_with("http://") {
3921                "http"
3922            } else if url.starts_with("ftp://") {
3923                "ftp"
3924            } else {
3925                "other"
3926            };
3927
3928            // Find which line this URL is on
3929            let mut line_num = 1;
3930            let mut col_start = match_start;
3931            let mut col_end = match_end;
3932            for (idx, line_info) in lines.iter().enumerate() {
3933                if match_start >= line_info.byte_offset {
3934                    line_num = idx + 1;
3935                    col_start = match_start - line_info.byte_offset;
3936                    col_end = match_end - line_info.byte_offset;
3937                } else {
3938                    break;
3939                }
3940            }
3941
3942            bare_urls.push(BareUrl {
3943                line: line_num,
3944                start_col: col_start,
3945                end_col: col_end,
3946                byte_offset: match_start,
3947                byte_end: match_end,
3948                url: url.to_string(),
3949                url_type: url_type.to_string(),
3950            });
3951        }
3952
3953        // Check for bare email addresses
3954        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3955            let full_match = cap.get(0).unwrap();
3956            let match_start = full_match.start();
3957            let match_end = full_match.end();
3958
3959            // Skip if in code block
3960            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3961                continue;
3962            }
3963
3964            // Skip if already in angle brackets or markdown links
3965            let preceding_char = if match_start > 0 {
3966                content.chars().nth(match_start - 1)
3967            } else {
3968                None
3969            };
3970            let following_char = content.chars().nth(match_end);
3971
3972            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3973                continue;
3974            }
3975            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3976                continue;
3977            }
3978
3979            let email = full_match.as_str();
3980
3981            // Find which line this email is on
3982            let mut line_num = 1;
3983            let mut col_start = match_start;
3984            let mut col_end = match_end;
3985            for (idx, line_info) in lines.iter().enumerate() {
3986                if match_start >= line_info.byte_offset {
3987                    line_num = idx + 1;
3988                    col_start = match_start - line_info.byte_offset;
3989                    col_end = match_end - line_info.byte_offset;
3990                } else {
3991                    break;
3992                }
3993            }
3994
3995            bare_urls.push(BareUrl {
3996                line: line_num,
3997                start_col: col_start,
3998                end_col: col_end,
3999                byte_offset: match_start,
4000                byte_end: match_end,
4001                url: email.to_string(),
4002                url_type: "email".to_string(),
4003            });
4004        }
4005
4006        bare_urls
4007    }
4008
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is constructed over this context's `lines`; the filtering itself is
    /// performed by `ValidHeadingsIter`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        // Borrow the per-line data; the returned iterator cannot outlive `self`.
        ValidHeadingsIter::new(&self.lines)
    }
4032
4033    /// Check if the document contains any valid CommonMark headings
4034    ///
4035    /// Returns `true` if there is at least one heading with proper space after `#`.
4036    #[must_use]
4037    pub fn has_valid_headings(&self) -> bool {
4038        self.lines
4039            .iter()
4040            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4041    }
4042}
4043
4044/// Merge adjacent list blocks that should be treated as one
4045fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4046    if list_blocks.len() < 2 {
4047        return;
4048    }
4049
4050    let mut merger = ListBlockMerger::new(content, lines);
4051    *list_blocks = merger.merge(list_blocks);
4052}
4053
4054/// Helper struct to manage the complex logic of merging list blocks
4055struct ListBlockMerger<'a> {
4056    content: &'a str,
4057    lines: &'a [LineInfo],
4058}
4059
4060impl<'a> ListBlockMerger<'a> {
4061    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4062        Self { content, lines }
4063    }
4064
4065    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4066        let mut merged = Vec::with_capacity(list_blocks.len());
4067        let mut current = list_blocks[0].clone();
4068
4069        for next in list_blocks.iter().skip(1) {
4070            if self.should_merge_blocks(&current, next) {
4071                current = self.merge_two_blocks(current, next);
4072            } else {
4073                merged.push(current);
4074                current = next.clone();
4075            }
4076        }
4077
4078        merged.push(current);
4079        merged
4080    }
4081
4082    /// Determine if two adjacent list blocks should be merged
4083    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4084        // Basic compatibility checks
4085        if !self.blocks_are_compatible(current, next) {
4086            return false;
4087        }
4088
4089        // Check spacing and content between blocks
4090        let spacing = self.analyze_spacing_between(current, next);
4091        match spacing {
4092            BlockSpacing::Consecutive => true,
4093            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4094            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4095                self.can_merge_with_content_between(current, next)
4096            }
4097        }
4098    }
4099
4100    /// Check if blocks have compatible structure for merging
4101    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4102        current.is_ordered == next.is_ordered
4103            && current.blockquote_prefix == next.blockquote_prefix
4104            && current.nesting_level == next.nesting_level
4105    }
4106
4107    /// Analyze the spacing between two list blocks
4108    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4109        let gap = next.start_line - current.end_line;
4110
4111        match gap {
4112            1 => BlockSpacing::Consecutive,
4113            2 => BlockSpacing::SingleBlank,
4114            _ if gap > 2 => {
4115                if self.has_only_blank_lines_between(current, next) {
4116                    BlockSpacing::MultipleBlanks
4117                } else {
4118                    BlockSpacing::ContentBetween
4119                }
4120            }
4121            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4122        }
4123    }
4124
4125    /// Check if unordered lists can be merged with a single blank line between
4126    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4127        // Check if there are structural separators between the blocks
4128        // If has_meaningful_content_between returns true, it means there are structural separators
4129        if has_meaningful_content_between(self.content, current, next, self.lines) {
4130            return false; // Structural separators prevent merging
4131        }
4132
4133        // Only merge unordered lists with same marker across single blank
4134        !current.is_ordered && current.marker == next.marker
4135    }
4136
4137    /// Check if ordered lists can be merged when there's content between them
4138    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4139        // Do not merge lists if there are structural separators between them
4140        if has_meaningful_content_between(self.content, current, next, self.lines) {
4141            return false; // Structural separators prevent merging
4142        }
4143
4144        // Only consider merging ordered lists if there's no structural content between
4145        current.is_ordered && next.is_ordered
4146    }
4147
4148    /// Check if there are only blank lines between blocks
4149    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4150        for line_num in (current.end_line + 1)..next.start_line {
4151            if let Some(line_info) = self.lines.get(line_num - 1)
4152                && !line_info.content(self.content).trim().is_empty()
4153            {
4154                return false;
4155            }
4156        }
4157        true
4158    }
4159
4160    /// Merge two compatible list blocks into one
4161    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4162        current.end_line = next.end_line;
4163        current.item_lines.extend_from_slice(&next.item_lines);
4164
4165        // Update max marker width
4166        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4167
4168        // Handle marker consistency for unordered lists
4169        if !current.is_ordered && self.markers_differ(&current, next) {
4170            current.marker = None; // Mixed markers
4171        }
4172
4173        current
4174    }
4175
4176    /// Check if two blocks have different markers
4177    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4178        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4179    }
4180}
4181
/// Types of spacing between list blocks
///
/// Describes what separates the end of one list block from the start of the
/// next one.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks
    Consecutive,
    /// One blank line between blocks
    // NOTE(review): the classifier assigns this from the line-number distance
    // alone (exactly one line in between) without checking that line is blank.
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks
    ContentBetween,
}
4190
4191/// Check if there's meaningful content (not just blank lines) between two list blocks
4192fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4193    // Check lines between current.end_line and next.start_line
4194    for line_num in (current.end_line + 1)..next.start_line {
4195        if let Some(line_info) = lines.get(line_num - 1) {
4196            // Convert to 0-indexed
4197            let trimmed = line_info.content(content).trim();
4198
4199            // Skip empty lines
4200            if trimmed.is_empty() {
4201                continue;
4202            }
4203
4204            // Check for structural separators that should separate lists (CommonMark compliant)
4205
4206            // Headings separate lists
4207            if line_info.heading.is_some() {
4208                return true; // Has meaningful content - headings separate lists
4209            }
4210
4211            // Horizontal rules separate lists (---, ***, ___)
4212            if is_horizontal_rule(trimmed) {
4213                return true; // Has meaningful content - horizontal rules separate lists
4214            }
4215
4216            // Tables separate lists
4217            if crate::utils::skip_context::is_table_line(trimmed) {
4218                return true; // Has meaningful content - tables separate lists
4219            }
4220
4221            // Blockquotes separate lists
4222            if trimmed.starts_with('>') {
4223                return true; // Has meaningful content - blockquotes separate lists
4224            }
4225
4226            // Code block fences separate lists (unless properly indented as list content)
4227            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4228                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4229
4230                // Check if this code block is properly indented as list continuation
4231                let min_continuation_indent = if current.is_ordered {
4232                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4233                } else {
4234                    current.nesting_level + 2
4235                };
4236
4237                if line_indent < min_continuation_indent {
4238                    // This is a standalone code block that separates lists
4239                    return true; // Has meaningful content - standalone code blocks separate lists
4240                }
4241            }
4242
4243            // Check if this line has proper indentation for list continuation
4244            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4245
4246            // Calculate minimum indentation needed to be list continuation
4247            let min_indent = if current.is_ordered {
4248                current.nesting_level + current.max_marker_width
4249            } else {
4250                current.nesting_level + 2
4251            };
4252
4253            // If the line is not indented enough to be list continuation, it's meaningful content
4254            if line_indent < min_indent {
4255                return true; // Has meaningful content - content not indented as list continuation
4256            }
4257
4258            // If we reach here, the line is properly indented as list continuation
4259            // Continue checking other lines
4260        }
4261    }
4262
4263    // Only blank lines or properly indented list continuation content between blocks
4264    false
4265}
4266
4267/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4268/// CommonMark rules for thematic breaks (horizontal rules):
4269/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4270/// - Must have 3+ of the same character (-, *, or _)
4271/// - May have spaces between characters
4272/// - No other characters allowed
4273pub fn is_horizontal_rule_line(line: &str) -> bool {
4274    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4275    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4276    if leading_spaces > 3 || line.starts_with('\t') {
4277        return false;
4278    }
4279
4280    is_horizontal_rule_content(line.trim())
4281}
4282
/// Test whether already-trimmed text has the shape of a horizontal rule.
///
/// The text must start with `-`, `*` or `_`, contain that same character at
/// least three times, and contain nothing else except spaces and tabs.
/// Indentation is not checked here; use `is_horizontal_rule_line` for full
/// CommonMark compliance including the indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // The first character decides which rule character the rest must repeat.
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut marker_count = 0usize;
    for ch in trimmed.chars() {
        match ch {
            c if c == marker => marker_count += 1,
            ' ' | '\t' => {}
            // Any other character disqualifies the line.
            _ => return false,
        }
    }

    marker_count >= 3
}
4307
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged, so no indentation check is performed here.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4312
/// Unit tests for `LintContext` and the helper functions in this file.
4314#[cfg(test)]
4315mod tests {
4316    use super::*;
4317
4318    #[test]
4319    fn test_empty_content() {
4320        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4321        assert_eq!(ctx.content, "");
4322        assert_eq!(ctx.line_offsets, vec![0]);
4323        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4324        assert_eq!(ctx.lines.len(), 0);
4325    }
4326
4327    #[test]
4328    fn test_single_line() {
4329        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4330        assert_eq!(ctx.content, "# Hello");
4331        assert_eq!(ctx.line_offsets, vec![0]);
4332        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4333        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4334    }
4335
4336    #[test]
4337    fn test_multi_line() {
4338        let content = "# Title\n\nSecond line\nThird line";
4339        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4340        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4341        // Test offset to line/col
4342        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4343        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4344        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4345        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4346        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4347    }
4348
4349    #[test]
4350    fn test_line_info() {
4351        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
4352        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4353
4354        // Test line info
4355        assert_eq!(ctx.lines.len(), 7);
4356
4357        // Line 1: "# Title"
4358        let line1 = &ctx.lines[0];
4359        assert_eq!(line1.content(ctx.content), "# Title");
4360        assert_eq!(line1.byte_offset, 0);
4361        assert_eq!(line1.indent, 0);
4362        assert!(!line1.is_blank);
4363        assert!(!line1.in_code_block);
4364        assert!(line1.list_item.is_none());
4365
4366        // Line 2: "    indented"
4367        let line2 = &ctx.lines[1];
4368        assert_eq!(line2.content(ctx.content), "    indented");
4369        assert_eq!(line2.byte_offset, 8);
4370        assert_eq!(line2.indent, 4);
4371        assert!(!line2.is_blank);
4372
4373        // Line 3: "" (blank)
4374        let line3 = &ctx.lines[2];
4375        assert_eq!(line3.content(ctx.content), "");
4376        assert!(line3.is_blank);
4377
4378        // Test helper methods
4379        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
4380        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
4381        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
4382        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
4383    }
4384
4385    #[test]
4386    fn test_list_item_detection() {
4387        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
4388        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4389
4390        // Line 1: "- Unordered item"
4391        let line1 = &ctx.lines[0];
4392        assert!(line1.list_item.is_some());
4393        let list1 = line1.list_item.as_ref().unwrap();
4394        assert_eq!(list1.marker, "-");
4395        assert!(!list1.is_ordered);
4396        assert_eq!(list1.marker_column, 0);
4397        assert_eq!(list1.content_column, 2);
4398
4399        // Line 2: "  * Nested item"
4400        let line2 = &ctx.lines[1];
4401        assert!(line2.list_item.is_some());
4402        let list2 = line2.list_item.as_ref().unwrap();
4403        assert_eq!(list2.marker, "*");
4404        assert_eq!(list2.marker_column, 2);
4405
4406        // Line 3: "1. Ordered item"
4407        let line3 = &ctx.lines[2];
4408        assert!(line3.list_item.is_some());
4409        let list3 = line3.list_item.as_ref().unwrap();
4410        assert_eq!(list3.marker, "1.");
4411        assert!(list3.is_ordered);
4412        assert_eq!(list3.number, Some(1));
4413
4414        // Line 6: "Not a list"
4415        let line6 = &ctx.lines[5];
4416        assert!(line6.list_item.is_none());
4417    }
4418
4419    #[test]
4420    fn test_offset_to_line_col_edge_cases() {
4421        let content = "a\nb\nc";
4422        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4423        // line_offsets: [0, 2, 4]
4424        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
4425        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
4426        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
4427        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
4428        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
4429        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
4430    }
4431
4432    #[test]
4433    fn test_mdx_esm_blocks() {
4434        let content = r##"import {Chart} from './snowfall.js'
4435export const year = 2023
4436
4437# Last year's snowfall
4438
4439In {year}, the snowfall was above average.
4440It was followed by a warm spring which caused
4441flood conditions in many of the nearby rivers.
4442
4443<Chart color="#fcb32c" year={year} />
4444"##;
4445
4446        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
4447
4448        // Check that lines 1 and 2 are marked as ESM blocks
4449        assert_eq!(ctx.lines.len(), 10);
4450        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
4451        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
4452        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
4453        assert!(
4454            !ctx.lines[3].in_esm_block,
4455            "Line 4 (heading) should NOT be in_esm_block"
4456        );
4457        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
4458        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
4459    }
4460
4461    #[test]
4462    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
4463        let content = r#"import {Chart} from './snowfall.js'
4464export const year = 2023
4465
4466# Last year's snowfall
4467"#;
4468
4469        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4470
4471        // ESM blocks should NOT be detected in Standard flavor
4472        assert!(
4473            !ctx.lines[0].in_esm_block,
4474            "Line 1 should NOT be in_esm_block in Standard flavor"
4475        );
4476        assert!(
4477            !ctx.lines[1].in_esm_block,
4478            "Line 2 should NOT be in_esm_block in Standard flavor"
4479        );
4480    }
4481
4482    #[test]
4483    fn test_blockquote_with_indented_content() {
4484        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
4485        // The content inside the blockquote may also be detected as a code block (which is correct),
4486        // but for MD046 purposes, we need to know the line is inside a blockquote.
4487        let content = r#"# Heading
4488
4489>      -S socket-path
4490>                    More text
4491"#;
4492        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4493
4494        // Line 3 (index 2) should be detected as blockquote
4495        assert!(
4496            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
4497            "Line 3 should be a blockquote"
4498        );
4499        // Line 4 (index 3) should also be blockquote
4500        assert!(
4501            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
4502            "Line 4 should be a blockquote"
4503        );
4504
4505        // Verify blockquote content is correctly parsed
4506        // Note: spaces_after includes the spaces between `>` and content
4507        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
4508        assert_eq!(bq3.content, "-S socket-path");
4509        assert_eq!(bq3.nesting_level, 1);
4510        // 6 spaces after the `>` marker
4511        assert!(bq3.has_multiple_spaces_after_marker);
4512
4513        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
4514        assert_eq!(bq4.content, "More text");
4515        assert_eq!(bq4.nesting_level, 1);
4516    }
4517
4518    #[test]
4519    fn test_footnote_definitions_not_parsed_as_reference_defs() {
4520        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
4521        let content = r#"# Title
4522
4523A footnote[^1].
4524
4525[^1]: This is the footnote content.
4526
4527[^note]: Another footnote with [link](https://example.com).
4528
4529[regular]: ./path.md "A real reference definition"
4530"#;
4531        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4532
4533        // Should only have one reference definition (the regular one)
4534        assert_eq!(
4535            ctx.reference_defs.len(),
4536            1,
4537            "Footnotes should not be parsed as reference definitions"
4538        );
4539
4540        // The only reference def should be the regular one
4541        assert_eq!(ctx.reference_defs[0].id, "regular");
4542        assert_eq!(ctx.reference_defs[0].url, "./path.md");
4543        assert_eq!(
4544            ctx.reference_defs[0].title,
4545            Some("A real reference definition".to_string())
4546        );
4547    }
4548
4549    #[test]
4550    fn test_footnote_with_inline_link_not_misidentified() {
4551        // Regression test for issue #286: footnote containing an inline link
4552        // was incorrectly parsed as a reference definition with URL "[link](url)"
4553        let content = r#"# Title
4554
4555A footnote[^1].
4556
4557[^1]: [link](https://www.google.com).
4558"#;
4559        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4560
4561        // Should have no reference definitions
4562        assert!(
4563            ctx.reference_defs.is_empty(),
4564            "Footnote with inline link should not create a reference definition"
4565        );
4566    }
4567
4568    #[test]
4569    fn test_various_footnote_formats_excluded() {
4570        // Test various footnote ID formats are all excluded
4571        let content = r#"[^1]: Numeric footnote
4572[^note]: Named footnote
4573[^a]: Single char footnote
4574[^long-footnote-name]: Long named footnote
4575[^123abc]: Mixed alphanumeric
4576
4577[ref1]: ./file1.md
4578[ref2]: ./file2.md
4579"#;
4580        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4581
4582        // Should only have the two regular reference definitions
4583        assert_eq!(
4584            ctx.reference_defs.len(),
4585            2,
4586            "Only regular reference definitions should be parsed"
4587        );
4588
4589        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
4590        assert!(ids.contains(&"ref1"));
4591        assert!(ids.contains(&"ref2"));
4592        assert!(!ids.iter().any(|id| id.starts_with('^')));
4593    }
4594
4595    // =========================================================================
4596    // Tests for has_char and char_count methods
4597    // =========================================================================
4598
4599    #[test]
4600    fn test_has_char_tracked_characters() {
4601        // Test all 12 tracked characters
4602        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
4603        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4604
4605        // All tracked characters should be detected
4606        assert!(ctx.has_char('#'), "Should detect hash");
4607        assert!(ctx.has_char('*'), "Should detect asterisk");
4608        assert!(ctx.has_char('_'), "Should detect underscore");
4609        assert!(ctx.has_char('-'), "Should detect hyphen");
4610        assert!(ctx.has_char('+'), "Should detect plus");
4611        assert!(ctx.has_char('>'), "Should detect gt");
4612        assert!(ctx.has_char('|'), "Should detect pipe");
4613        assert!(ctx.has_char('['), "Should detect bracket");
4614        assert!(ctx.has_char('`'), "Should detect backtick");
4615        assert!(ctx.has_char('<'), "Should detect lt");
4616        assert!(ctx.has_char('!'), "Should detect exclamation");
4617        assert!(ctx.has_char('\n'), "Should detect newline");
4618    }
4619
4620    #[test]
4621    fn test_has_char_absent_characters() {
4622        let content = "Simple text without special chars";
4623        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4624
4625        // None of the tracked characters should be present
4626        assert!(!ctx.has_char('#'), "Should not detect hash");
4627        assert!(!ctx.has_char('*'), "Should not detect asterisk");
4628        assert!(!ctx.has_char('_'), "Should not detect underscore");
4629        assert!(!ctx.has_char('-'), "Should not detect hyphen");
4630        assert!(!ctx.has_char('+'), "Should not detect plus");
4631        assert!(!ctx.has_char('>'), "Should not detect gt");
4632        assert!(!ctx.has_char('|'), "Should not detect pipe");
4633        assert!(!ctx.has_char('['), "Should not detect bracket");
4634        assert!(!ctx.has_char('`'), "Should not detect backtick");
4635        assert!(!ctx.has_char('<'), "Should not detect lt");
4636        assert!(!ctx.has_char('!'), "Should not detect exclamation");
4637        // Note: single line content has no newlines
4638        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
4639    }
4640
4641    #[test]
4642    fn test_has_char_fallback_for_untracked() {
4643        let content = "Text with @mention and $dollar and %percent";
4644        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4645
4646        // Untracked characters should fall back to content.contains()
4647        assert!(ctx.has_char('@'), "Should detect @ via fallback");
4648        assert!(ctx.has_char('$'), "Should detect $ via fallback");
4649        assert!(ctx.has_char('%'), "Should detect % via fallback");
4650        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
4651    }
4652
4653    #[test]
4654    fn test_char_count_tracked_characters() {
4655        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
4656        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4657
4658        // Count each tracked character
4659        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
4660        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
4661        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
4662        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
4663        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
4664        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
4665        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
4666        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
4667        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
4668        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
4669        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
4670        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
4671    }
4672
4673    #[test]
4674    fn test_char_count_zero_for_absent() {
4675        let content = "Plain text";
4676        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4677
4678        assert_eq!(ctx.char_count('#'), 0);
4679        assert_eq!(ctx.char_count('*'), 0);
4680        assert_eq!(ctx.char_count('_'), 0);
4681        assert_eq!(ctx.char_count('\n'), 0);
4682    }
4683
4684    #[test]
4685    fn test_char_count_fallback_for_untracked() {
4686        let content = "@@@ $$ %%%";
4687        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4688
4689        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
4690        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
4691        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
4692        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
4693    }
4694
4695    #[test]
4696    fn test_char_count_empty_content() {
4697        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4698
4699        assert_eq!(ctx.char_count('#'), 0);
4700        assert_eq!(ctx.char_count('*'), 0);
4701        assert_eq!(ctx.char_count('@'), 0);
4702        assert!(!ctx.has_char('#'));
4703        assert!(!ctx.has_char('@'));
4704    }
4705
4706    // =========================================================================
4707    // Tests for is_in_html_tag method
4708    // =========================================================================
4709
4710    #[test]
4711    fn test_is_in_html_tag_simple() {
4712        let content = "<div>content</div>";
4713        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4714
4715        // Inside opening tag
4716        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
4717        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
4718        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
4719
4720        // Outside tag (in content)
4721        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
4722        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
4723
4724        // Inside closing tag
4725        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
4726        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
4727    }
4728
4729    #[test]
4730    fn test_is_in_html_tag_self_closing() {
4731        let content = "Text <br/> more text";
4732        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4733
4734        // Before tag
4735        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
4736        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
4737
4738        // Inside self-closing tag
4739        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
4740        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
4741        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
4742
4743        // After tag
4744        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
4745    }
4746
4747    #[test]
4748    fn test_is_in_html_tag_with_attributes() {
4749        let content = r#"<a href="url" class="link">text</a>"#;
4750        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4751
4752        // All positions inside opening tag with attributes
4753        assert!(ctx.is_in_html_tag(0), "Start of tag");
4754        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
4755        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
4756        assert!(ctx.is_in_html_tag(26), "End of opening tag");
4757
4758        // Content between tags
4759        assert!(!ctx.is_in_html_tag(27), "Start of content");
4760        assert!(!ctx.is_in_html_tag(30), "End of content");
4761
4762        // Closing tag
4763        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
4764    }
4765
4766    #[test]
4767    fn test_is_in_html_tag_multiline() {
4768        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
4769        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4770
4771        // Opening tag spans multiple lines
4772        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
4773        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
4774        assert!(ctx.is_in_html_tag(15), "Inside attribute");
4775
4776        // After closing > of opening tag
4777        let closing_bracket_pos = content.find(">\n").unwrap();
4778        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
4779    }
4780
4781    #[test]
4782    fn test_is_in_html_tag_no_tags() {
4783        let content = "Plain text without any HTML";
4784        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4785
4786        // No position should be in an HTML tag
4787        for i in 0..content.len() {
4788            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
4789        }
4790    }
4791
4792    // =========================================================================
4793    // Tests for is_in_jinja_range method
4794    // =========================================================================
4795
4796    #[test]
4797    fn test_is_in_jinja_range_expression() {
4798        let content = "Hello {{ name }}!";
4799        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4800
4801        // Before Jinja
4802        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
4803        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
4804
4805        // Inside Jinja expression (positions 6-15 for "{{ name }}")
4806        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
4807        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
4808        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
4809        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
4810        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
4811
4812        // After Jinja
4813        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
4814    }
4815
4816    #[test]
4817    fn test_is_in_jinja_range_statement() {
4818        let content = "{% if condition %}content{% endif %}";
4819        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4820
4821        // Inside opening statement
4822        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
4823        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
4824        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
4825
4826        // Content between
4827        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
4828
4829        // Inside closing statement
4830        assert!(ctx.is_in_jinja_range(25), "Start of endif");
4831        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
4832    }
4833
4834    #[test]
4835    fn test_is_in_jinja_range_multiple() {
4836        let content = "{{ a }} and {{ b }}";
4837        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4838
4839        // First Jinja expression
4840        assert!(ctx.is_in_jinja_range(0));
4841        assert!(ctx.is_in_jinja_range(3));
4842        assert!(ctx.is_in_jinja_range(6));
4843
4844        // Between expressions
4845        assert!(!ctx.is_in_jinja_range(8));
4846        assert!(!ctx.is_in_jinja_range(11));
4847
4848        // Second Jinja expression
4849        assert!(ctx.is_in_jinja_range(12));
4850        assert!(ctx.is_in_jinja_range(15));
4851        assert!(ctx.is_in_jinja_range(18));
4852    }
4853
4854    #[test]
4855    fn test_is_in_jinja_range_no_jinja() {
4856        let content = "Plain text with single braces but not Jinja";
4857        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4858
4859        // No position should be in Jinja
4860        for i in 0..content.len() {
4861            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
4862        }
4863    }
4864
4865    // =========================================================================
4866    // Tests for is_in_link_title method
4867    // =========================================================================
4868
4869    #[test]
4870    fn test_is_in_link_title_with_title() {
4871        let content = r#"[ref]: https://example.com "Title text"
4872
4873Some content."#;
4874        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4875
4876        // Verify we have a reference def with title
4877        assert_eq!(ctx.reference_defs.len(), 1);
4878        let def = &ctx.reference_defs[0];
4879        assert!(def.title_byte_start.is_some());
4880        assert!(def.title_byte_end.is_some());
4881
4882        let title_start = def.title_byte_start.unwrap();
4883        let title_end = def.title_byte_end.unwrap();
4884
4885        // Before title (in URL)
4886        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
4887
4888        // Inside title
4889        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
4890        assert!(
4891            ctx.is_in_link_title(title_start + 5),
4892            "Middle of title should be in title"
4893        );
4894        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
4895
4896        // After title
4897        assert!(
4898            !ctx.is_in_link_title(title_end),
4899            "After title end should not be in title"
4900        );
4901    }
4902
4903    #[test]
4904    fn test_is_in_link_title_without_title() {
4905        let content = "[ref]: https://example.com\n\nSome content.";
4906        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4907
4908        // Reference def without title
4909        assert_eq!(ctx.reference_defs.len(), 1);
4910        let def = &ctx.reference_defs[0];
4911        assert!(def.title_byte_start.is_none());
4912        assert!(def.title_byte_end.is_none());
4913
4914        // No position should be in a title
4915        for i in 0..content.len() {
4916            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
4917        }
4918    }
4919
4920    #[test]
4921    fn test_is_in_link_title_multiple_refs() {
4922        let content = r#"[ref1]: /url1 "Title One"
4923[ref2]: /url2
4924[ref3]: /url3 "Title Three"
4925"#;
4926        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4927
4928        // Should have 3 reference defs
4929        assert_eq!(ctx.reference_defs.len(), 3);
4930
4931        // ref1 has title
4932        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
4933        assert!(ref1.title_byte_start.is_some());
4934
4935        // ref2 has no title
4936        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
4937        assert!(ref2.title_byte_start.is_none());
4938
4939        // ref3 has title
4940        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
4941        assert!(ref3.title_byte_start.is_some());
4942
4943        // Check positions in ref1's title
4944        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
4945            assert!(ctx.is_in_link_title(start + 1));
4946            assert!(!ctx.is_in_link_title(end + 5));
4947        }
4948
4949        // Check positions in ref3's title
4950        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
4951            assert!(ctx.is_in_link_title(start + 1));
4952        }
4953    }
4954
4955    #[test]
4956    fn test_is_in_link_title_single_quotes() {
4957        let content = "[ref]: /url 'Single quoted title'\n";
4958        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4959
4960        assert_eq!(ctx.reference_defs.len(), 1);
4961        let def = &ctx.reference_defs[0];
4962
4963        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
4964            assert!(ctx.is_in_link_title(start));
4965            assert!(ctx.is_in_link_title(start + 5));
4966            assert!(!ctx.is_in_link_title(end));
4967        }
4968    }
4969
4970    #[test]
4971    fn test_is_in_link_title_parentheses() {
4972        // Note: The reference def parser may not support parenthesized titles
4973        // This test verifies the is_in_link_title method works when titles exist
4974        let content = "[ref]: /url (Parenthesized title)\n";
4975        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4976
4977        // Parser behavior: may or may not parse parenthesized titles
4978        // We test that is_in_link_title correctly reflects whatever was parsed
4979        if ctx.reference_defs.is_empty() {
4980            // Parser didn't recognize this as a reference def
4981            for i in 0..content.len() {
4982                assert!(!ctx.is_in_link_title(i));
4983            }
4984        } else {
4985            let def = &ctx.reference_defs[0];
4986            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
4987                assert!(ctx.is_in_link_title(start));
4988                assert!(ctx.is_in_link_title(start + 5));
4989                assert!(!ctx.is_in_link_title(end));
4990            } else {
4991                // Title wasn't parsed, so no position should be in title
4992                for i in 0..content.len() {
4993                    assert!(!ctx.is_in_link_title(i));
4994                }
4995            }
4996        }
4997    }
4998
4999    #[test]
5000    fn test_is_in_link_title_no_refs() {
5001        let content = "Just plain text without any reference definitions.";
5002        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5003
5004        assert!(ctx.reference_defs.is_empty());
5005
5006        for i in 0..content.len() {
5007            assert!(!ctx.is_in_link_title(i));
5008        }
5009    }
5010
5011    // =========================================================================
5012    // Math span tests (Issue #289)
5013    // =========================================================================
5014
5015    #[test]
5016    fn test_math_spans_inline() {
5017        let content = "Text with inline math $[f](x)$ in it.";
5018        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5019
5020        let math_spans = ctx.math_spans();
5021        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5022
5023        let span = &math_spans[0];
5024        assert!(!span.is_display, "Should be inline math, not display");
5025        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5026    }
5027
5028    #[test]
5029    fn test_math_spans_display_single_line() {
5030        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5031        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5032
5033        let math_spans = ctx.math_spans();
5034        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5035
5036        let span = &math_spans[0];
5037        assert!(span.is_display, "Should be display math");
5038        assert!(
5039            span.content.contains("[x](\\zeta)"),
5040            "Content should contain the link-like pattern"
5041        );
5042    }
5043
5044    #[test]
5045    fn test_math_spans_display_multiline() {
5046        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5047        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5048
5049        let math_spans = ctx.math_spans();
5050        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5051
5052        let span = &math_spans[0];
5053        assert!(span.is_display, "Should be display math");
5054    }
5055
5056    #[test]
5057    fn test_is_in_math_span() {
5058        let content = "Text $[f](x)$ more text";
5059        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5060
5061        // Position inside the math span
5062        let math_start = content.find('$').unwrap();
5063        let math_end = content.rfind('$').unwrap() + 1;
5064
5065        assert!(
5066            ctx.is_in_math_span(math_start + 1),
5067            "Position inside math span should return true"
5068        );
5069        assert!(
5070            ctx.is_in_math_span(math_start + 3),
5071            "Position inside math span should return true"
5072        );
5073
5074        // Position outside the math span
5075        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5076        assert!(
5077            !ctx.is_in_math_span(math_end + 1),
5078            "Position after math span should return false"
5079        );
5080    }
5081
5082    #[test]
5083    fn test_math_spans_mixed_with_code() {
5084        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5085        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5086
5087        let math_spans = ctx.math_spans();
5088        let code_spans = ctx.code_spans();
5089
5090        assert_eq!(math_spans.len(), 1, "Should have one math span");
5091        assert_eq!(code_spans.len(), 1, "Should have one code span");
5092
5093        // Verify math span content
5094        assert_eq!(math_spans[0].content, "[f](x)");
5095        // Verify code span content
5096        assert_eq!(code_spans[0].content, "[g](y)");
5097    }
5098
5099    #[test]
5100    fn test_math_spans_no_math() {
5101        let content = "Regular text without any math at all.";
5102        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5103
5104        let math_spans = ctx.math_spans();
5105        assert!(math_spans.is_empty(), "Should have no math spans");
5106    }
5107
5108    #[test]
5109    fn test_math_spans_multiple() {
5110        let content = "First $a$ and second $b$ and display $$c$$";
5111        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5112
5113        let math_spans = ctx.math_spans();
5114        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5115
5116        // Two inline, one display
5117        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5118        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5119
5120        assert_eq!(inline_count, 2, "Should have two inline math spans");
5121        assert_eq!(display_count, 1, "Should have one display math span");
5122    }
5123
5124    #[test]
5125    fn test_is_in_math_span_boundary_positions() {
5126        // Test exact boundary positions: $[f](x)$
5127        // Byte positions:                0123456789
5128        let content = "$[f](x)$";
5129        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5130
5131        let math_spans = ctx.math_spans();
5132        assert_eq!(math_spans.len(), 1, "Should have one math span");
5133
5134        let span = &math_spans[0];
5135
5136        // Position at opening $ should be in span (byte 0)
5137        assert!(
5138            ctx.is_in_math_span(span.byte_offset),
5139            "Start position should be in span"
5140        );
5141
5142        // Position just inside should be in span
5143        assert!(
5144            ctx.is_in_math_span(span.byte_offset + 1),
5145            "Position after start should be in span"
5146        );
5147
5148        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5149        assert!(
5150            ctx.is_in_math_span(span.byte_end - 1),
5151            "Position at end-1 should be in span"
5152        );
5153
5154        // Position at byte_end should NOT be in span (exclusive end)
5155        assert!(
5156            !ctx.is_in_math_span(span.byte_end),
5157            "Position at byte_end should NOT be in span (exclusive)"
5158        );
5159    }
5160
5161    #[test]
5162    fn test_math_spans_at_document_start() {
5163        let content = "$x$ text";
5164        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5165
5166        let math_spans = ctx.math_spans();
5167        assert_eq!(math_spans.len(), 1);
5168        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5169    }
5170
5171    #[test]
5172    fn test_math_spans_at_document_end() {
5173        let content = "text $x$";
5174        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5175
5176        let math_spans = ctx.math_spans();
5177        assert_eq!(math_spans.len(), 1);
5178        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5179    }
5180
5181    #[test]
5182    fn test_math_spans_consecutive() {
5183        let content = "$a$$b$";
5184        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5185
5186        let math_spans = ctx.math_spans();
5187        // pulldown-cmark should parse these as separate spans
5188        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5189
5190        // All positions should be in some math span
5191        for i in 0..content.len() {
5192            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5193        }
5194    }
5195
5196    #[test]
5197    fn test_math_spans_currency_not_math() {
5198        // Unbalanced $ should not create math spans
5199        let content = "Price is $100";
5200        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5201
5202        let math_spans = ctx.math_spans();
5203        // pulldown-cmark requires balanced delimiters for math
5204        // $100 alone is not math
5205        assert!(
5206            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5207            "Unbalanced $ should not create math span containing 100"
5208        );
5209    }
5210}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs