// rumdl_lib/lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::mkdocs_html_markdown::MarkdownHtmlTracker;
7use crate::utils::regex_cache::URL_SIMPLE_REGEX;
8use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
9use regex::Regex;
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::path::PathBuf;
13use std::sync::LazyLock;
14
/// Times one section of work when profiling is enabled (non-WASM builds).
///
/// Evaluates `$code` exactly once and yields its value. When `$profile` is
/// `true`, the elapsed wall-clock time is written to stderr as
/// `[PROFILE] <name>: <duration>`. The clock is only read when profiling is
/// requested, so disabled sections pay nothing beyond one branch.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started = if $profile {
            Some(std::time::Instant::now())
        } else {
            None
        };
        let value = $code;
        if let Some(started) = started {
            eprintln!("[PROFILE] {}: {:?}", $name, started.elapsed());
        }
        value
    }};
}

/// WASM variant of `profile_section!`: no timing, just evaluates `$code`.
///
/// `$name` and `$profile` are still evaluated (and discarded) so that bindings
/// passed at the call site do not become unused in WASM builds.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let _ = ($name, $profile);
        $code
    }};
}
32
33// Comprehensive link pattern that captures both inline and reference links
34// Use (?s) flag to make . match newlines
35static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(
37        r#"(?sx)
38        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
39        (?:
40            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
41            |
42            \[([^\]]*)\]      # Reference ID in group 6
43        )"#
44    ).unwrap()
45});
46
47// Image pattern (similar to links but with ! prefix)
48// Use (?s) flag to make . match newlines
49static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(
51        r#"(?sx)
52        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
53        (?:
54            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
55            |
56            \[([^\]]*)\]      # Reference ID in group 6
57        )"#
58    ).unwrap()
59});
60
61// Reference definition pattern
62static REF_DEF_PATTERN: LazyLock<Regex> =
63    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
64
// Bare URLs have no pattern of their own here: they use the centralized
// `URL_SIMPLE_REGEX` imported from `regex_cache`.
66
67// Pattern for email addresses
68static BARE_EMAIL_PATTERN: LazyLock<Regex> =
69    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
71// Pattern for blockquote prefix in parse_list_blocks
72static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
74/// Pre-computed information about a line
75#[derive(Debug, Clone)]
76pub struct LineInfo {
77    /// Byte offset where this line starts in the document
78    pub byte_offset: usize,
79    /// Length of the line in bytes (without newline)
80    pub byte_len: usize,
81    /// Number of bytes of leading whitespace (for substring extraction)
82    pub indent: usize,
83    /// Visual column width of leading whitespace (with proper tab expansion)
84    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
85    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
86    pub visual_indent: usize,
87    /// Whether the line is blank (empty or only whitespace)
88    pub is_blank: bool,
89    /// Whether this line is inside a code block
90    pub in_code_block: bool,
91    /// Whether this line is inside front matter
92    pub in_front_matter: bool,
93    /// Whether this line is inside an HTML block
94    pub in_html_block: bool,
95    /// Whether this line is inside an HTML comment
96    pub in_html_comment: bool,
97    /// List item information if this line starts a list item
98    pub list_item: Option<ListItemInfo>,
99    /// Heading information if this line is a heading
100    pub heading: Option<HeadingInfo>,
101    /// Blockquote information if this line is a blockquote
102    pub blockquote: Option<BlockquoteInfo>,
103    /// Whether this line is inside a mkdocstrings autodoc block
104    pub in_mkdocstrings: bool,
105    /// Whether this line is part of an ESM import/export block (MDX only)
106    pub in_esm_block: bool,
107    /// Whether this line is a continuation of a multi-line code span from a previous line
108    pub in_code_span_continuation: bool,
109    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
110    /// Pre-computed for consistent detection across all rules
111    pub is_horizontal_rule: bool,
112    /// Whether this line is inside a math block ($$ ... $$)
113    pub in_math_block: bool,
114    /// Whether this line is inside a Quarto div block (::: ... :::)
115    pub in_quarto_div: bool,
116    /// Whether this line contains or is inside a JSX expression (MDX only)
117    pub in_jsx_expression: bool,
118    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
119    pub in_mdx_comment: bool,
120    /// Whether this line is inside a JSX component (MDX only)
121    pub in_jsx_component: bool,
122    /// Whether this line is inside a JSX fragment (MDX only)
123    pub in_jsx_fragment: bool,
124    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
125    pub in_admonition: bool,
126    /// Whether this line is inside an MkDocs content tab block (===)
127    pub in_content_tab: bool,
128    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
129    pub in_mkdocs_html_markdown: bool,
130    /// Whether this line is a definition list item (: definition)
131    pub in_definition_list: bool,
132    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
133    pub in_obsidian_comment: bool,
134    /// Whether this line is inside a PyMdown Blocks region (/// ... ///, MkDocs flavor only)
135    pub in_pymdown_block: bool,
136}
137
138impl LineInfo {
139    /// Get the line content as a string slice from the source document
140    pub fn content<'a>(&self, source: &'a str) -> &'a str {
141        &source[self.byte_offset..self.byte_offset + self.byte_len]
142    }
143
144    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
145    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
146    /// but in MkDocs flavor it's actually container content that should be preserved.
147    #[inline]
148    pub fn in_mkdocs_container(&self) -> bool {
149        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
150    }
151}
152
153/// Grouped byte ranges for skip context detection
154/// Used to reduce parameter count in internal functions
155struct SkipByteRanges<'a> {
156    html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
157    autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
158    quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
159    pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
160}
161
/// Information about a list item
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
176
/// Heading style type
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
187
188/// Parsed link information
189#[derive(Debug, Clone)]
190pub struct ParsedLink<'a> {
191    /// Line number (1-indexed)
192    pub line: usize,
193    /// Start column (0-indexed) in the line
194    pub start_col: usize,
195    /// End column (0-indexed) in the line
196    pub end_col: usize,
197    /// Byte offset in document
198    pub byte_offset: usize,
199    /// End byte offset in document
200    pub byte_end: usize,
201    /// Link text
202    pub text: Cow<'a, str>,
203    /// Link URL or reference
204    pub url: Cow<'a, str>,
205    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
206    pub is_reference: bool,
207    /// Reference ID for reference links
208    pub reference_id: Option<Cow<'a, str>>,
209    /// Link type from pulldown-cmark
210    pub link_type: LinkType,
211}
212
/// Information about a broken link reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
221
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
234
235/// Parsed image information
236#[derive(Debug, Clone)]
237pub struct ParsedImage<'a> {
238    /// Line number (1-indexed)
239    pub line: usize,
240    /// Start column (0-indexed) in the line
241    pub start_col: usize,
242    /// End column (0-indexed) in the line
243    pub end_col: usize,
244    /// Byte offset in document
245    pub byte_offset: usize,
246    /// End byte offset in document
247    pub byte_end: usize,
248    /// Alt text
249    pub alt_text: Cow<'a, str>,
250    /// Image URL or reference
251    pub url: Cow<'a, str>,
252    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
253    pub is_reference: bool,
254    /// Reference ID for reference images
255    pub reference_id: Option<Cow<'a, str>>,
256    /// Link type from pulldown-cmark
257    pub link_type: LinkType,
258}
259
/// Reference definition `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
280
/// Parsed code span information
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
301
/// Parsed math span information (inline $...$ or display $$...$$)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
322
323/// Information about a heading
324#[derive(Debug, Clone)]
325pub struct HeadingInfo {
326    /// Heading level (1-6 for ATX, 1-2 for Setext)
327    pub level: u8,
328    /// Style of heading
329    pub style: HeadingStyle,
330    /// The heading marker (# characters or underline)
331    pub marker: String,
332    /// Column where the marker starts (0-based)
333    pub marker_column: usize,
334    /// Column where heading text starts
335    pub content_column: usize,
336    /// The heading text (without markers and without custom ID syntax)
337    pub text: String,
338    /// Custom header ID if present (e.g., from {#custom-id} syntax)
339    pub custom_id: Option<String>,
340    /// Original heading text including custom ID syntax
341    pub raw_text: String,
342    /// Whether it has a closing sequence (for ATX)
343    pub has_closing_sequence: bool,
344    /// The closing sequence if present
345    pub closing_sequence: String,
346    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
347    /// False for malformed headings like `#NoSpace` that MD018 should flag
348    pub is_valid: bool,
349}
350
351/// A valid heading from a filtered iteration
352///
353/// Only includes headings that are CommonMark-compliant (have space after #).
354/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
355#[derive(Debug, Clone)]
356pub struct ValidHeading<'a> {
357    /// The 1-indexed line number in the document
358    pub line_num: usize,
359    /// Reference to the heading information
360    pub heading: &'a HeadingInfo,
361    /// Reference to the full line info (for rules that need additional context)
362    pub line_info: &'a LineInfo,
363}
364
365/// Iterator over valid CommonMark headings in a document
366///
367/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
368/// but should not be processed by other heading rules.
369pub struct ValidHeadingsIter<'a> {
370    lines: &'a [LineInfo],
371    current_index: usize,
372}
373
374impl<'a> ValidHeadingsIter<'a> {
375    fn new(lines: &'a [LineInfo]) -> Self {
376        Self {
377            lines,
378            current_index: 0,
379        }
380    }
381}
382
383impl<'a> Iterator for ValidHeadingsIter<'a> {
384    type Item = ValidHeading<'a>;
385
386    fn next(&mut self) -> Option<Self::Item> {
387        while self.current_index < self.lines.len() {
388            let idx = self.current_index;
389            self.current_index += 1;
390
391            let line_info = &self.lines[idx];
392            if let Some(heading) = &line_info.heading
393                && heading.is_valid
394            {
395                return Some(ValidHeading {
396                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
397                    heading,
398                    line_info,
399                });
400            }
401        }
402        None
403    }
404}
405
/// Information about a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
426
/// Information about a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
447
448use std::sync::{Arc, OnceLock};
449
/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// Type alias for byte ranges used in JSX expression and MDX comment detection
type ByteRanges = Vec<(usize, usize)>;
455
/// Character frequency data for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
484
/// Pre-parsed HTML tag information
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
507
/// Pre-parsed emphasis span information
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
528
/// Pre-parsed table row information
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row: "left", "center", "right", or "none"
    pub column_alignments: Vec<String>,
}
541
/// Pre-parsed bare URL information (not in links)
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
560
561pub struct LintContext<'a> {
562    pub content: &'a str,
563    pub line_offsets: Vec<usize>,
564    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
565    pub lines: Vec<LineInfo>,             // Pre-computed line information
566    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
567    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
568    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
569    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
570    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
571    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
572    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
573    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
574    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
575    pub char_frequency: CharFrequency,    // Character frequency analysis
576    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
577    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
578    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
579    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
580    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
581    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
582    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
583    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
584    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
585    pub flavor: MarkdownFlavor,           // Markdown flavor being used
586    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
587    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
588    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
589    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
590    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
591    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
592    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
593}
594
/// Detailed blockquote parse result with all components
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`
    indent: &'a str,
    /// The consecutive run of `>` characters
    markers: &'a str,
    /// Spaces/tabs immediately following the markers
    spaces_after: &'a str,
    /// Everything after `spaces_after`, up to the end of the line
    content: &'a str,
}

/// Parse blockquote prefix with detailed components using manual parsing
///
/// Splits `line` into leading indent, a run of `>` markers, the spaces/tabs
/// after them, and the remaining content. Only a *consecutive* run of `>` is
/// treated as markers, so `"> > x"` yields markers `">"` and content `"> x"`.
///
/// Returns `None` when the first non-blank character is not `>` (including
/// empty and whitespace-only lines).
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Splits `text` after its longest leading run of chars accepted by `in_run`.
    // All delimiters involved are ASCII, so the split is always on a char boundary.
    fn split_leading(text: &str, in_run: impl Fn(char) -> bool) -> (&str, &str) {
        let run_len = text.find(|c: char| !in_run(c)).unwrap_or(text.len());
        text.split_at(run_len)
    }

    let is_blank = |c: char| c == ' ' || c == '\t';

    let (indent, rest) = split_leading(line, is_blank);
    let (markers, rest) = split_leading(rest, |c| c == '>');
    // Must have at least one '>' marker
    if markers.is_empty() {
        return None;
    }
    let (spaces_after, content) = split_leading(rest, is_blank);

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
639
640impl<'a> LintContext<'a> {
641    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
642        #[cfg(not(target_arch = "wasm32"))]
643        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
644        #[cfg(target_arch = "wasm32")]
645        let profile = false;
646
647        let line_offsets = profile_section!("Line offsets", profile, {
648            let mut offsets = vec![0];
649            for (i, c) in content.char_indices() {
650                if c == '\n' {
651                    offsets.push(i + 1);
652                }
653            }
654            offsets
655        });
656
657        // Detect code blocks and code spans once and cache them
658        let (code_blocks, code_span_ranges) = profile_section!(
659            "Code blocks",
660            profile,
661            CodeBlockUtils::detect_code_blocks_and_spans(content)
662        );
663
664        // Pre-compute HTML comment ranges ONCE for all operations
665        let html_comment_ranges = profile_section!(
666            "HTML comment ranges",
667            profile,
668            crate::utils::skip_context::compute_html_comment_ranges(content)
669        );
670
671        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
672        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
673            if flavor == MarkdownFlavor::MkDocs {
674                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
675            } else {
676                Vec::new()
677            }
678        });
679
680        // Pre-compute Quarto div block ranges for Quarto flavor
681        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
682            if flavor == MarkdownFlavor::Quarto {
683                crate::utils::quarto_divs::detect_div_block_ranges(content)
684            } else {
685                Vec::new()
686            }
687        });
688
689        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
690        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
691            if flavor == MarkdownFlavor::MkDocs {
692                crate::utils::pymdown_blocks::detect_block_ranges(content)
693            } else {
694                Vec::new()
695            }
696        });
697
698        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
699        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
700        let skip_ranges = SkipByteRanges {
701            html_comment_ranges: &html_comment_ranges,
702            autodoc_ranges: &autodoc_ranges,
703            quarto_div_ranges: &quarto_div_ranges,
704            pymdown_block_ranges: &pymdown_block_ranges,
705        };
706        let (mut lines, emphasis_spans) = profile_section!(
707            "Basic line info",
708            profile,
709            Self::compute_basic_line_info(content, &line_offsets, &code_blocks, flavor, &skip_ranges,)
710        );
711
712        // Detect HTML blocks BEFORE heading detection
713        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
714
715        // Detect ESM import/export blocks in MDX files BEFORE heading detection
716        profile_section!(
717            "ESM blocks",
718            profile,
719            Self::detect_esm_blocks(content, &mut lines, flavor)
720        );
721
722        // Detect JSX expressions and MDX comments in MDX files
723        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
724            "JSX/MDX detection",
725            profile,
726            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
727        );
728
729        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
730        profile_section!(
731            "MkDocs constructs",
732            profile,
733            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
734        );
735
736        // Detect Obsidian comments (%%...%%) in Obsidian flavor
737        let obsidian_comment_ranges = profile_section!(
738            "Obsidian comments",
739            profile,
740            Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
741        );
742
743        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
744        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
745
746        // Now detect headings and blockquotes
747        profile_section!(
748            "Headings & blockquotes",
749            profile,
750            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
751        );
752
753        // Parse code spans early so we can exclude them from link/image parsing
754        let code_spans = profile_section!(
755            "Code spans",
756            profile,
757            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
758        );
759
760        // Mark lines that are continuations of multi-line code spans
761        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
762        for span in &code_spans {
763            if span.end_line > span.line {
764                // Mark lines after the first line as continuations
765                for line_num in (span.line + 1)..=span.end_line {
766                    if let Some(line_info) = lines.get_mut(line_num - 1) {
767                        line_info.in_code_span_continuation = true;
768                    }
769                }
770            }
771        }
772
773        // Parse links, images, references, and list blocks
774        let (links, broken_links, footnote_refs) = profile_section!(
775            "Links",
776            profile,
777            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
778        );
779
780        let images = profile_section!(
781            "Images",
782            profile,
783            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
784        );
785
786        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
787
788        // Build O(1) lookup map for reference definitions by lowercase ID
789        let reference_defs_map: HashMap<String, usize> = reference_defs
790            .iter()
791            .enumerate()
792            .map(|(idx, def)| (def.id.to_lowercase(), idx))
793            .collect();
794
795        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
796
797        // Compute character frequency for fast content analysis
798        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
799
800        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
801        let table_blocks = profile_section!(
802            "Table blocks",
803            profile,
804            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
805                content,
806                &code_blocks,
807                &code_spans,
808                &html_comment_ranges,
809            )
810        );
811
812        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
813        let line_index = profile_section!(
814            "Line index",
815            profile,
816            crate::utils::range_utils::LineIndex::new(content)
817        );
818
819        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
820        let jinja_ranges = profile_section!(
821            "Jinja ranges",
822            profile,
823            crate::utils::jinja_utils::find_jinja_ranges(content)
824        );
825
826        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
827        let citation_ranges = profile_section!("Citation ranges", profile, {
828            if flavor == MarkdownFlavor::Quarto {
829                crate::utils::quarto_divs::find_citation_ranges(content)
830            } else {
831                Vec::new()
832            }
833        });
834
835        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
836        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
837            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
838            let mut ranges = Vec::new();
839            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
840                ranges.push((mat.start(), mat.end()));
841            }
842            ranges
843        });
844
845        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
846
847        Self {
848            content,
849            line_offsets,
850            code_blocks,
851            lines,
852            links,
853            images,
854            broken_links,
855            footnote_refs,
856            reference_defs,
857            reference_defs_map,
858            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
859            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
860            list_blocks,
861            char_frequency,
862            html_tags_cache: OnceLock::new(),
863            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
864            table_rows_cache: OnceLock::new(),
865            bare_urls_cache: OnceLock::new(),
866            has_mixed_list_nesting_cache: OnceLock::new(),
867            html_comment_ranges,
868            table_blocks,
869            line_index,
870            jinja_ranges,
871            flavor,
872            source_file,
873            jsx_expression_ranges,
874            mdx_comment_ranges,
875            citation_ranges,
876            shortcode_ranges,
877            inline_config,
878            obsidian_comment_ranges,
879        }
880    }
881
882    /// Check if a rule is disabled at a specific line number (1-indexed)
883    ///
884    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
885    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
886    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
887        self.inline_config.is_rule_disabled(rule_name, line_number)
888    }
889
890    /// Get code spans - computed lazily on first access
891    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
892        Arc::clone(
893            self.code_spans_cache
894                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
895        )
896    }
897
898    /// Get math spans - computed lazily on first access
899    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
900        Arc::clone(
901            self.math_spans_cache
902                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
903        )
904    }
905
906    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
907    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
908        let math_spans = self.math_spans();
909        math_spans
910            .iter()
911            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
912    }
913
914    /// Get HTML comment ranges - pre-computed during LintContext construction
915    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
916        &self.html_comment_ranges
917    }
918
919    /// Get Obsidian comment ranges - pre-computed during LintContext construction
920    /// Returns empty slice for non-Obsidian flavors
921    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
922        &self.obsidian_comment_ranges
923    }
924
925    /// Check if a byte position is inside an Obsidian comment
926    ///
927    /// Returns false for non-Obsidian flavors.
928    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
929        self.obsidian_comment_ranges
930            .iter()
931            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
932    }
933
934    /// Check if a line/column position is inside an Obsidian comment
935    ///
936    /// Line number is 1-indexed, column is 1-indexed.
937    /// Returns false for non-Obsidian flavors.
938    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
939        if self.obsidian_comment_ranges.is_empty() {
940            return false;
941        }
942
943        // Convert line/column (1-indexed, char-based) to byte position
944        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
945        self.is_in_obsidian_comment(byte_pos)
946    }
947
948    /// Get HTML tags - computed lazily on first access
949    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
950        Arc::clone(self.html_tags_cache.get_or_init(|| {
951            Arc::new(Self::parse_html_tags(
952                self.content,
953                &self.lines,
954                &self.code_blocks,
955                self.flavor,
956            ))
957        }))
958    }
959
960    /// Get emphasis spans - pre-computed during construction
961    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
962        Arc::clone(
963            self.emphasis_spans_cache
964                .get()
965                .expect("emphasis_spans_cache initialized during construction"),
966        )
967    }
968
969    /// Get table rows - computed lazily on first access
970    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
971        Arc::clone(
972            self.table_rows_cache
973                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
974        )
975    }
976
977    /// Get bare URLs - computed lazily on first access
978    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
979        Arc::clone(
980            self.bare_urls_cache
981                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
982        )
983    }
984
985    /// Check if document has mixed ordered/unordered list nesting.
986    /// Result is cached after first computation (document-level invariant).
987    /// This is used by MD007 for smart style auto-detection.
988    pub fn has_mixed_list_nesting(&self) -> bool {
989        *self
990            .has_mixed_list_nesting_cache
991            .get_or_init(|| self.compute_mixed_list_nesting())
992    }
993
994    /// Internal computation for mixed list nesting (only called once per LintContext).
995    fn compute_mixed_list_nesting(&self) -> bool {
996        // Track parent list items by their marker position and type
997        // Using marker_column instead of indent because it works correctly
998        // for blockquoted content where indent doesn't account for the prefix
999        // Stack stores: (marker_column, is_ordered)
1000        let mut stack: Vec<(usize, bool)> = Vec::new();
1001        let mut last_was_blank = false;
1002
1003        for line_info in &self.lines {
1004            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
1005            if line_info.in_code_block
1006                || line_info.in_front_matter
1007                || line_info.in_mkdocstrings
1008                || line_info.in_html_comment
1009                || line_info.in_esm_block
1010            {
1011                continue;
1012            }
1013
1014            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
1015            if line_info.is_blank {
1016                last_was_blank = true;
1017                continue;
1018            }
1019
1020            if let Some(list_item) = &line_info.list_item {
1021                // Normalize column 1 to column 0 (consistent with MD007 check function)
1022                let current_pos = if list_item.marker_column == 1 {
1023                    0
1024                } else {
1025                    list_item.marker_column
1026                };
1027
1028                // If there was a blank line and this item is at root level, reset stack
1029                if last_was_blank && current_pos == 0 {
1030                    stack.clear();
1031                }
1032                last_was_blank = false;
1033
1034                // Pop items at same or greater position (they're siblings or deeper, not parents)
1035                while let Some(&(pos, _)) = stack.last() {
1036                    if pos >= current_pos {
1037                        stack.pop();
1038                    } else {
1039                        break;
1040                    }
1041                }
1042
1043                // Check if immediate parent has different type - this is mixed nesting
1044                if let Some(&(_, parent_is_ordered)) = stack.last()
1045                    && parent_is_ordered != list_item.is_ordered
1046                {
1047                    return true; // Found mixed nesting - early exit
1048                }
1049
1050                stack.push((current_pos, list_item.is_ordered));
1051            } else {
1052                // Non-list line (but not blank) - could be paragraph or other content
1053                last_was_blank = false;
1054            }
1055        }
1056
1057        false
1058    }
1059
1060    /// Map a byte offset to (line, column)
1061    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1062        match self.line_offsets.binary_search(&offset) {
1063            Ok(line) => (line + 1, 1),
1064            Err(line) => {
1065                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1066                (line, offset - line_start + 1)
1067            }
1068        }
1069    }
1070
1071    /// Check if a position is within a code block or code span
1072    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1073        // Check code blocks first
1074        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1075            return true;
1076        }
1077
1078        // Check inline code spans (lazy load if needed)
1079        self.code_spans()
1080            .iter()
1081            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1082    }
1083
1084    /// Get line information by line number (1-indexed)
1085    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1086        if line_num > 0 {
1087            self.lines.get(line_num - 1)
1088        } else {
1089            None
1090        }
1091    }
1092
1093    /// Get byte offset for a line number (1-indexed)
1094    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1095        self.line_info(line_num).map(|info| info.byte_offset)
1096    }
1097
1098    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1099    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1100        let normalized_id = ref_id.to_lowercase();
1101        self.reference_defs_map
1102            .get(&normalized_id)
1103            .map(|&idx| self.reference_defs[idx].url.as_str())
1104    }
1105
1106    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1107    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1108        let normalized_id = ref_id.to_lowercase();
1109        self.reference_defs_map
1110            .get(&normalized_id)
1111            .map(|&idx| &self.reference_defs[idx])
1112    }
1113
1114    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1115    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1116        let normalized_id = ref_id.to_lowercase();
1117        self.reference_defs_map.contains_key(&normalized_id)
1118    }
1119
1120    /// Check if a line is part of a list block
1121    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1122        self.list_blocks
1123            .iter()
1124            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1125    }
1126
1127    /// Get the list block containing a specific line
1128    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1129        self.list_blocks
1130            .iter()
1131            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1132    }
1133
1134    // Compatibility methods for DocumentStructure migration
1135
1136    /// Check if a line is within a code block
1137    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1138        if line_num == 0 || line_num > self.lines.len() {
1139            return false;
1140        }
1141        self.lines[line_num - 1].in_code_block
1142    }
1143
1144    /// Check if a line is within front matter
1145    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1146        if line_num == 0 || line_num > self.lines.len() {
1147            return false;
1148        }
1149        self.lines[line_num - 1].in_front_matter
1150    }
1151
1152    /// Check if a line is within an HTML block
1153    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1154        if line_num == 0 || line_num > self.lines.len() {
1155            return false;
1156        }
1157        self.lines[line_num - 1].in_html_block
1158    }
1159
1160    /// Check if a line and column is within a code span
1161    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1162        if line_num == 0 || line_num > self.lines.len() {
1163            return false;
1164        }
1165
1166        // Use the code spans cache to check
1167        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1168        // Convert col to 0-indexed for comparison
1169        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1170        let code_spans = self.code_spans();
1171        code_spans.iter().any(|span| {
1172            // Check if line is within the span's line range
1173            if line_num < span.line || line_num > span.end_line {
1174                return false;
1175            }
1176
1177            if span.line == span.end_line {
1178                // Single-line span: check column bounds
1179                col_0indexed >= span.start_col && col_0indexed < span.end_col
1180            } else if line_num == span.line {
1181                // First line of multi-line span: anything after start_col is in span
1182                col_0indexed >= span.start_col
1183            } else if line_num == span.end_line {
1184                // Last line of multi-line span: anything before end_col is in span
1185                col_0indexed < span.end_col
1186            } else {
1187                // Middle line of multi-line span: entire line is in span
1188                true
1189            }
1190        })
1191    }
1192
1193    /// Check if a byte offset is within a code span
1194    #[inline]
1195    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1196        let code_spans = self.code_spans();
1197        code_spans
1198            .iter()
1199            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1200    }
1201
1202    /// Check if a byte position is within a reference definition
1203    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1204    #[inline]
1205    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1206        self.reference_defs
1207            .iter()
1208            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1209    }
1210
1211    /// Check if a byte position is within an HTML comment
1212    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1213    /// where k is the number of HTML comments (typically very small)
1214    #[inline]
1215    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1216        self.html_comment_ranges
1217            .iter()
1218            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1219    }
1220
1221    /// Check if a byte position is within an HTML tag (including multiline tags)
1222    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1223    #[inline]
1224    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1225        self.html_tags()
1226            .iter()
1227            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1228    }
1229
1230    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1231    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1232        self.jinja_ranges
1233            .iter()
1234            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1235    }
1236
1237    /// Check if a byte position is within a JSX expression (MDX: {expression})
1238    #[inline]
1239    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1240        self.jsx_expression_ranges
1241            .iter()
1242            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1243    }
1244
1245    /// Check if a byte position is within an MDX comment ({/* ... */})
1246    #[inline]
1247    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1248        self.mdx_comment_ranges
1249            .iter()
1250            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1251    }
1252
1253    /// Get all JSX expression byte ranges
1254    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1255        &self.jsx_expression_ranges
1256    }
1257
1258    /// Get all MDX comment byte ranges
1259    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1260        &self.mdx_comment_ranges
1261    }
1262
1263    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`)
1264    /// Only active in Quarto flavor
1265    #[inline]
1266    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1267        self.citation_ranges
1268            .iter()
1269            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1270    }
1271
1272    /// Get all citation byte ranges (Quarto flavor only)
1273    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1274        &self.citation_ranges
1275    }
1276
1277    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1278    #[inline]
1279    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1280        self.shortcode_ranges
1281            .iter()
1282            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1283    }
1284
1285    /// Get all shortcode byte ranges
1286    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1287        &self.shortcode_ranges
1288    }
1289
1290    /// Check if a byte position is within a link reference definition title
1291    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1292        self.reference_defs.iter().any(|def| {
1293            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1294                byte_pos >= start && byte_pos < end
1295            } else {
1296                false
1297            }
1298        })
1299    }
1300
1301    /// Check if content has any instances of a specific character (fast)
1302    pub fn has_char(&self, ch: char) -> bool {
1303        match ch {
1304            '#' => self.char_frequency.hash_count > 0,
1305            '*' => self.char_frequency.asterisk_count > 0,
1306            '_' => self.char_frequency.underscore_count > 0,
1307            '-' => self.char_frequency.hyphen_count > 0,
1308            '+' => self.char_frequency.plus_count > 0,
1309            '>' => self.char_frequency.gt_count > 0,
1310            '|' => self.char_frequency.pipe_count > 0,
1311            '[' => self.char_frequency.bracket_count > 0,
1312            '`' => self.char_frequency.backtick_count > 0,
1313            '<' => self.char_frequency.lt_count > 0,
1314            '!' => self.char_frequency.exclamation_count > 0,
1315            '\n' => self.char_frequency.newline_count > 0,
1316            _ => self.content.contains(ch), // Fallback for other characters
1317        }
1318    }
1319
1320    /// Get count of a specific character (fast)
1321    pub fn char_count(&self, ch: char) -> usize {
1322        match ch {
1323            '#' => self.char_frequency.hash_count,
1324            '*' => self.char_frequency.asterisk_count,
1325            '_' => self.char_frequency.underscore_count,
1326            '-' => self.char_frequency.hyphen_count,
1327            '+' => self.char_frequency.plus_count,
1328            '>' => self.char_frequency.gt_count,
1329            '|' => self.char_frequency.pipe_count,
1330            '[' => self.char_frequency.bracket_count,
1331            '`' => self.char_frequency.backtick_count,
1332            '<' => self.char_frequency.lt_count,
1333            '!' => self.char_frequency.exclamation_count,
1334            '\n' => self.char_frequency.newline_count,
1335            _ => self.content.matches(ch).count(), // Fallback for other characters
1336        }
1337    }
1338
1339    /// Check if content likely contains headings (fast)
1340    pub fn likely_has_headings(&self) -> bool {
1341        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1342    }
1343
1344    /// Check if content likely contains lists (fast)
1345    pub fn likely_has_lists(&self) -> bool {
1346        self.char_frequency.asterisk_count > 0
1347            || self.char_frequency.hyphen_count > 0
1348            || self.char_frequency.plus_count > 0
1349    }
1350
1351    /// Check if content likely contains emphasis (fast)
1352    pub fn likely_has_emphasis(&self) -> bool {
1353        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1354    }
1355
1356    /// Check if content likely contains tables (fast)
1357    pub fn likely_has_tables(&self) -> bool {
1358        self.char_frequency.pipe_count > 2
1359    }
1360
1361    /// Check if content likely contains blockquotes (fast)
1362    pub fn likely_has_blockquotes(&self) -> bool {
1363        self.char_frequency.gt_count > 0
1364    }
1365
1366    /// Check if content likely contains code (fast)
1367    pub fn likely_has_code(&self) -> bool {
1368        self.char_frequency.backtick_count > 0
1369    }
1370
1371    /// Check if content likely contains links or images (fast)
1372    pub fn likely_has_links_or_images(&self) -> bool {
1373        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1374    }
1375
1376    /// Check if content likely contains HTML (fast)
1377    pub fn likely_has_html(&self) -> bool {
1378        self.char_frequency.lt_count > 0
1379    }
1380
1381    /// Get the blockquote prefix for inserting a blank line at the given line index.
1382    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1383    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1384    /// Returns an empty string if the line is not inside a blockquote.
1385    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1386        if let Some(line_info) = self.lines.get(line_idx)
1387            && let Some(ref bq) = line_info.blockquote
1388        {
1389            bq.prefix.trim_end().to_string()
1390        } else {
1391            String::new()
1392        }
1393    }
1394
1395    /// Get HTML tags on a specific line
1396    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1397        self.html_tags()
1398            .iter()
1399            .filter(|tag| tag.line == line_num)
1400            .cloned()
1401            .collect()
1402    }
1403
1404    /// Get emphasis spans on a specific line
1405    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1406        self.emphasis_spans()
1407            .iter()
1408            .filter(|span| span.line == line_num)
1409            .cloned()
1410            .collect()
1411    }
1412
1413    /// Get table rows on a specific line
1414    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1415        self.table_rows()
1416            .iter()
1417            .filter(|row| row.line == line_num)
1418            .cloned()
1419            .collect()
1420    }
1421
1422    /// Get bare URLs on a specific line
1423    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1424        self.bare_urls()
1425            .iter()
1426            .filter(|url| url.line == line_num)
1427            .cloned()
1428            .collect()
1429    }
1430
1431    /// Find the line index for a given byte offset using binary search.
1432    /// Returns (line_index, line_number, column) where:
1433    /// - line_index is the 0-based index in the lines array
1434    /// - line_number is the 1-based line number
1435    /// - column is the byte offset within that line
1436    #[inline]
1437    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1438        // Binary search to find the line containing this byte offset
1439        let idx = match lines.binary_search_by(|line| {
1440            if byte_offset < line.byte_offset {
1441                std::cmp::Ordering::Greater
1442            } else if byte_offset > line.byte_offset + line.byte_len {
1443                std::cmp::Ordering::Less
1444            } else {
1445                std::cmp::Ordering::Equal
1446            }
1447        }) {
1448            Ok(idx) => idx,
1449            Err(idx) => idx.saturating_sub(1),
1450        };
1451
1452        let line = &lines[idx];
1453        let line_num = idx + 1;
1454        let col = byte_offset.saturating_sub(line.byte_offset);
1455
1456        (idx, line_num, col)
1457    }
1458
1459    /// Check if a byte offset is within a code span using binary search
1460    #[inline]
1461    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1462        // Since spans are sorted by byte_offset, use partition_point for binary search
1463        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1464
1465        // Check the span that starts at or before our offset
1466        if idx > 0 {
1467            let span = &code_spans[idx - 1];
1468            if offset >= span.byte_offset && offset < span.byte_end {
1469                return true;
1470            }
1471        }
1472
1473        false
1474    }
1475
1476    /// Collect byte ranges of all links using pulldown-cmark
1477    /// This is used to skip heading detection for lines that fall within link syntax
1478    /// (e.g., multiline links like `[text](url\n#fragment)`)
1479    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1480        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1481
1482        let mut link_ranges = Vec::new();
1483        let mut options = Options::empty();
1484        options.insert(Options::ENABLE_WIKILINKS);
1485        options.insert(Options::ENABLE_FOOTNOTES);
1486
1487        let parser = Parser::new_ext(content, options).into_offset_iter();
1488        let mut link_stack: Vec<usize> = Vec::new();
1489
1490        for (event, range) in parser {
1491            match event {
1492                Event::Start(Tag::Link { .. }) => {
1493                    link_stack.push(range.start);
1494                }
1495                Event::End(TagEnd::Link) => {
1496                    if let Some(start_pos) = link_stack.pop() {
1497                        link_ranges.push((start_pos, range.end));
1498                    }
1499                }
1500                _ => {}
1501            }
1502        }
1503
1504        link_ranges
1505    }
1506
1507    /// Parse all links in the content
1508    fn parse_links(
1509        content: &'a str,
1510        lines: &[LineInfo],
1511        code_blocks: &[(usize, usize)],
1512        code_spans: &[CodeSpan],
1513        flavor: MarkdownFlavor,
1514        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1515    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1516        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1517        use std::collections::HashSet;
1518
1519        let mut links = Vec::with_capacity(content.len() / 500);
1520        let mut broken_links = Vec::new();
1521        let mut footnote_refs = Vec::new();
1522
1523        // Track byte positions of links found by pulldown-cmark
1524        let mut found_positions = HashSet::new();
1525
1526        // Use pulldown-cmark's streaming parser with BrokenLink callback
1527        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1528        // This automatically handles:
1529        // - Escaped links (won't generate events)
1530        // - Links in code blocks/spans (won't generate Link events)
1531        // - Images (generates Tag::Image instead)
1532        // - Reference resolution (dest_url is already resolved!)
1533        // - Broken references (callback is invoked)
1534        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1535        let mut options = Options::empty();
1536        options.insert(Options::ENABLE_WIKILINKS);
1537        options.insert(Options::ENABLE_FOOTNOTES);
1538
1539        let parser = Parser::new_with_broken_link_callback(
1540            content,
1541            options,
1542            Some(|link: BrokenLink<'_>| {
1543                broken_links.push(BrokenLinkInfo {
1544                    reference: link.reference.to_string(),
1545                    span: link.span.clone(),
1546                });
1547                None
1548            }),
1549        )
1550        .into_offset_iter();
1551
1552        let mut link_stack: Vec<(
1553            usize,
1554            usize,
1555            pulldown_cmark::CowStr<'a>,
1556            LinkType,
1557            pulldown_cmark::CowStr<'a>,
1558        )> = Vec::new();
1559        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1560
1561        for (event, range) in parser {
1562            match event {
1563                Event::Start(Tag::Link {
1564                    link_type,
1565                    dest_url,
1566                    id,
1567                    ..
1568                }) => {
1569                    // Link start - record position, URL, and reference ID
1570                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1571                    text_chunks.clear();
1572                }
1573                Event::Text(text) if !link_stack.is_empty() => {
1574                    // Track text content with its byte range
1575                    text_chunks.push((text.to_string(), range.start, range.end));
1576                }
1577                Event::Code(code) if !link_stack.is_empty() => {
1578                    // Include inline code in link text (with backticks)
1579                    let code_text = format!("`{code}`");
1580                    text_chunks.push((code_text, range.start, range.end));
1581                }
1582                Event::End(TagEnd::Link) => {
1583                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1584                        // Skip if in HTML comment
1585                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1586                            text_chunks.clear();
1587                            continue;
1588                        }
1589
1590                        // Find line and column information
1591                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1592
1593                        // Skip if this link is on a MkDocs snippet line
1594                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1595                            text_chunks.clear();
1596                            continue;
1597                        }
1598
1599                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1600
1601                        let is_reference = matches!(
1602                            link_type,
1603                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1604                        );
1605
1606                        // Extract link text directly from source bytes to preserve escaping
1607                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1608                        let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
1609                            // WikiLinks: [[destination]] or [[destination|display text]]
1610                            // pulldown-cmark's range excludes the final ]], so standard extraction fails
1611                            // Use accumulated text chunks (from Text events) for accurate text
1612                            if !text_chunks.is_empty() {
1613                                let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
1614                                Cow::Owned(text)
1615                            } else {
1616                                // Fallback: use the URL as text (for simple [[destination]] links)
1617                                Cow::Owned(url.to_string())
1618                            }
1619                        } else if start_pos < content.len() {
1620                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1621
1622                            // Find MATCHING ] by tracking bracket depth for nested brackets
1623                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1624                            // Brackets inside code spans (between backticks) should be ignored
1625                            let mut close_pos = None;
1626                            let mut depth = 0;
1627                            let mut in_code_span = false;
1628
1629                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1630                                // Count preceding backslashes
1631                                let mut backslash_count = 0;
1632                                let mut j = i;
1633                                while j > 0 && link_bytes[j - 1] == b'\\' {
1634                                    backslash_count += 1;
1635                                    j -= 1;
1636                                }
1637                                let is_escaped = backslash_count % 2 != 0;
1638
1639                                // Track code spans - backticks toggle in/out of code
1640                                if byte == b'`' && !is_escaped {
1641                                    in_code_span = !in_code_span;
1642                                }
1643
1644                                // Only count brackets when NOT in a code span
1645                                if !is_escaped && !in_code_span {
1646                                    if byte == b'[' {
1647                                        depth += 1;
1648                                    } else if byte == b']' {
1649                                        if depth == 0 {
1650                                            // Found the matching closing bracket
1651                                            close_pos = Some(i);
1652                                            break;
1653                                        } else {
1654                                            depth -= 1;
1655                                        }
1656                                    }
1657                                }
1658                            }
1659
1660                            if let Some(pos) = close_pos {
1661                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1662                            } else {
1663                                Cow::Borrowed("")
1664                            }
1665                        } else {
1666                            Cow::Borrowed("")
1667                        };
1668
1669                        // For reference links, use the actual reference ID from pulldown-cmark
1670                        let reference_id = if is_reference && !ref_id.is_empty() {
1671                            Some(Cow::Owned(ref_id.to_lowercase()))
1672                        } else if is_reference {
1673                            // For collapsed/shortcut references without explicit ID, use the link text
1674                            Some(Cow::Owned(link_text.to_lowercase()))
1675                        } else {
1676                            None
1677                        };
1678
1679                        // Track this position as found
1680                        found_positions.insert(start_pos);
1681
1682                        links.push(ParsedLink {
1683                            line: line_num,
1684                            start_col: col_start,
1685                            end_col: col_end,
1686                            byte_offset: start_pos,
1687                            byte_end: range.end,
1688                            text: link_text,
1689                            url: Cow::Owned(url.to_string()),
1690                            is_reference,
1691                            reference_id,
1692                            link_type,
1693                        });
1694
1695                        text_chunks.clear();
1696                    }
1697                }
1698                Event::FootnoteReference(footnote_id) => {
1699                    // Capture footnote references like [^1], [^note]
1700                    // Skip if in HTML comment
1701                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1702                        continue;
1703                    }
1704
1705                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1706                    footnote_refs.push(FootnoteRef {
1707                        id: footnote_id.to_string(),
1708                        line: line_num,
1709                        byte_offset: range.start,
1710                        byte_end: range.end,
1711                    });
1712                }
1713                _ => {}
1714            }
1715        }
1716
1717        // Also find undefined references using regex
1718        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1719        // because the reference is undefined
1720        for cap in LINK_PATTERN.captures_iter(content) {
1721            let full_match = cap.get(0).unwrap();
1722            let match_start = full_match.start();
1723            let match_end = full_match.end();
1724
1725            // Skip if this was already found by pulldown-cmark (it's a valid link)
1726            if found_positions.contains(&match_start) {
1727                continue;
1728            }
1729
1730            // Skip if escaped
1731            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1732                continue;
1733            }
1734
1735            // Skip if it's an image
1736            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1737                continue;
1738            }
1739
1740            // Skip if in code block
1741            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1742                continue;
1743            }
1744
1745            // Skip if in code span
1746            if Self::is_offset_in_code_span(code_spans, match_start) {
1747                continue;
1748            }
1749
1750            // Skip if in HTML comment
1751            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1752                continue;
1753            }
1754
1755            // Find line and column information
1756            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1757
1758            // Skip if this link is on a MkDocs snippet line
1759            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1760                continue;
1761            }
1762
1763            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1764
1765            let text = cap.get(1).map_or("", |m| m.as_str());
1766
1767            // Only process reference links (group 6)
1768            if let Some(ref_id) = cap.get(6) {
1769                let ref_id_str = ref_id.as_str();
1770                let normalized_ref = if ref_id_str.is_empty() {
1771                    Cow::Owned(text.to_lowercase()) // Implicit reference
1772                } else {
1773                    Cow::Owned(ref_id_str.to_lowercase())
1774                };
1775
1776                // This is an undefined reference (pulldown-cmark didn't parse it)
1777                links.push(ParsedLink {
1778                    line: line_num,
1779                    start_col: col_start,
1780                    end_col: col_end,
1781                    byte_offset: match_start,
1782                    byte_end: match_end,
1783                    text: Cow::Borrowed(text),
1784                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1785                    is_reference: true,
1786                    reference_id: Some(normalized_ref),
1787                    link_type: LinkType::Reference, // Undefined references are reference-style
1788                });
1789            }
1790        }
1791
1792        (links, broken_links, footnote_refs)
1793    }
1794
1795    /// Parse all images in the content
1796    fn parse_images(
1797        content: &'a str,
1798        lines: &[LineInfo],
1799        code_blocks: &[(usize, usize)],
1800        code_spans: &[CodeSpan],
1801        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1802    ) -> Vec<ParsedImage<'a>> {
1803        use crate::utils::skip_context::is_in_html_comment_ranges;
1804        use std::collections::HashSet;
1805
1806        // Pre-size based on a heuristic: images are less common than links
1807        let mut images = Vec::with_capacity(content.len() / 1000);
1808        let mut found_positions = HashSet::new();
1809
1810        // Use pulldown-cmark for parsing - more accurate and faster
1811        let parser = Parser::new(content).into_offset_iter();
1812        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1813            Vec::new();
1814        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1815
1816        for (event, range) in parser {
1817            match event {
1818                Event::Start(Tag::Image {
1819                    link_type,
1820                    dest_url,
1821                    id,
1822                    ..
1823                }) => {
1824                    image_stack.push((range.start, dest_url, link_type, id));
1825                    text_chunks.clear();
1826                }
1827                Event::Text(text) if !image_stack.is_empty() => {
1828                    text_chunks.push((text.to_string(), range.start, range.end));
1829                }
1830                Event::Code(code) if !image_stack.is_empty() => {
1831                    let code_text = format!("`{code}`");
1832                    text_chunks.push((code_text, range.start, range.end));
1833                }
1834                Event::End(TagEnd::Image) => {
1835                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1836                        // Skip if in code block
1837                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1838                            continue;
1839                        }
1840
1841                        // Skip if in code span
1842                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1843                            continue;
1844                        }
1845
1846                        // Skip if in HTML comment
1847                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1848                            continue;
1849                        }
1850
1851                        // Find line and column using binary search
1852                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1853                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1854
1855                        let is_reference = matches!(
1856                            link_type,
1857                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1858                        );
1859
1860                        // Extract alt text directly from source bytes to preserve escaping
1861                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1862                        let alt_text = if start_pos < content.len() {
1863                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1864
1865                            // Find MATCHING ] by tracking bracket depth for nested brackets
1866                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1867                            let mut close_pos = None;
1868                            let mut depth = 0;
1869
1870                            if image_bytes.len() > 2 {
1871                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1872                                    // Count preceding backslashes
1873                                    let mut backslash_count = 0;
1874                                    let mut j = i;
1875                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1876                                        backslash_count += 1;
1877                                        j -= 1;
1878                                    }
1879                                    let is_escaped = backslash_count % 2 != 0;
1880
1881                                    if !is_escaped {
1882                                        if byte == b'[' {
1883                                            depth += 1;
1884                                        } else if byte == b']' {
1885                                            if depth == 0 {
1886                                                // Found the matching closing bracket
1887                                                close_pos = Some(i);
1888                                                break;
1889                                            } else {
1890                                                depth -= 1;
1891                                            }
1892                                        }
1893                                    }
1894                                }
1895                            }
1896
1897                            if let Some(pos) = close_pos {
1898                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1899                            } else {
1900                                Cow::Borrowed("")
1901                            }
1902                        } else {
1903                            Cow::Borrowed("")
1904                        };
1905
1906                        let reference_id = if is_reference && !ref_id.is_empty() {
1907                            Some(Cow::Owned(ref_id.to_lowercase()))
1908                        } else if is_reference {
1909                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1910                        } else {
1911                            None
1912                        };
1913
1914                        found_positions.insert(start_pos);
1915                        images.push(ParsedImage {
1916                            line: line_num,
1917                            start_col: col_start,
1918                            end_col: col_end,
1919                            byte_offset: start_pos,
1920                            byte_end: range.end,
1921                            alt_text,
1922                            url: Cow::Owned(url.to_string()),
1923                            is_reference,
1924                            reference_id,
1925                            link_type,
1926                        });
1927                    }
1928                }
1929                _ => {}
1930            }
1931        }
1932
1933        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1934        for cap in IMAGE_PATTERN.captures_iter(content) {
1935            let full_match = cap.get(0).unwrap();
1936            let match_start = full_match.start();
1937            let match_end = full_match.end();
1938
1939            // Skip if already found by pulldown-cmark
1940            if found_positions.contains(&match_start) {
1941                continue;
1942            }
1943
1944            // Skip if the ! is escaped
1945            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1946                continue;
1947            }
1948
1949            // Skip if in code block, code span, or HTML comment
1950            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1951                || Self::is_offset_in_code_span(code_spans, match_start)
1952                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1953            {
1954                continue;
1955            }
1956
1957            // Only process reference images (undefined references not found by pulldown-cmark)
1958            if let Some(ref_id) = cap.get(6) {
1959                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1960                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1961                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1962                let ref_id_str = ref_id.as_str();
1963                let normalized_ref = if ref_id_str.is_empty() {
1964                    Cow::Owned(alt_text.to_lowercase())
1965                } else {
1966                    Cow::Owned(ref_id_str.to_lowercase())
1967                };
1968
1969                images.push(ParsedImage {
1970                    line: line_num,
1971                    start_col: col_start,
1972                    end_col: col_end,
1973                    byte_offset: match_start,
1974                    byte_end: match_end,
1975                    alt_text: Cow::Borrowed(alt_text),
1976                    url: Cow::Borrowed(""),
1977                    is_reference: true,
1978                    reference_id: Some(normalized_ref),
1979                    link_type: LinkType::Reference, // Undefined references are reference-style
1980                });
1981            }
1982        }
1983
1984        images
1985    }
1986
1987    /// Parse reference definitions
1988    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1989        // Pre-size based on lines count as reference definitions are line-based
1990        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1991
1992        for (line_idx, line_info) in lines.iter().enumerate() {
1993            // Skip lines in code blocks
1994            if line_info.in_code_block {
1995                continue;
1996            }
1997
1998            let line = line_info.content(content);
1999            let line_num = line_idx + 1;
2000
2001            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
2002                let id_raw = cap.get(1).unwrap().as_str();
2003
2004                // Skip footnote definitions - they use [^id]: syntax and are semantically
2005                // different from reference link definitions
2006                if id_raw.starts_with('^') {
2007                    continue;
2008                }
2009
2010                let id = id_raw.to_lowercase();
2011                let url = cap.get(2).unwrap().as_str().to_string();
2012                let title_match = cap.get(3).or_else(|| cap.get(4));
2013                let title = title_match.map(|m| m.as_str().to_string());
2014
2015                // Calculate byte positions
2016                // The match starts at the beginning of the line (0) and extends to the end
2017                let match_obj = cap.get(0).unwrap();
2018                let byte_offset = line_info.byte_offset + match_obj.start();
2019                let byte_end = line_info.byte_offset + match_obj.end();
2020
2021                // Calculate title byte positions (includes the quote character before content)
2022                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2023                    // The match is the content inside quotes, so we include the quote before
2024                    let start = line_info.byte_offset + m.start().saturating_sub(1);
2025                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
2026                    (Some(start), Some(end))
2027                } else {
2028                    (None, None)
2029                };
2030
2031                refs.push(ReferenceDef {
2032                    line: line_num,
2033                    id,
2034                    url,
2035                    title,
2036                    byte_offset,
2037                    byte_end,
2038                    title_byte_start,
2039                    title_byte_end,
2040                });
2041            }
2042        }
2043
2044        refs
2045    }
2046
2047    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
2048    /// Handles nested blockquotes like `> > > content`
2049    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
2050    #[inline]
2051    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2052        let trimmed_start = line.trim_start();
2053        if !trimmed_start.starts_with('>') {
2054            return None;
2055        }
2056
2057        // Track total prefix length to handle nested blockquotes
2058        let mut remaining = line;
2059        let mut total_prefix_len = 0;
2060
2061        loop {
2062            let trimmed = remaining.trim_start();
2063            if !trimmed.starts_with('>') {
2064                break;
2065            }
2066
2067            // Add leading whitespace + '>' to prefix
2068            let leading_ws_len = remaining.len() - trimmed.len();
2069            total_prefix_len += leading_ws_len + 1;
2070
2071            let after_gt = &trimmed[1..];
2072
2073            // Handle optional whitespace after '>' (space or tab)
2074            if let Some(stripped) = after_gt.strip_prefix(' ') {
2075                total_prefix_len += 1;
2076                remaining = stripped;
2077            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2078                total_prefix_len += 1;
2079                remaining = stripped;
2080            } else {
2081                remaining = after_gt;
2082            }
2083        }
2084
2085        Some((&line[..total_prefix_len], remaining))
2086    }
2087
2088    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2089    ///
2090    /// Returns a HashMap keyed by line byte offset, containing:
2091    /// `(is_ordered, marker, marker_column, content_column, number)`
2092    ///
2093    /// ## Why pulldown-cmark?
2094    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2095    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2096    /// This fixes issue #253 where continuation lines were falsely detected.
2097    ///
2098    /// ## Tab indentation quirk
2099    /// Pulldown-cmark reports nested list items at the newline character position
2100    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2101    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2102    /// We detect this and advance to the correct line.
2103    ///
2104    /// ## HashMap key strategy
2105    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2106    /// that resolve to the same line (after newline adjustment). The first event
2107    /// for each line is authoritative.
2108    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2109    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2110    /// This avoids a separate parse for emphasis detection.
2111    fn detect_list_items_and_emphasis_with_pulldown(
2112        content: &str,
2113        line_offsets: &[usize],
2114        flavor: MarkdownFlavor,
2115        front_matter_end: usize,
2116        code_blocks: &[(usize, usize)],
2117    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2118        use std::collections::HashMap;
2119
2120        let mut list_items = HashMap::new();
2121        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2122
2123        let mut options = Options::empty();
2124        options.insert(Options::ENABLE_TABLES);
2125        options.insert(Options::ENABLE_FOOTNOTES);
2126        options.insert(Options::ENABLE_STRIKETHROUGH);
2127        options.insert(Options::ENABLE_TASKLISTS);
2128        // Always enable GFM features for consistency with existing behavior
2129        options.insert(Options::ENABLE_GFM);
2130
2131        // Suppress unused variable warning
2132        let _ = flavor;
2133
2134        let parser = Parser::new_ext(content, options).into_offset_iter();
2135        let mut list_depth: usize = 0;
2136        let mut list_stack: Vec<bool> = Vec::new();
2137
2138        for (event, range) in parser {
2139            match event {
2140                // Capture emphasis spans (for MD030's emphasis detection)
2141                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2142                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2143                        2
2144                    } else {
2145                        1
2146                    };
2147                    let match_start = range.start;
2148                    let match_end = range.end;
2149
2150                    // Skip if in code block
2151                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2152                        // Determine marker character by looking at the content at the start
2153                        let marker = content[match_start..].chars().next().unwrap_or('*');
2154                        if marker == '*' || marker == '_' {
2155                            // Extract content between markers
2156                            let content_start = match_start + marker_count;
2157                            let content_end = if match_end >= marker_count {
2158                                match_end - marker_count
2159                            } else {
2160                                match_end
2161                            };
2162                            let content_part = if content_start < content_end && content_end <= content.len() {
2163                                &content[content_start..content_end]
2164                            } else {
2165                                ""
2166                            };
2167
2168                            // Find which line this emphasis is on using line_offsets
2169                            let line_idx = match line_offsets.binary_search(&match_start) {
2170                                Ok(idx) => idx,
2171                                Err(idx) => idx.saturating_sub(1),
2172                            };
2173                            let line_num = line_idx + 1;
2174                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2175                            let col_start = match_start - line_start;
2176                            let col_end = match_end - line_start;
2177
2178                            emphasis_spans.push(EmphasisSpan {
2179                                line: line_num,
2180                                start_col: col_start,
2181                                end_col: col_end,
2182                                byte_offset: match_start,
2183                                byte_end: match_end,
2184                                marker,
2185                                marker_count,
2186                                content: content_part.to_string(),
2187                            });
2188                        }
2189                    }
2190                }
2191                Event::Start(Tag::List(start_number)) => {
2192                    list_depth += 1;
2193                    list_stack.push(start_number.is_some());
2194                }
2195                Event::End(TagEnd::List(_)) => {
2196                    list_depth = list_depth.saturating_sub(1);
2197                    list_stack.pop();
2198                }
2199                Event::Start(Tag::Item) if list_depth > 0 => {
2200                    // Get the ordered state for the CURRENT (innermost) list
2201                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2202                    // Find which line this byte offset corresponds to
2203                    let item_start = range.start;
2204
2205                    // Binary search to find the line number
2206                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2207                        Ok(idx) => idx,
2208                        Err(idx) => idx.saturating_sub(1),
2209                    };
2210
2211                    // Pulldown-cmark reports nested list items at the newline before the item
2212                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2213                    // Advance to the actual content line in this case.
2214                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2215                        line_idx += 1;
2216                    }
2217
2218                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2219                    if front_matter_end > 0 && line_idx < front_matter_end {
2220                        continue;
2221                    }
2222
2223                    if line_idx < line_offsets.len() {
2224                        let line_start_byte = line_offsets[line_idx];
2225                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2226                        let line = &content[line_start_byte..line_end.min(content.len())];
2227
2228                        // Strip trailing newline
2229                        let line = line
2230                            .strip_suffix('\n')
2231                            .or_else(|| line.strip_suffix("\r\n"))
2232                            .unwrap_or(line);
2233
2234                        // Strip blockquote prefix if present
2235                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2236                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2237                            (prefix.len(), content)
2238                        } else {
2239                            (0, line)
2240                        };
2241
2242                        // Parse the list marker from the actual line
2243                        if current_list_is_ordered {
2244                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2245                                Self::parse_ordered_list(line_to_parse)
2246                            {
2247                                let marker = format!("{number_str}{delimiter}");
2248                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2249                                let content_column = marker_column + marker.len() + spacing.len();
2250                                let number = number_str.parse().ok();
2251
2252                                list_items.entry(line_start_byte).or_insert((
2253                                    true,
2254                                    marker,
2255                                    marker_column,
2256                                    content_column,
2257                                    number,
2258                                ));
2259                            }
2260                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2261                            Self::parse_unordered_list(line_to_parse)
2262                        {
2263                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2264                            let content_column = marker_column + 1 + spacing.len();
2265
2266                            list_items.entry(line_start_byte).or_insert((
2267                                false,
2268                                marker.to_string(),
2269                                marker_column,
2270                                content_column,
2271                                None,
2272                            ));
2273                        }
2274                    }
2275                }
2276                _ => {}
2277            }
2278        }
2279
2280        (list_items, emphasis_spans)
2281    }
2282
2283    /// Fast unordered list parser - replaces regex for 5-10x speedup
2284    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2285    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2286    #[inline]
2287    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2288        let bytes = line.as_bytes();
2289        let mut i = 0;
2290
2291        // Skip leading whitespace
2292        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2293            i += 1;
2294        }
2295
2296        // Check for marker
2297        if i >= bytes.len() {
2298            return None;
2299        }
2300        let marker = bytes[i] as char;
2301        if marker != '-' && marker != '*' && marker != '+' {
2302            return None;
2303        }
2304        let marker_pos = i;
2305        i += 1;
2306
2307        // Collect spacing after marker (space or tab only)
2308        let spacing_start = i;
2309        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2310            i += 1;
2311        }
2312
2313        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2314    }
2315
2316    /// Fast ordered list parser - replaces regex for 5-10x speedup
2317    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2318    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2319    #[inline]
2320    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2321        let bytes = line.as_bytes();
2322        let mut i = 0;
2323
2324        // Skip leading whitespace
2325        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2326            i += 1;
2327        }
2328
2329        // Collect digits
2330        let number_start = i;
2331        while i < bytes.len() && bytes[i].is_ascii_digit() {
2332            i += 1;
2333        }
2334        if i == number_start {
2335            return None; // No digits found
2336        }
2337
2338        // Check for delimiter
2339        if i >= bytes.len() {
2340            return None;
2341        }
2342        let delimiter = bytes[i] as char;
2343        if delimiter != '.' && delimiter != ')' {
2344            return None;
2345        }
2346        let delimiter_pos = i;
2347        i += 1;
2348
2349        // Collect spacing after delimiter (space or tab only)
2350        let spacing_start = i;
2351        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2352            i += 1;
2353        }
2354
2355        Some((
2356            &line[..number_start],
2357            &line[number_start..delimiter_pos],
2358            delimiter,
2359            &line[spacing_start..i],
2360            &line[i..],
2361        ))
2362    }
2363
2364    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2365    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2366    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2367        let num_lines = line_offsets.len();
2368        let mut in_code_block = vec![false; num_lines];
2369
2370        // For each code block, mark all lines within it
2371        for &(start, end) in code_blocks {
2372            // Ensure we're at valid UTF-8 boundaries
2373            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2374                let mut boundary = start;
2375                while boundary > 0 && !content.is_char_boundary(boundary) {
2376                    boundary -= 1;
2377                }
2378                boundary
2379            } else {
2380                start
2381            };
2382
2383            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2384                let mut boundary = end;
2385                while boundary < content.len() && !content.is_char_boundary(boundary) {
2386                    boundary += 1;
2387                }
2388                boundary
2389            } else {
2390                end.min(content.len())
2391            };
2392
2393            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2394            // That function now has proper list context awareness (see code_block_utils.rs)
2395            // and correctly distinguishes between:
2396            // - Fenced code blocks (``` or ~~~)
2397            // - Indented code blocks at document level (4 spaces + blank line before)
2398            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2399            //
2400            // We no longer need to re-validate here. The original validation logic
2401            // was causing false positives by marking list continuation paragraphs as
2402            // code blocks when they have 4 spaces of indentation.
2403
2404            // Use binary search to find the first and last line indices
2405            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2406            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2407            //
2408            // Find the line that CONTAINS safe_start: the line with the largest
2409            // start offset that is <= safe_start. partition_point gives us the
2410            // first line that starts AFTER safe_start, so we subtract 1.
2411            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2412            let first_line = first_line_after.saturating_sub(1);
2413            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2414
2415            // Mark all lines in the range at once
2416            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2417                *flag = true;
2418            }
2419        }
2420
2421        in_code_block
2422    }
2423
2424    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2425    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2426    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2427        let content_lines: Vec<&str> = content.lines().collect();
2428        let num_lines = content_lines.len();
2429        let mut in_math_block = vec![false; num_lines];
2430
2431        let mut inside_math = false;
2432
2433        for (i, line) in content_lines.iter().enumerate() {
2434            // Skip lines that are in code blocks - math delimiters inside code are literal
2435            if code_block_map.get(i).copied().unwrap_or(false) {
2436                continue;
2437            }
2438
2439            let trimmed = line.trim();
2440
2441            // Check for math block delimiter ($$)
2442            // A line with just $$ toggles the math block state
2443            if trimmed == "$$" {
2444                if inside_math {
2445                    // Closing delimiter - this line is still part of the math block
2446                    in_math_block[i] = true;
2447                    inside_math = false;
2448                } else {
2449                    // Opening delimiter - this line starts the math block
2450                    in_math_block[i] = true;
2451                    inside_math = true;
2452                }
2453            } else if inside_math {
2454                // Content inside math block
2455                in_math_block[i] = true;
2456            }
2457        }
2458
2459        in_math_block
2460    }
2461
2462    /// Pre-compute basic line information (without headings/blockquotes)
2463    /// Also returns emphasis spans detected during the pulldown-cmark parse
2464    fn compute_basic_line_info(
2465        content: &str,
2466        line_offsets: &[usize],
2467        code_blocks: &[(usize, usize)],
2468        flavor: MarkdownFlavor,
2469        skip_ranges: &SkipByteRanges<'_>,
2470    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2471        let content_lines: Vec<&str> = content.lines().collect();
2472        let mut lines = Vec::with_capacity(content_lines.len());
2473
2474        // Pre-compute which lines are in code blocks
2475        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2476
2477        // Pre-compute which lines are in math blocks ($$ ... $$)
2478        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2479
2480        // Detect front matter boundaries FIRST, before any other parsing
2481        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2482        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2483
2484        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2485        // (context-aware, eliminates false positives)
2486        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2487            content,
2488            line_offsets,
2489            flavor,
2490            front_matter_end,
2491            code_blocks,
2492        );
2493
2494        for (i, line) in content_lines.iter().enumerate() {
2495            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2496            let indent = line.len() - line.trim_start().len();
2497            // Compute visual indent with proper CommonMark tab expansion
2498            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2499
2500            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2501            let blockquote_parse = Self::parse_blockquote_prefix(line);
2502
2503            // For blank detection, consider blockquote context
2504            let is_blank = if let Some((_, content)) = blockquote_parse {
2505                // In blockquote context, check if content after prefix is blank
2506                content.trim().is_empty()
2507            } else {
2508                line.trim().is_empty()
2509            };
2510
2511            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2512            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2513
2514            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2515            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2516                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(
2517                    skip_ranges.autodoc_ranges,
2518                    byte_offset,
2519                );
2520            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2521            // This ensures content after `-->` on the same line is not incorrectly skipped
2522            let line_end_offset = byte_offset + line.len();
2523            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2524                skip_ranges.html_comment_ranges,
2525                byte_offset,
2526                line_end_offset,
2527            );
2528            // Use pulldown-cmark's list detection for context-aware parsing
2529            // This eliminates false positives on continuation lines (issue #253)
2530            let list_item =
2531                list_item_map
2532                    .get(&byte_offset)
2533                    .map(
2534                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2535                            marker: marker.clone(),
2536                            is_ordered: *is_ordered,
2537                            number: *number,
2538                            marker_column: *marker_column,
2539                            content_column: *content_column,
2540                        },
2541                    );
2542
2543            // Detect horizontal rules (only outside code blocks and frontmatter)
2544            // Uses CommonMark-compliant check including leading indentation validation
2545            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2546            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2547
2548            // Get math block status for this line
2549            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2550
2551            // Check if line is inside a Quarto div block
2552            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2553                && crate::utils::quarto_divs::is_within_div_block_ranges(skip_ranges.quarto_div_ranges, byte_offset);
2554
2555            // Check if line is inside a PyMdown Blocks region (/// ... ///)
2556            let in_pymdown_block = flavor == MarkdownFlavor::MkDocs
2557                && crate::utils::pymdown_blocks::is_within_block_ranges(skip_ranges.pymdown_block_ranges, byte_offset);
2558
2559            lines.push(LineInfo {
2560                byte_offset,
2561                byte_len: line.len(),
2562                indent,
2563                visual_indent,
2564                is_blank,
2565                in_code_block,
2566                in_front_matter,
2567                in_html_block: false, // Will be populated after line creation
2568                in_html_comment,
2569                list_item,
2570                heading: None,    // Will be populated in second pass for Setext headings
2571                blockquote: None, // Will be populated after line creation
2572                in_mkdocstrings,
2573                in_esm_block: false, // Will be populated after line creation for MDX files
2574                in_code_span_continuation: false, // Will be populated after code spans are parsed
2575                is_horizontal_rule: is_hr,
2576                in_math_block,
2577                in_quarto_div,
2578                in_jsx_expression: false,       // Will be populated for MDX files
2579                in_mdx_comment: false,          // Will be populated for MDX files
2580                in_jsx_component: false,        // Will be populated for MDX files
2581                in_jsx_fragment: false,         // Will be populated for MDX files
2582                in_admonition: false,           // Will be populated for MkDocs files
2583                in_content_tab: false,          // Will be populated for MkDocs files
2584                in_mkdocs_html_markdown: false, // Will be populated for MkDocs files
2585                in_definition_list: false,      // Will be populated for MkDocs files
2586                in_obsidian_comment: false,     // Will be populated for Obsidian files
2587                in_pymdown_block,
2588            });
2589        }
2590
2591        (lines, emphasis_spans)
2592    }
2593
2594    /// Detect headings and blockquotes (called after HTML block detection)
2595    fn detect_headings_and_blockquotes(
2596        content: &str,
2597        lines: &mut [LineInfo],
2598        flavor: MarkdownFlavor,
2599        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2600        link_byte_ranges: &[(usize, usize)],
2601    ) {
2602        // Regex for heading detection
2603        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2604            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2605        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2606            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2607
2608        let content_lines: Vec<&str> = content.lines().collect();
2609
2610        // Detect front matter boundaries to skip those lines
2611        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2612
2613        // Detect headings (including Setext which needs look-ahead) and blockquotes
2614        for i in 0..lines.len() {
2615            let line = content_lines[i];
2616
2617            // Detect blockquotes FIRST, before any skip conditions.
2618            // A line can be both a blockquote AND contain a code block inside it.
2619            // We need to know about the blockquote marker regardless of code block status.
2620            // Skip only frontmatter lines - those are never blockquotes.
2621            if !(front_matter_end > 0 && i < front_matter_end)
2622                && let Some(bq) = parse_blockquote_detailed(line)
2623            {
2624                let nesting_level = bq.markers.len();
2625                let marker_column = bq.indent.len();
2626                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2627                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2628                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2629                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2630
2631                lines[i].blockquote = Some(BlockquoteInfo {
2632                    nesting_level,
2633                    indent: bq.indent.to_string(),
2634                    marker_column,
2635                    prefix,
2636                    content: bq.content.to_string(),
2637                    has_no_space_after_marker: has_no_space,
2638                    has_multiple_spaces_after_marker: has_multiple_spaces,
2639                    needs_md028_fix,
2640                });
2641
2642                // Update is_horizontal_rule for blockquote content
2643                // The original detection doesn't strip blockquote prefix, so we need to check here
2644                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2645                    lines[i].is_horizontal_rule = true;
2646                }
2647            }
2648
2649            // Now apply skip conditions for heading detection
2650            if lines[i].in_code_block {
2651                continue;
2652            }
2653
2654            // Skip lines in front matter
2655            if front_matter_end > 0 && i < front_matter_end {
2656                continue;
2657            }
2658
2659            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2660            if lines[i].in_html_block {
2661                continue;
2662            }
2663
2664            // Skip heading detection for blank lines
2665            if lines[i].is_blank {
2666                continue;
2667            }
2668
2669            // Check for ATX headings (but skip MkDocs snippet lines)
2670            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2671            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2672                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2673                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2674            } else {
2675                false
2676            };
2677
2678            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2679                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2680                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2681                    continue;
2682                }
2683                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2684                // This prevents false positives where `#fragment` is detected as a heading
2685                let line_offset = lines[i].byte_offset;
2686                if link_byte_ranges
2687                    .iter()
2688                    .any(|&(start, end)| line_offset > start && line_offset < end)
2689                {
2690                    continue;
2691                }
2692                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2693                let hashes = caps.get(2).map_or("", |m| m.as_str());
2694                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2695                let rest = caps.get(4).map_or("", |m| m.as_str());
2696
2697                let level = hashes.len() as u8;
2698                let marker_column = leading_spaces.len();
2699
2700                // Check for closing sequence, but handle custom IDs that might come after
2701                let (text, has_closing, closing_seq) = {
2702                    // First check if there's a custom ID at the end
2703                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2704                        // Check if this looks like a valid custom ID (ends with })
2705                        if rest[id_start..].trim_end().ends_with('}') {
2706                            // Split off the custom ID
2707                            (&rest[..id_start], &rest[id_start..])
2708                        } else {
2709                            (rest, "")
2710                        }
2711                    } else {
2712                        (rest, "")
2713                    };
2714
2715                    // Now look for closing hashes in the part before the custom ID
2716                    let trimmed_rest = rest_without_id.trim_end();
2717                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2718                        // Find the start of the hash sequence by walking backwards
2719                        // Use char_indices to get byte positions at char boundaries
2720                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2721
2722                        // Find which char index corresponds to last_hash_byte_pos
2723                        let last_hash_char_idx = char_positions
2724                            .iter()
2725                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2726
2727                        if let Some(mut char_idx) = last_hash_char_idx {
2728                            // Walk backwards to find start of hash sequence
2729                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2730                                char_idx -= 1;
2731                            }
2732
2733                            // Get the byte position of the start of hashes
2734                            let start_of_hashes = char_positions[char_idx].0;
2735
2736                            // Check if there's at least one space before the closing hashes
2737                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2738
2739                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2740                            let potential_closing = &trimmed_rest[start_of_hashes..];
2741                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2742
2743                            if is_all_hashes && has_space_before {
2744                                // This is a closing sequence
2745                                let closing_hashes = potential_closing.to_string();
2746                                // The text is everything before the closing hashes
2747                                // Don't include the custom ID here - it will be extracted later
2748                                let text_part = if !custom_id_part.is_empty() {
2749                                    // If we have a custom ID, append it back to get the full rest
2750                                    // This allows the extract_header_id function to handle it properly
2751                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2752                                } else {
2753                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2754                                };
2755                                (text_part, true, closing_hashes)
2756                            } else {
2757                                // Not a valid closing sequence, return the full content
2758                                (rest.to_string(), false, String::new())
2759                            }
2760                        } else {
2761                            // Couldn't find char boundary, return the full content
2762                            (rest.to_string(), false, String::new())
2763                        }
2764                    } else {
2765                        // No hashes found, return the full content
2766                        (rest.to_string(), false, String::new())
2767                    }
2768                };
2769
2770                let content_column = marker_column + hashes.len() + spaces_after.len();
2771
2772                // Extract custom header ID if present
2773                let raw_text = text.trim().to_string();
2774                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2775
2776                // If no custom ID was found on the header line, check the next line for standalone attr-list
2777                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2778                    let next_line = content_lines[i + 1];
2779                    if !lines[i + 1].in_code_block
2780                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2781                        && let Some(next_line_id) =
2782                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2783                    {
2784                        custom_id = Some(next_line_id);
2785                    }
2786                }
2787
2788                // ATX heading is "valid" for processing by heading rules if:
2789                // 1. Has space after # (CommonMark compliant): `# Heading`
2790                // 2. Is empty (just hashes): `#`
2791                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2792                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2793                //
2794                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2795                // - `#tag` - single # with lowercase (social hashtag)
2796                // - `#123` - single # with number (GitHub issue ref)
2797                let is_valid = !spaces_after.is_empty()
2798                    || rest.is_empty()
2799                    || level > 1
2800                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2801
2802                lines[i].heading = Some(HeadingInfo {
2803                    level,
2804                    style: HeadingStyle::ATX,
2805                    marker: hashes.to_string(),
2806                    marker_column,
2807                    content_column,
2808                    text: clean_text,
2809                    custom_id,
2810                    raw_text,
2811                    has_closing_sequence: has_closing,
2812                    closing_sequence: closing_seq,
2813                    is_valid,
2814                });
2815            }
2816            // Check for Setext headings (need to look at next line)
2817            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2818                let next_line = content_lines[i + 1];
2819                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2820                    // Skip if next line is front matter delimiter
2821                    if front_matter_end > 0 && i < front_matter_end {
2822                        continue;
2823                    }
2824
2825                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2826                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2827                    {
2828                        continue;
2829                    }
2830
2831                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2832                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2833                    let content_line = line.trim();
2834
2835                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2836                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2837                        continue;
2838                    }
2839
2840                    // Skip underscore thematic breaks (___)
2841                    if content_line.starts_with('_') {
2842                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2843                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2844                            continue;
2845                        }
2846                    }
2847
2848                    // Skip numbered lists (1. Item, 2. Item, etc.)
2849                    if let Some(first_char) = content_line.chars().next()
2850                        && first_char.is_ascii_digit()
2851                    {
2852                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2853                        if num_end < content_line.len() {
2854                            let next = content_line.chars().nth(num_end);
2855                            if next == Some('.') || next == Some(')') {
2856                                continue;
2857                            }
2858                        }
2859                    }
2860
2861                    // Skip ATX headings
2862                    if ATX_HEADING_REGEX.is_match(line) {
2863                        continue;
2864                    }
2865
2866                    // Skip blockquotes
2867                    if content_line.starts_with('>') {
2868                        continue;
2869                    }
2870
2871                    // Skip code fences
2872                    let trimmed_start = line.trim_start();
2873                    if trimmed_start.len() >= 3 {
2874                        let first_three: String = trimmed_start.chars().take(3).collect();
2875                        if first_three == "```" || first_three == "~~~" {
2876                            continue;
2877                        }
2878                    }
2879
2880                    // Skip HTML blocks
2881                    if content_line.starts_with('<') {
2882                        continue;
2883                    }
2884
2885                    let underline = next_line.trim();
2886
2887                    let level = if underline.starts_with('=') { 1 } else { 2 };
2888                    let style = if level == 1 {
2889                        HeadingStyle::Setext1
2890                    } else {
2891                        HeadingStyle::Setext2
2892                    };
2893
2894                    // Extract custom header ID if present
2895                    let raw_text = line.trim().to_string();
2896                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2897
2898                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2899                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2900                        let attr_line = content_lines[i + 2];
2901                        if !lines[i + 2].in_code_block
2902                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2903                            && let Some(attr_line_id) =
2904                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2905                        {
2906                            custom_id = Some(attr_line_id);
2907                        }
2908                    }
2909
2910                    lines[i].heading = Some(HeadingInfo {
2911                        level,
2912                        style,
2913                        marker: underline.to_string(),
2914                        marker_column: next_line.len() - next_line.trim_start().len(),
2915                        content_column: lines[i].indent,
2916                        text: clean_text,
2917                        custom_id,
2918                        raw_text,
2919                        has_closing_sequence: false,
2920                        closing_sequence: String::new(),
2921                        is_valid: true, // Setext headings are always valid
2922                    });
2923                }
2924            }
2925        }
2926    }
2927
2928    /// Detect HTML blocks in the content
2929    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2930        // HTML block elements that trigger block context
2931        // Includes HTML5 media, embedded content, and interactive elements
2932        const BLOCK_ELEMENTS: &[&str] = &[
2933            "address",
2934            "article",
2935            "aside",
2936            "audio",
2937            "blockquote",
2938            "canvas",
2939            "details",
2940            "dialog",
2941            "dd",
2942            "div",
2943            "dl",
2944            "dt",
2945            "embed",
2946            "fieldset",
2947            "figcaption",
2948            "figure",
2949            "footer",
2950            "form",
2951            "h1",
2952            "h2",
2953            "h3",
2954            "h4",
2955            "h5",
2956            "h6",
2957            "header",
2958            "hr",
2959            "iframe",
2960            "li",
2961            "main",
2962            "menu",
2963            "nav",
2964            "noscript",
2965            "object",
2966            "ol",
2967            "p",
2968            "picture",
2969            "pre",
2970            "script",
2971            "search",
2972            "section",
2973            "source",
2974            "style",
2975            "summary",
2976            "svg",
2977            "table",
2978            "tbody",
2979            "td",
2980            "template",
2981            "textarea",
2982            "tfoot",
2983            "th",
2984            "thead",
2985            "tr",
2986            "track",
2987            "ul",
2988            "video",
2989        ];
2990
2991        let mut i = 0;
2992        while i < lines.len() {
2993            // Skip if already in code block or front matter
2994            if lines[i].in_code_block || lines[i].in_front_matter {
2995                i += 1;
2996                continue;
2997            }
2998
2999            let trimmed = lines[i].content(content).trim_start();
3000
3001            // Check if line starts with an HTML tag
3002            if trimmed.starts_with('<') && trimmed.len() > 1 {
3003                // Extract tag name safely
3004                let after_bracket = &trimmed[1..];
3005                let is_closing = after_bracket.starts_with('/');
3006                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
3007
3008                // Extract tag name (stop at space, >, /, or end of string)
3009                let tag_name = tag_start
3010                    .chars()
3011                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
3012                    .collect::<String>()
3013                    .to_lowercase();
3014
3015                // Check if it's a block element
3016                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
3017                    // Mark this line as in HTML block
3018                    lines[i].in_html_block = true;
3019
3020                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
3021                    // This avoids complex nesting logic that might cause infinite loops
3022                    // Only search for closing tag on subsequent lines if the opening tag
3023                    // does NOT have its closing tag on the same line
3024                    if !is_closing {
3025                        let closing_tag = format!("</{tag_name}>");
3026
3027                        // Check if closing tag is on the same line as opening tag
3028                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
3029                        let same_line_close = lines[i].content(content).contains(&closing_tag);
3030
3031                        // Only search subsequent lines if the tag isn't self-closed on this line
3032                        if !same_line_close {
3033                            // style and script tags can contain blank lines (CSS/JS formatting)
3034                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
3035                            let mut j = i + 1;
3036                            let mut found_closing_tag = false;
3037                            while j < lines.len() && j < i + 100 {
3038                                // Limit search to 100 lines
3039                                // Stop at blank lines (except for style/script tags)
3040                                if !allow_blank_lines && lines[j].is_blank {
3041                                    break;
3042                                }
3043
3044                                lines[j].in_html_block = true;
3045
3046                                // Check if this line contains the closing tag
3047                                if lines[j].content(content).contains(&closing_tag) {
3048                                    found_closing_tag = true;
3049                                }
3050
3051                                // After finding closing tag, continue marking lines as
3052                                // in_html_block until blank line (per CommonMark spec)
3053                                if found_closing_tag {
3054                                    j += 1;
3055                                    // Continue marking subsequent lines until blank
3056                                    while j < lines.len() && j < i + 100 {
3057                                        if lines[j].is_blank {
3058                                            break;
3059                                        }
3060                                        lines[j].in_html_block = true;
3061                                        j += 1;
3062                                    }
3063                                    break;
3064                                }
3065                                j += 1;
3066                            }
3067                        }
3068                    }
3069                }
3070            }
3071
3072            i += 1;
3073        }
3074    }
3075
3076    /// Detect ESM import/export blocks anywhere in MDX files
3077    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
3078    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3079        // Only process MDX files
3080        if !flavor.supports_esm_blocks() {
3081            return;
3082        }
3083
3084        let mut in_multiline_import = false;
3085
3086        for line in lines.iter_mut() {
3087            // Skip code blocks, front matter, and HTML comments
3088            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3089                in_multiline_import = false;
3090                continue;
3091            }
3092
3093            let line_content = line.content(content);
3094            let trimmed = line_content.trim();
3095
3096            // Handle continuation of multi-line import/export
3097            if in_multiline_import {
3098                line.in_esm_block = true;
3099                // Check if this line completes the statement
3100                // Multi-line import ends when we see the closing quote + optional semicolon
3101                if trimmed.ends_with('\'')
3102                    || trimmed.ends_with('"')
3103                    || trimmed.ends_with("';")
3104                    || trimmed.ends_with("\";")
3105                    || line_content.contains(';')
3106                {
3107                    in_multiline_import = false;
3108                }
3109                continue;
3110            }
3111
3112            // Skip blank lines
3113            if line.is_blank {
3114                continue;
3115            }
3116
3117            // Check if line starts with import or export
3118            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3119                line.in_esm_block = true;
3120
3121                // Determine if this is a complete single-line statement or starts a multi-line one
3122                // Multi-line imports look like:
3123                //   import {
3124                //     Foo,
3125                //     Bar
3126                //   } from 'module'
3127                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3128                let is_import = trimmed.starts_with("import ");
3129
3130                // Check for simple complete statements
3131                let is_complete =
3132                    // Ends with semicolon
3133                    trimmed.ends_with(';')
3134                    // import/export with from clause that ends with quote
3135                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3136                    // Simple export (export const/let/var/function/class without from)
3137                    || (!is_import && !trimmed.contains(" from ") && (
3138                        trimmed.starts_with("export const ")
3139                        || trimmed.starts_with("export let ")
3140                        || trimmed.starts_with("export var ")
3141                        || trimmed.starts_with("export function ")
3142                        || trimmed.starts_with("export class ")
3143                        || trimmed.starts_with("export default ")
3144                    ));
3145
3146                if !is_complete && is_import {
3147                    // Only imports can span multiple lines in the typical case
3148                    // Check if it looks like the start of a multi-line import
3149                    // e.g., "import {" or "import type {"
3150                    if trimmed.contains('{') && !trimmed.contains('}') {
3151                        in_multiline_import = true;
3152                    }
3153                }
3154            }
3155        }
3156    }
3157
3158    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3159    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3160    fn detect_jsx_and_mdx_comments(
3161        content: &str,
3162        lines: &mut [LineInfo],
3163        flavor: MarkdownFlavor,
3164        code_blocks: &[(usize, usize)],
3165    ) -> (ByteRanges, ByteRanges) {
3166        // Only process MDX files
3167        if !flavor.supports_jsx() {
3168            return (Vec::new(), Vec::new());
3169        }
3170
3171        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3172        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3173
3174        // Quick check - if no braces, no JSX expressions or MDX comments
3175        if !content.contains('{') {
3176            return (jsx_expression_ranges, mdx_comment_ranges);
3177        }
3178
3179        let bytes = content.as_bytes();
3180        let mut i = 0;
3181
3182        while i < bytes.len() {
3183            if bytes[i] == b'{' {
3184                // Check if we're in a code block
3185                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3186                    i += 1;
3187                    continue;
3188                }
3189
3190                let start = i;
3191
3192                // Check if it's an MDX comment: {/* ... */}
3193                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3194                    // Find the closing */}
3195                    let mut j = i + 3;
3196                    while j + 2 < bytes.len() {
3197                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3198                            let end = j + 3;
3199                            mdx_comment_ranges.push((start, end));
3200
3201                            // Mark lines as in MDX comment
3202                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3203                                line.in_mdx_comment = true;
3204                            });
3205
3206                            i = end;
3207                            break;
3208                        }
3209                        j += 1;
3210                    }
3211                    if j + 2 >= bytes.len() {
3212                        // Unclosed MDX comment - mark rest as comment
3213                        mdx_comment_ranges.push((start, bytes.len()));
3214                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3215                            line.in_mdx_comment = true;
3216                        });
3217                        break;
3218                    }
3219                } else {
3220                    // Regular JSX expression: { ... }
3221                    // Need to handle nested braces
3222                    let mut brace_depth = 1;
3223                    let mut j = i + 1;
3224                    let mut in_string = false;
3225                    let mut string_char = b'"';
3226
3227                    while j < bytes.len() && brace_depth > 0 {
3228                        let c = bytes[j];
3229
3230                        // Handle strings to avoid counting braces inside them
3231                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3232                            in_string = true;
3233                            string_char = c;
3234                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3235                            in_string = false;
3236                        } else if !in_string {
3237                            if c == b'{' {
3238                                brace_depth += 1;
3239                            } else if c == b'}' {
3240                                brace_depth -= 1;
3241                            }
3242                        }
3243                        j += 1;
3244                    }
3245
3246                    if brace_depth == 0 {
3247                        let end = j;
3248                        jsx_expression_ranges.push((start, end));
3249
3250                        // Mark lines as in JSX expression
3251                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3252                            line.in_jsx_expression = true;
3253                        });
3254
3255                        i = end;
3256                    } else {
3257                        i += 1;
3258                    }
3259                }
3260            } else {
3261                i += 1;
3262            }
3263        }
3264
3265        (jsx_expression_ranges, mdx_comment_ranges)
3266    }
3267
3268    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3269    /// and populate the corresponding fields in LineInfo
3270    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3271        if flavor != MarkdownFlavor::MkDocs {
3272            return;
3273        }
3274
3275        use crate::utils::mkdocs_admonitions;
3276        use crate::utils::mkdocs_definition_lists;
3277        use crate::utils::mkdocs_tabs;
3278
3279        let content_lines: Vec<&str> = content.lines().collect();
3280
3281        // Track admonition context
3282        let mut in_admonition = false;
3283        let mut admonition_indent = 0;
3284
3285        // Track tab context
3286        let mut in_tab = false;
3287        let mut tab_indent = 0;
3288
3289        // Track fenced code blocks within MkDocs containers (separate from pulldown-cmark detection)
3290        let mut in_mkdocs_fenced_code = false;
3291        let mut mkdocs_fence_marker: Option<String> = None;
3292
3293        // Track definition list context
3294        let mut in_definition = false;
3295
3296        // Track markdown-enabled HTML block context (grid cards, etc.)
3297        let mut markdown_html_tracker = MarkdownHtmlTracker::new();
3298
3299        for (i, line) in content_lines.iter().enumerate() {
3300            if i >= lines.len() {
3301                break;
3302            }
3303
3304            // Check for admonition markers first - even on lines marked as code blocks
3305            // Pulldown-cmark marks 4-space indented content as indented code blocks,
3306            // but in MkDocs this is admonition/tab content, not code.
3307            if mkdocs_admonitions::is_admonition_start(line) {
3308                in_admonition = true;
3309                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3310                lines[i].in_admonition = true;
3311            } else if in_admonition {
3312                // Check if still in admonition content
3313                if line.trim().is_empty() {
3314                    // Blank lines are part of admonitions
3315                    lines[i].in_admonition = true;
3316                    // Override code block detection for blank lines inside admonitions
3317                    lines[i].in_code_block = false;
3318                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3319                    lines[i].in_admonition = true;
3320                    // Override code block detection - this is admonition content, not code
3321                    lines[i].in_code_block = false;
3322                } else {
3323                    // End of admonition
3324                    in_admonition = false;
3325                    // Check if this line starts a new admonition
3326                    if mkdocs_admonitions::is_admonition_start(line) {
3327                        in_admonition = true;
3328                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3329                        lines[i].in_admonition = true;
3330                    }
3331                }
3332            }
3333
3334            // Check for tab markers - also before the code block skip
3335            // Tab content also uses 4-space indentation which pulldown-cmark treats as code
3336            if mkdocs_tabs::is_tab_marker(line) {
3337                in_tab = true;
3338                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3339                lines[i].in_content_tab = true;
3340                // Reset fenced code tracking when entering new tab
3341                in_mkdocs_fenced_code = false;
3342                mkdocs_fence_marker = None;
3343            } else if in_tab {
3344                let trimmed = line.trim();
3345
3346                // Track fenced code blocks within tabs
3347                if !in_mkdocs_fenced_code {
3348                    // Check for fence start (``` or ~~~)
3349                    if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3350                        let fence_char = trimmed.chars().next().unwrap();
3351                        let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
3352                        if fence_len >= 3 {
3353                            in_mkdocs_fenced_code = true;
3354                            mkdocs_fence_marker = Some(fence_char.to_string().repeat(fence_len));
3355                        }
3356                    }
3357                } else if let Some(ref marker) = mkdocs_fence_marker {
3358                    // Check for fence end (same or more chars)
3359                    let fence_char = marker.chars().next().unwrap();
3360                    if trimmed.starts_with(marker.as_str())
3361                        && trimmed
3362                            .chars()
3363                            .skip(marker.len())
3364                            .all(|c| c == fence_char || c.is_whitespace())
3365                    {
3366                        in_mkdocs_fenced_code = false;
3367                        mkdocs_fence_marker = None;
3368                    }
3369                }
3370
3371                // Check if still in tab content
3372                if line.trim().is_empty() {
3373                    // Blank lines are part of tabs
3374                    lines[i].in_content_tab = true;
3375                    // Only override code block if not in a fenced code block
3376                    if !in_mkdocs_fenced_code {
3377                        lines[i].in_code_block = false;
3378                    }
3379                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3380                    lines[i].in_content_tab = true;
3381                    // Override INDENTED code block detection - this is tab content, not code
3382                    // But preserve fenced code block detection (```...```)
3383                    if !in_mkdocs_fenced_code {
3384                        lines[i].in_code_block = false;
3385                    }
3386                } else {
3387                    // End of tab content
3388                    in_tab = false;
3389                    in_mkdocs_fenced_code = false;
3390                    mkdocs_fence_marker = None;
3391                    // Check if this line starts a new tab
3392                    if mkdocs_tabs::is_tab_marker(line) {
3393                        in_tab = true;
3394                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3395                        lines[i].in_content_tab = true;
3396                    }
3397                }
3398            }
3399
3400            // Check for markdown-enabled HTML blocks (grid cards, etc.)
3401            // Supports div, section, article, aside, details, figure, footer, header, main, nav
3402            // with markdown, markdown="1", or markdown="block" attributes
3403            lines[i].in_mkdocs_html_markdown = markdown_html_tracker.process_line(line);
3404
3405            // Skip remaining detection for lines in actual code blocks
3406            if lines[i].in_code_block {
3407                continue;
3408            }
3409
3410            // Check for definition list items
3411            if mkdocs_definition_lists::is_definition_line(line) {
3412                in_definition = true;
3413                lines[i].in_definition_list = true;
3414            } else if in_definition {
3415                // Check if continuation
3416                if mkdocs_definition_lists::is_definition_continuation(line) {
3417                    lines[i].in_definition_list = true;
3418                } else if line.trim().is_empty() {
3419                    // Blank line might continue definition
3420                    lines[i].in_definition_list = true;
3421                } else if mkdocs_definition_lists::could_be_term_line(line) {
3422                    // This could be a new term - check if followed by definition
3423                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3424                    {
3425                        lines[i].in_definition_list = true;
3426                    } else {
3427                        in_definition = false;
3428                    }
3429                } else {
3430                    in_definition = false;
3431                }
3432            } else if mkdocs_definition_lists::could_be_term_line(line) {
3433                // Check if this is a term followed by a definition
3434                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3435                    lines[i].in_definition_list = true;
3436                    in_definition = true;
3437                }
3438            }
3439        }
3440    }
3441
3442    /// Detect Obsidian comment blocks (%%...%%) in Obsidian flavor
3443    ///
3444    /// Obsidian comments use `%%` as delimiters:
3445    /// - Inline: `text %%hidden%% text`
3446    /// - Block: `%%\nmulti-line\n%%`
3447    ///
3448    /// Comments do NOT nest - the first `%%` after an opening `%%` closes the comment.
3449    /// Comments are NOT detected inside code blocks or HTML comments.
3450    ///
3451    /// Returns the computed comment ranges for use by rules that need position-level checking.
3452    fn detect_obsidian_comments(
3453        content: &str,
3454        lines: &mut [LineInfo],
3455        flavor: MarkdownFlavor,
3456        code_span_ranges: &[(usize, usize)],
3457    ) -> Vec<(usize, usize)> {
3458        // Only process Obsidian files
3459        if flavor != MarkdownFlavor::Obsidian {
3460            return Vec::new();
3461        }
3462
3463        // Compute Obsidian comment ranges (byte ranges)
3464        let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3465
3466        // Mark lines that fall within comment ranges
3467        for range in &comment_ranges {
3468            for line in lines.iter_mut() {
3469                // Skip lines in code blocks or HTML comments - they take precedence
3470                if line.in_code_block || line.in_html_comment {
3471                    continue;
3472                }
3473
3474                let line_start = line.byte_offset;
3475                let line_end = line.byte_offset + line.byte_len;
3476
3477                // Check if this line is entirely within a comment
3478                // A line is "in" a comment if it starts within or after the comment start
3479                // AND ends within or before the comment end
3480                if line_start >= range.0 && line_end <= range.1 {
3481                    line.in_obsidian_comment = true;
3482                } else if line_start < range.1 && line_end > range.0 {
3483                    // Line partially overlaps with comment - check if the overlap is significant
3484                    // For inline comments on a line, we still mark the line if any part is in comment
3485                    // However, for the filtered_lines API, we only skip lines entirely within comments
3486                    // This matches the behavior of HTML comments
3487
3488                    // Check if the ENTIRE line content (excluding leading/trailing whitespace)
3489                    // is within the comment range
3490                    let line_content_start = line_start;
3491                    let line_content_end = line_end;
3492
3493                    if line_content_start >= range.0 && line_content_end <= range.1 {
3494                        line.in_obsidian_comment = true;
3495                    }
3496                }
3497            }
3498        }
3499
3500        comment_ranges
3501    }
3502
3503    /// Compute byte ranges for all Obsidian comments in the content
3504    ///
3505    /// Returns a vector of (start, end) byte offset pairs for each comment.
3506    /// Comments do not nest - first `%%` after an opening `%%` closes it.
3507    fn compute_obsidian_comment_ranges(
3508        content: &str,
3509        lines: &[LineInfo],
3510        code_span_ranges: &[(usize, usize)],
3511    ) -> Vec<(usize, usize)> {
3512        let mut ranges = Vec::new();
3513
3514        // Quick check - if no %% at all, no comments
3515        if !content.contains("%%") {
3516            return ranges;
3517        }
3518
3519        // Build skip ranges for code blocks, HTML comments, and inline code spans
3520        // to avoid detecting %% inside those regions.
3521        let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3522        for line in lines {
3523            if line.in_code_block || line.in_html_comment {
3524                skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3525            }
3526        }
3527        skip_ranges.extend(code_span_ranges.iter().copied());
3528
3529        if !skip_ranges.is_empty() {
3530            // Sort and merge overlapping ranges for efficient scanning
3531            skip_ranges.sort_by_key(|(start, _)| *start);
3532            let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3533            for (start, end) in skip_ranges {
3534                if let Some((_, last_end)) = merged.last_mut()
3535                    && start <= *last_end
3536                {
3537                    *last_end = (*last_end).max(end);
3538                    continue;
3539                }
3540                merged.push((start, end));
3541            }
3542            skip_ranges = merged;
3543        }
3544
3545        let content_bytes = content.as_bytes();
3546        let len = content.len();
3547        let mut i = 0;
3548        let mut in_comment = false;
3549        let mut comment_start = 0;
3550        let mut skip_idx = 0;
3551
3552        while i < len.saturating_sub(1) {
3553            // Fast-skip any ranges we should ignore (code blocks, HTML comments, code spans)
3554            if skip_idx < skip_ranges.len() {
3555                let (skip_start, skip_end) = skip_ranges[skip_idx];
3556                if i >= skip_end {
3557                    skip_idx += 1;
3558                    continue;
3559                }
3560                if i >= skip_start {
3561                    i = skip_end;
3562                    continue;
3563                }
3564            }
3565
3566            // Check for %%
3567            if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3568                if !in_comment {
3569                    // Opening %%
3570                    in_comment = true;
3571                    comment_start = i;
3572                    i += 2;
3573                } else {
3574                    // Closing %%
3575                    let comment_end = i + 2;
3576                    ranges.push((comment_start, comment_end));
3577                    in_comment = false;
3578                    i += 2;
3579                }
3580            } else {
3581                i += 1;
3582            }
3583        }
3584
3585        // Handle unclosed comment - extends to end of document
3586        if in_comment {
3587            ranges.push((comment_start, len));
3588        }
3589
3590        ranges
3591    }
3592
3593    /// Helper to mark lines within a byte range
3594    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3595    where
3596        F: FnMut(&mut LineInfo),
3597    {
3598        // Find lines that overlap with the range
3599        for line in lines.iter_mut() {
3600            let line_start = line.byte_offset;
3601            let line_end = line.byte_offset + line.byte_len;
3602
3603            // Check if this line overlaps with the range
3604            if line_start < end && line_end > start {
3605                f(line);
3606            }
3607        }
3608
3609        // Silence unused warning for content (needed for signature consistency)
3610        let _ = content;
3611    }
3612
3613    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3614    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3615        // Quick check - if no backticks, no code spans
3616        if !content.contains('`') {
3617            return Vec::new();
3618        }
3619
3620        // Use pulldown-cmark's streaming parser with byte offsets
3621        let parser = Parser::new(content).into_offset_iter();
3622        let mut ranges = Vec::new();
3623
3624        for (event, range) in parser {
3625            if let Event::Code(_) = event {
3626                ranges.push((range.start, range.end));
3627            }
3628        }
3629
3630        Self::build_code_spans_from_ranges(content, lines, &ranges)
3631    }
3632
3633    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3634        let mut code_spans = Vec::new();
3635        if ranges.is_empty() {
3636            return code_spans;
3637        }
3638
3639        for &(start_pos, end_pos) in ranges {
3640            // The range includes the backticks, extract the actual content
3641            let full_span = &content[start_pos..end_pos];
3642            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3643
3644            // Extract content between backticks, preserving spaces
3645            let content_start = start_pos + backtick_count;
3646            let content_end = end_pos - backtick_count;
3647            let span_content = if content_start < content_end {
3648                content[content_start..content_end].to_string()
3649            } else {
3650                String::new()
3651            };
3652
3653            // Use binary search to find line number - O(log n) instead of O(n)
3654            // Find the rightmost line whose byte_offset <= start_pos
3655            let line_idx = lines
3656                .partition_point(|line| line.byte_offset <= start_pos)
3657                .saturating_sub(1);
3658            let line_num = line_idx + 1;
3659            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3660
3661            // Find end column using binary search
3662            let end_line_idx = lines
3663                .partition_point(|line| line.byte_offset <= end_pos)
3664                .saturating_sub(1);
3665            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3666
3667            // Convert byte offsets to character positions for correct Unicode handling
3668            // This ensures consistency with warning.column which uses character positions
3669            let line_content = lines[line_idx].content(content);
3670            let col_start = if byte_col_start <= line_content.len() {
3671                line_content[..byte_col_start].chars().count()
3672            } else {
3673                line_content.chars().count()
3674            };
3675
3676            let end_line_content = lines[end_line_idx].content(content);
3677            let col_end = if byte_col_end <= end_line_content.len() {
3678                end_line_content[..byte_col_end].chars().count()
3679            } else {
3680                end_line_content.chars().count()
3681            };
3682
3683            code_spans.push(CodeSpan {
3684                line: line_num,
3685                end_line: end_line_idx + 1,
3686                start_col: col_start,
3687                end_col: col_end,
3688                byte_offset: start_pos,
3689                byte_end: end_pos,
3690                backtick_count,
3691                content: span_content,
3692            });
3693        }
3694
3695        // Sort by position to ensure consistent ordering
3696        code_spans.sort_by_key(|span| span.byte_offset);
3697
3698        code_spans
3699    }
3700
3701    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3702    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3703        let mut math_spans = Vec::new();
3704
3705        // Quick check - if no $ signs, no math spans
3706        if !content.contains('$') {
3707            return math_spans;
3708        }
3709
3710        // Use pulldown-cmark with ENABLE_MATH option
3711        let mut options = Options::empty();
3712        options.insert(Options::ENABLE_MATH);
3713        let parser = Parser::new_ext(content, options).into_offset_iter();
3714
3715        for (event, range) in parser {
3716            let (is_display, math_content) = match &event {
3717                Event::InlineMath(text) => (false, text.as_ref()),
3718                Event::DisplayMath(text) => (true, text.as_ref()),
3719                _ => continue,
3720            };
3721
3722            let start_pos = range.start;
3723            let end_pos = range.end;
3724
3725            // Use binary search to find line number - O(log n) instead of O(n)
3726            let line_idx = lines
3727                .partition_point(|line| line.byte_offset <= start_pos)
3728                .saturating_sub(1);
3729            let line_num = line_idx + 1;
3730            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3731
3732            // Find end column using binary search
3733            let end_line_idx = lines
3734                .partition_point(|line| line.byte_offset <= end_pos)
3735                .saturating_sub(1);
3736            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3737
3738            // Convert byte offsets to character positions for correct Unicode handling
3739            let line_content = lines[line_idx].content(content);
3740            let col_start = if byte_col_start <= line_content.len() {
3741                line_content[..byte_col_start].chars().count()
3742            } else {
3743                line_content.chars().count()
3744            };
3745
3746            let end_line_content = lines[end_line_idx].content(content);
3747            let col_end = if byte_col_end <= end_line_content.len() {
3748                end_line_content[..byte_col_end].chars().count()
3749            } else {
3750                end_line_content.chars().count()
3751            };
3752
3753            math_spans.push(MathSpan {
3754                line: line_num,
3755                end_line: end_line_idx + 1,
3756                start_col: col_start,
3757                end_col: col_end,
3758                byte_offset: start_pos,
3759                byte_end: end_pos,
3760                is_display,
3761                content: math_content.to_string(),
3762            });
3763        }
3764
3765        // Sort by position to ensure consistent ordering
3766        math_spans.sort_by_key(|span| span.byte_offset);
3767
3768        math_spans
3769    }
3770
3771    /// Parse all list blocks in the content (legacy line-by-line approach)
3772    ///
3773    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3774    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3775    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3776    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3777    ///   treated as list continuation (based on the list marker width)
3778    ///
3779    /// When a new list item is encountered, we check if list-breaking content was seen
3780    /// since the last item. If so, we start a new list block.
3781    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3782        // Minimum indentation for unordered list continuation per CommonMark spec
3783        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3784
3785        /// Initialize or reset the forward-scanning tracking state.
3786        /// This helper eliminates code duplication across three initialization sites.
3787        #[inline]
3788        fn reset_tracking_state(
3789            list_item: &ListItemInfo,
3790            has_list_breaking_content: &mut bool,
3791            min_continuation: &mut usize,
3792        ) {
3793            *has_list_breaking_content = false;
3794            let marker_width = if list_item.is_ordered {
3795                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3796            } else {
3797                list_item.marker.len()
3798            };
3799            *min_continuation = if list_item.is_ordered {
3800                marker_width
3801            } else {
3802                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3803            };
3804        }
3805
3806        // Pre-size based on lines that could be list items
3807        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3808        let mut current_block: Option<ListBlock> = None;
3809        let mut last_list_item_line = 0;
3810        let mut current_indent_level = 0;
3811        let mut last_marker_width = 0;
3812
3813        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3814        let mut has_list_breaking_content_since_last_item = false;
3815        let mut min_continuation_for_tracking = 0;
3816
3817        for (line_idx, line_info) in lines.iter().enumerate() {
3818            let line_num = line_idx + 1;
3819
3820            // Enhanced code block handling using Design #3's context analysis
3821            if line_info.in_code_block {
3822                if let Some(ref mut block) = current_block {
3823                    // Calculate minimum indentation for list continuation
3824                    let min_continuation_indent =
3825                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3826
3827                    // Analyze code block context using the three-tier classification
3828                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3829
3830                    match context {
3831                        CodeBlockContext::Indented => {
3832                            // Code block is properly indented - continues the list
3833                            block.end_line = line_num;
3834                            continue;
3835                        }
3836                        CodeBlockContext::Standalone => {
3837                            // Code block separates lists - end current block
3838                            let completed_block = current_block.take().unwrap();
3839                            list_blocks.push(completed_block);
3840                            continue;
3841                        }
3842                        CodeBlockContext::Adjacent => {
3843                            // Edge case - use conservative behavior (continue list)
3844                            block.end_line = line_num;
3845                            continue;
3846                        }
3847                    }
3848                } else {
3849                    // No current list block - skip code block lines
3850                    continue;
3851                }
3852            }
3853
3854            // Extract blockquote prefix if any
3855            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3856                caps.get(0).unwrap().as_str().to_string()
3857            } else {
3858                String::new()
3859            };
3860
3861            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3862            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3863            if let Some(ref block) = current_block
3864                && line_info.list_item.is_none()
3865                && !line_info.is_blank
3866                && !line_info.in_code_span_continuation
3867            {
3868                let line_content = line_info.content(content).trim();
3869
3870                // Check for structural separators that break lists
3871                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3872                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3873                // as they indicate improper indentation rather than lazy continuation.
3874                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3875
3876                // Check if blockquote context changes (different prefix than current block)
3877                // Lines within the SAME blockquote context don't break lists
3878                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3879
3880                let breaks_list = line_info.heading.is_some()
3881                    || line_content.starts_with("---")
3882                    || line_content.starts_with("***")
3883                    || line_content.starts_with("___")
3884                    || crate::utils::skip_context::is_table_line(line_content)
3885                    || blockquote_prefix_changes
3886                    || (line_info.indent > 0
3887                        && line_info.indent < min_continuation_for_tracking
3888                        && !is_lazy_continuation);
3889
3890                if breaks_list {
3891                    has_list_breaking_content_since_last_item = true;
3892                }
3893            }
3894
3895            // If this line is a code span continuation within an active list block,
3896            // extend the block's end_line to include this line (maintains list continuity)
3897            if line_info.in_code_span_continuation
3898                && line_info.list_item.is_none()
3899                && let Some(ref mut block) = current_block
3900            {
3901                block.end_line = line_num;
3902            }
3903
3904            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3905            // properly indented lines within the list). This ensures the workaround at line 2448
3906            // works correctly when there are multiple continuation lines before a nested list item.
3907            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3908            // For blockquote lines, compute effective indent after stripping the prefix
3909            let effective_continuation_indent = if let Some(ref block) = current_block {
3910                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3911                let line_content = line_info.content(content);
3912                let line_bq_level = line_content
3913                    .chars()
3914                    .take_while(|c| *c == '>' || c.is_whitespace())
3915                    .filter(|&c| c == '>')
3916                    .count();
3917                if line_bq_level > 0 && line_bq_level == block_bq_level {
3918                    // Compute indent after blockquote markers
3919                    let mut pos = 0;
3920                    let mut found_markers = 0;
3921                    for c in line_content.chars() {
3922                        pos += c.len_utf8();
3923                        if c == '>' {
3924                            found_markers += 1;
3925                            if found_markers == line_bq_level {
3926                                if line_content.get(pos..pos + 1) == Some(" ") {
3927                                    pos += 1;
3928                                }
3929                                break;
3930                            }
3931                        }
3932                    }
3933                    let after_bq = &line_content[pos..];
3934                    after_bq.len() - after_bq.trim_start().len()
3935                } else {
3936                    line_info.indent
3937                }
3938            } else {
3939                line_info.indent
3940            };
3941            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3942                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3943                if block_bq_level > 0 {
3944                    if block.is_ordered { last_marker_width } else { 2 }
3945                } else {
3946                    min_continuation_for_tracking
3947                }
3948            } else {
3949                min_continuation_for_tracking
3950            };
3951            // Lazy continuation allows unindented text to continue a list item,
3952            // but NOT structural elements like headings, code fences, or horizontal rules
3953            let is_structural_element = line_info.heading.is_some()
3954                || line_info.content(content).trim().starts_with("```")
3955                || line_info.content(content).trim().starts_with("~~~");
3956            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3957                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3958
3959            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3960                eprintln!(
3961                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3962                    line_num,
3963                    effective_continuation_indent,
3964                    adjusted_min_continuation_for_tracking,
3965                    is_valid_continuation,
3966                    line_info.in_code_span_continuation,
3967                    line_info.in_code_block,
3968                    current_block.is_some()
3969                );
3970            }
3971
3972            if !line_info.in_code_span_continuation
3973                && line_info.list_item.is_none()
3974                && !line_info.is_blank
3975                && !line_info.in_code_block
3976                && is_valid_continuation
3977                && let Some(ref mut block) = current_block
3978            {
3979                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3980                    eprintln!(
3981                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3982                        line_num, block.end_line, line_num
3983                    );
3984                }
3985                block.end_line = line_num;
3986            }
3987
3988            // Check if this line is a list item
3989            if let Some(list_item) = &line_info.list_item {
3990                // Calculate nesting level based on indentation
3991                let item_indent = list_item.marker_column;
3992                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3993
3994                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3995                    eprintln!(
3996                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3997                        line_num, list_item.marker, item_indent
3998                    );
3999                }
4000
4001                if let Some(ref mut block) = current_block {
4002                    // Check if this continues the current block
4003                    // For nested lists, we need to check if this is a nested item (higher nesting level)
4004                    // or a continuation at the same or lower level
4005                    let is_nested = nesting > block.nesting_level;
4006                    let same_type =
4007                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
4008                    let same_context = block.blockquote_prefix == blockquote_prefix;
4009                    // Allow one blank line after last item, or lines immediately after block content
4010                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
4011
4012                    // For unordered lists, also check marker consistency
4013                    let marker_compatible =
4014                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
4015
4016                    // O(1) check: Use the tracked variable instead of O(n) nested loop
4017                    // This eliminates the quadratic bottleneck from issue #148
4018                    let has_non_list_content = has_list_breaking_content_since_last_item;
4019
4020                    // A list continues if:
4021                    // 1. It's a nested item (indented more than the parent), OR
4022                    // 2. It's the same type at the same level with reasonable distance
4023                    let mut continues_list = if is_nested {
4024                        // Nested items always continue the list if they're in the same context
4025                        same_context && reasonable_distance && !has_non_list_content
4026                    } else {
4027                        // Same-level items need to match type and markers
4028                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
4029                    };
4030
4031                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4032                        eprintln!(
4033                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
4034                            line_num,
4035                            continues_list,
4036                            is_nested,
4037                            same_type,
4038                            same_context,
4039                            reasonable_distance,
4040                            marker_compatible,
4041                            has_non_list_content,
4042                            last_list_item_line,
4043                            block.end_line
4044                        );
4045                    }
4046
4047                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
4048                    // This handles edge cases where content patterns might otherwise split lists incorrectly
4049                    // Apply for: nested items (different types OK), OR same-level same-type items
4050                    if !continues_list
4051                        && (is_nested || same_type)
4052                        && reasonable_distance
4053                        && line_num > 0
4054                        && block.end_line == line_num - 1
4055                    {
4056                        // Check if the previous line was a list item or a continuation of a list item
4057                        // (including lazy continuation lines)
4058                        if block.item_lines.contains(&(line_num - 1)) {
4059                            // They're consecutive list items - force them to be in the same list
4060                            continues_list = true;
4061                        } else {
4062                            // Previous line is a continuation line within this block
4063                            // (e.g., lazy continuation with indent=0)
4064                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
4065                            continues_list = true;
4066                        }
4067                    }
4068
4069                    if continues_list {
4070                        // Extend current block
4071                        block.end_line = line_num;
4072                        block.item_lines.push(line_num);
4073
4074                        // Update max marker width
4075                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
4076                            list_item.marker.len() + 1
4077                        } else {
4078                            list_item.marker.len()
4079                        });
4080
4081                        // Update marker consistency for unordered lists
4082                        if !block.is_ordered
4083                            && block.marker.is_some()
4084                            && block.marker.as_ref() != Some(&list_item.marker)
4085                        {
4086                            // Mixed markers, clear the marker field
4087                            block.marker = None;
4088                        }
4089
4090                        // Reset tracked state for issue #148 optimization
4091                        reset_tracking_state(
4092                            list_item,
4093                            &mut has_list_breaking_content_since_last_item,
4094                            &mut min_continuation_for_tracking,
4095                        );
4096                    } else {
4097                        // End current block and start a new one
4098                        // When a different list type starts AT THE SAME LEVEL (not nested),
4099                        // trim back lazy continuation lines (they become part of the gap, not the list)
4100                        // For nested items, different types are fine - they're sub-lists
4101                        if !same_type
4102                            && !is_nested
4103                            && let Some(&last_item) = block.item_lines.last()
4104                        {
4105                            block.end_line = last_item;
4106                        }
4107
4108                        list_blocks.push(block.clone());
4109
4110                        *block = ListBlock {
4111                            start_line: line_num,
4112                            end_line: line_num,
4113                            is_ordered: list_item.is_ordered,
4114                            marker: if list_item.is_ordered {
4115                                None
4116                            } else {
4117                                Some(list_item.marker.clone())
4118                            },
4119                            blockquote_prefix: blockquote_prefix.clone(),
4120                            item_lines: vec![line_num],
4121                            nesting_level: nesting,
4122                            max_marker_width: if list_item.is_ordered {
4123                                list_item.marker.len() + 1
4124                            } else {
4125                                list_item.marker.len()
4126                            },
4127                        };
4128
4129                        // Initialize tracked state for new block (issue #148 optimization)
4130                        reset_tracking_state(
4131                            list_item,
4132                            &mut has_list_breaking_content_since_last_item,
4133                            &mut min_continuation_for_tracking,
4134                        );
4135                    }
4136                } else {
4137                    // Start a new block
4138                    current_block = Some(ListBlock {
4139                        start_line: line_num,
4140                        end_line: line_num,
4141                        is_ordered: list_item.is_ordered,
4142                        marker: if list_item.is_ordered {
4143                            None
4144                        } else {
4145                            Some(list_item.marker.clone())
4146                        },
4147                        blockquote_prefix,
4148                        item_lines: vec![line_num],
4149                        nesting_level: nesting,
4150                        max_marker_width: list_item.marker.len(),
4151                    });
4152
4153                    // Initialize tracked state for new block (issue #148 optimization)
4154                    reset_tracking_state(
4155                        list_item,
4156                        &mut has_list_breaking_content_since_last_item,
4157                        &mut min_continuation_for_tracking,
4158                    );
4159                }
4160
4161                last_list_item_line = line_num;
4162                current_indent_level = item_indent;
4163                last_marker_width = if list_item.is_ordered {
4164                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
4165                } else {
4166                    list_item.marker.len()
4167                };
4168            } else if let Some(ref mut block) = current_block {
4169                // Not a list item - check if it continues the current block
4170                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4171                    eprintln!(
4172                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4173                        line_num, line_info.is_blank
4174                    );
4175                }
4176
4177                // For MD032 compatibility, we use a simple approach:
4178                // - Indented lines continue the list
4179                // - Blank lines followed by indented content continue the list
4180                // - Everything else ends the list
4181
4182                // Check if the last line in the list block ended with a backslash (hard line break)
4183                // This handles cases where list items use backslash for hard line breaks
4184                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4185                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4186                } else {
4187                    false
4188                };
4189
4190                // Calculate minimum indentation for list continuation
4191                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
4192                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
4193                let min_continuation_indent = if block.is_ordered {
4194                    current_indent_level + last_marker_width
4195                } else {
4196                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
4197                };
4198
4199                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4200                    // Indented line or backslash continuation continues the list
4201                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4202                        eprintln!(
4203                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4204                            line_num, line_info.indent, min_continuation_indent
4205                        );
4206                    }
4207                    block.end_line = line_num;
4208                } else if line_info.is_blank {
4209                    // Blank line - check if it's internal to the list or ending it
4210                    // We only include blank lines that are followed by more list content
4211                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4212                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4213                    }
4214                    let mut check_idx = line_idx + 1;
4215                    let mut found_continuation = false;
4216
4217                    // Skip additional blank lines
4218                    while check_idx < lines.len() && lines[check_idx].is_blank {
4219                        check_idx += 1;
4220                    }
4221
4222                    if check_idx < lines.len() {
4223                        let next_line = &lines[check_idx];
4224                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
4225                        let next_content = next_line.content(content);
4226                        // Use blockquote level (count of >) to compare, not the full prefix
4227                        // This avoids issues where the regex captures extra whitespace
4228                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4229                        let next_bq_level_for_indent = next_content
4230                            .chars()
4231                            .take_while(|c| *c == '>' || c.is_whitespace())
4232                            .filter(|&c| c == '>')
4233                            .count();
4234                        let effective_indent =
4235                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4236                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
4237                                // Find position after ">" and one space
4238                                let mut pos = 0;
4239                                let mut found_markers = 0;
4240                                for c in next_content.chars() {
4241                                    pos += c.len_utf8();
4242                                    if c == '>' {
4243                                        found_markers += 1;
4244                                        if found_markers == next_bq_level_for_indent {
4245                                            // Skip optional space after last >
4246                                            if next_content.get(pos..pos + 1) == Some(" ") {
4247                                                pos += 1;
4248                                            }
4249                                            break;
4250                                        }
4251                                    }
4252                                }
4253                                let after_blockquote_marker = &next_content[pos..];
4254                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4255                            } else {
4256                                next_line.indent
4257                            };
4258                        // Also adjust min_continuation_indent for blockquote lists
4259                        // The marker_column includes blockquote prefix, so subtract it
4260                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4261                            // For blockquote lists, the continuation is relative to blockquote content
4262                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
4263                            if block.is_ordered { last_marker_width } else { 2 }
4264                        } else {
4265                            min_continuation_indent
4266                        };
4267                        // Check if followed by indented content (list continuation)
4268                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4269                            eprintln!(
4270                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4271                                line_num,
4272                                check_idx + 1,
4273                                effective_indent,
4274                                adjusted_min_continuation,
4275                                next_line.list_item.is_some(),
4276                                next_line.in_code_block
4277                            );
4278                        }
4279                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4280                            found_continuation = true;
4281                        }
4282                        // Check if followed by another list item at the same level
4283                        else if !next_line.in_code_block
4284                            && next_line.list_item.is_some()
4285                            && let Some(item) = &next_line.list_item
4286                        {
4287                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4288                                .find(next_line.content(content))
4289                                .map_or(String::new(), |m| m.as_str().to_string());
4290                            if item.marker_column == current_indent_level
4291                                && item.is_ordered == block.is_ordered
4292                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4293                            {
4294                                // Check if there was meaningful content between the list items (unused now)
4295                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
4296                                // Pre-compute block's blockquote level for use in closures
4297                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4298                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4299                                    if let Some(between_line) = lines.get(idx) {
4300                                        let between_content = between_line.content(content);
4301                                        let trimmed = between_content.trim();
4302                                        // Skip empty lines
4303                                        if trimmed.is_empty() {
4304                                            return false;
4305                                        }
4306                                        // Check for meaningful content
4307                                        let line_indent = between_content.len() - between_content.trim_start().len();
4308
4309                                        // Check if blockquote level changed (not just if line starts with ">")
4310                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4311                                            .find(between_content)
4312                                            .map_or(String::new(), |m| m.as_str().to_string());
4313                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4314                                        let blockquote_level_changed =
4315                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4316
4317                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4318                                        if trimmed.starts_with("```")
4319                                            || trimmed.starts_with("~~~")
4320                                            || trimmed.starts_with("---")
4321                                            || trimmed.starts_with("***")
4322                                            || trimmed.starts_with("___")
4323                                            || blockquote_level_changed
4324                                            || crate::utils::skip_context::is_table_line(trimmed)
4325                                            || between_line.heading.is_some()
4326                                        {
4327                                            return true; // These are structural separators - meaningful content that breaks lists
4328                                        }
4329
4330                                        // Only properly indented content continues the list
4331                                        line_indent >= min_continuation_indent
4332                                    } else {
4333                                        false
4334                                    }
4335                                });
4336
4337                                if block.is_ordered {
4338                                    // For ordered lists: don't continue if there are structural separators
4339                                    // Check if there are structural separators between the list items
4340                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4341                                        if let Some(between_line) = lines.get(idx) {
4342                                            let between_content = between_line.content(content);
4343                                            let trimmed = between_content.trim();
4344                                            if trimmed.is_empty() {
4345                                                return false;
4346                                            }
4347                                            // Check if blockquote level changed (not just if line starts with ">")
4348                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4349                                                .find(between_content)
4350                                                .map_or(String::new(), |m| m.as_str().to_string());
4351                                            let between_bq_level =
4352                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4353                                            let blockquote_level_changed =
4354                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4355                                            // Check for structural separators that break lists
4356                                            trimmed.starts_with("```")
4357                                                || trimmed.starts_with("~~~")
4358                                                || trimmed.starts_with("---")
4359                                                || trimmed.starts_with("***")
4360                                                || trimmed.starts_with("___")
4361                                                || blockquote_level_changed
4362                                                || crate::utils::skip_context::is_table_line(trimmed)
4363                                                || between_line.heading.is_some()
4364                                        } else {
4365                                            false
4366                                        }
4367                                    });
4368                                    found_continuation = !has_structural_separators;
4369                                } else {
4370                                    // For unordered lists: also check for structural separators
4371                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4372                                        if let Some(between_line) = lines.get(idx) {
4373                                            let between_content = between_line.content(content);
4374                                            let trimmed = between_content.trim();
4375                                            if trimmed.is_empty() {
4376                                                return false;
4377                                            }
4378                                            // Check if blockquote level changed (not just if line starts with ">")
4379                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4380                                                .find(between_content)
4381                                                .map_or(String::new(), |m| m.as_str().to_string());
4382                                            let between_bq_level =
4383                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4384                                            let blockquote_level_changed =
4385                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4386                                            // Check for structural separators that break lists
4387                                            trimmed.starts_with("```")
4388                                                || trimmed.starts_with("~~~")
4389                                                || trimmed.starts_with("---")
4390                                                || trimmed.starts_with("***")
4391                                                || trimmed.starts_with("___")
4392                                                || blockquote_level_changed
4393                                                || crate::utils::skip_context::is_table_line(trimmed)
4394                                                || between_line.heading.is_some()
4395                                        } else {
4396                                            false
4397                                        }
4398                                    });
4399                                    found_continuation = !has_structural_separators;
4400                                }
4401                            }
4402                        }
4403                    }
4404
4405                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4406                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4407                    }
4408                    if found_continuation {
4409                        // Include the blank line in the block
4410                        block.end_line = line_num;
4411                    } else {
4412                        // Blank line ends the list - don't include it
4413                        list_blocks.push(block.clone());
4414                        current_block = None;
4415                    }
4416                } else {
4417                    // Check for lazy continuation - non-indented line immediately after a list item
4418                    // But only if the line has sufficient indentation for the list type
4419                    let min_required_indent = if block.is_ordered {
4420                        current_indent_level + last_marker_width
4421                    } else {
4422                        current_indent_level + 2
4423                    };
4424
4425                    // For lazy continuation to apply, the line must either:
4426                    // 1. Have no indentation (true lazy continuation)
4427                    // 2. Have sufficient indentation for the list type
4428                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4429                    let line_content = line_info.content(content).trim();
4430
4431                    // Check for table-like patterns
4432                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4433
4434                    // Check if blockquote level changed (not just if line starts with ">")
4435                    // Lines within the same blockquote level are NOT structural separators
4436                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4437                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4438                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4439
4440                    let is_structural_separator = line_info.heading.is_some()
4441                        || line_content.starts_with("```")
4442                        || line_content.starts_with("~~~")
4443                        || line_content.starts_with("---")
4444                        || line_content.starts_with("***")
4445                        || line_content.starts_with("___")
4446                        || blockquote_level_changed
4447                        || looks_like_table;
4448
4449                    // Allow lazy continuation if we're still within the same list block
4450                    // (not just immediately after a list item)
4451                    // Also treat code span continuations as valid continuations regardless of indent
4452                    let is_lazy_continuation = !is_structural_separator
4453                        && !line_info.is_blank
4454                        && (line_info.indent == 0
4455                            || line_info.indent >= min_required_indent
4456                            || line_info.in_code_span_continuation);
4457
4458                    if is_lazy_continuation {
4459                        // Per CommonMark, lazy continuation continues until a blank line
4460                        // or structural element, regardless of uppercase at line start
4461                        block.end_line = line_num;
4462                    } else {
4463                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4464                        list_blocks.push(block.clone());
4465                        current_block = None;
4466                    }
4467                }
4468            }
4469        }
4470
4471        // Don't forget the last block
4472        if let Some(block) = current_block {
4473            list_blocks.push(block);
4474        }
4475
4476        // Merge adjacent blocks that should be one
4477        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4478
4479        list_blocks
4480    }
4481
4482    /// Compute character frequency for fast content analysis
4483    fn compute_char_frequency(content: &str) -> CharFrequency {
4484        let mut frequency = CharFrequency::default();
4485
4486        for ch in content.chars() {
4487            match ch {
4488                '#' => frequency.hash_count += 1,
4489                '*' => frequency.asterisk_count += 1,
4490                '_' => frequency.underscore_count += 1,
4491                '-' => frequency.hyphen_count += 1,
4492                '+' => frequency.plus_count += 1,
4493                '>' => frequency.gt_count += 1,
4494                '|' => frequency.pipe_count += 1,
4495                '[' => frequency.bracket_count += 1,
4496                '`' => frequency.backtick_count += 1,
4497                '<' => frequency.lt_count += 1,
4498                '!' => frequency.exclamation_count += 1,
4499                '\n' => frequency.newline_count += 1,
4500                _ => {}
4501            }
4502        }
4503
4504        frequency
4505    }
4506
4507    /// Parse HTML tags in the content
4508    fn parse_html_tags(
4509        content: &str,
4510        lines: &[LineInfo],
4511        code_blocks: &[(usize, usize)],
4512        flavor: MarkdownFlavor,
4513    ) -> Vec<HtmlTag> {
4514        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4515            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4516
4517        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4518
4519        for cap in HTML_TAG_REGEX.captures_iter(content) {
4520            let full_match = cap.get(0).unwrap();
4521            let match_start = full_match.start();
4522            let match_end = full_match.end();
4523
4524            // Skip if in code block
4525            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4526                continue;
4527            }
4528
4529            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4530            let tag_name_original = cap.get(2).unwrap().as_str();
4531            let tag_name = tag_name_original.to_lowercase();
4532            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4533
4534            // Skip JSX components in MDX files (tags starting with uppercase letter)
4535            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4536            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4537                continue;
4538            }
4539
4540            // Find which line this tag is on
4541            let mut line_num = 1;
4542            let mut col_start = match_start;
4543            let mut col_end = match_end;
4544            for (idx, line_info) in lines.iter().enumerate() {
4545                if match_start >= line_info.byte_offset {
4546                    line_num = idx + 1;
4547                    col_start = match_start - line_info.byte_offset;
4548                    col_end = match_end - line_info.byte_offset;
4549                } else {
4550                    break;
4551                }
4552            }
4553
4554            html_tags.push(HtmlTag {
4555                line: line_num,
4556                start_col: col_start,
4557                end_col: col_end,
4558                byte_offset: match_start,
4559                byte_end: match_end,
4560                tag_name,
4561                is_closing,
4562                is_self_closing,
4563                raw_content: full_match.as_str().to_string(),
4564            });
4565        }
4566
4567        html_tags
4568    }
4569
4570    /// Parse table rows in the content
4571    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4572        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4573
4574        for (line_idx, line_info) in lines.iter().enumerate() {
4575            // Skip lines in code blocks or blank lines
4576            if line_info.in_code_block || line_info.is_blank {
4577                continue;
4578            }
4579
4580            let line = line_info.content(content);
4581            let line_num = line_idx + 1;
4582
4583            // Check if this line contains pipes (potential table row)
4584            if !line.contains('|') {
4585                continue;
4586            }
4587
4588            // Count columns by splitting on pipes
4589            let parts: Vec<&str> = line.split('|').collect();
4590            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4591
4592            // Check if this is a separator row
4593            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4594            let mut column_alignments = Vec::new();
4595
4596            if is_separator {
4597                for part in &parts[1..parts.len() - 1] {
4598                    // Skip first and last empty parts
4599                    let trimmed = part.trim();
4600                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4601                        "center".to_string()
4602                    } else if trimmed.ends_with(':') {
4603                        "right".to_string()
4604                    } else if trimmed.starts_with(':') {
4605                        "left".to_string()
4606                    } else {
4607                        "none".to_string()
4608                    };
4609                    column_alignments.push(alignment);
4610                }
4611            }
4612
4613            table_rows.push(TableRow {
4614                line: line_num,
4615                is_separator,
4616                column_count,
4617                column_alignments,
4618            });
4619        }
4620
4621        table_rows
4622    }
4623
4624    /// Parse bare URLs and emails in the content
4625    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4626        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4627
4628        // Check for bare URLs (not in angle brackets or markdown links)
4629        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4630            let full_match = cap.get(0).unwrap();
4631            let match_start = full_match.start();
4632            let match_end = full_match.end();
4633
4634            // Skip if in code block
4635            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4636                continue;
4637            }
4638
4639            // Skip if already in angle brackets or markdown links
4640            let preceding_char = if match_start > 0 {
4641                content.chars().nth(match_start - 1)
4642            } else {
4643                None
4644            };
4645            let following_char = content.chars().nth(match_end);
4646
4647            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4648                continue;
4649            }
4650            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4651                continue;
4652            }
4653
4654            let url = full_match.as_str();
4655            let url_type = if url.starts_with("https://") {
4656                "https"
4657            } else if url.starts_with("http://") {
4658                "http"
4659            } else if url.starts_with("ftp://") {
4660                "ftp"
4661            } else {
4662                "other"
4663            };
4664
4665            // Find which line this URL is on
4666            let mut line_num = 1;
4667            let mut col_start = match_start;
4668            let mut col_end = match_end;
4669            for (idx, line_info) in lines.iter().enumerate() {
4670                if match_start >= line_info.byte_offset {
4671                    line_num = idx + 1;
4672                    col_start = match_start - line_info.byte_offset;
4673                    col_end = match_end - line_info.byte_offset;
4674                } else {
4675                    break;
4676                }
4677            }
4678
4679            bare_urls.push(BareUrl {
4680                line: line_num,
4681                start_col: col_start,
4682                end_col: col_end,
4683                byte_offset: match_start,
4684                byte_end: match_end,
4685                url: url.to_string(),
4686                url_type: url_type.to_string(),
4687            });
4688        }
4689
4690        // Check for bare email addresses
4691        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4692            let full_match = cap.get(0).unwrap();
4693            let match_start = full_match.start();
4694            let match_end = full_match.end();
4695
4696            // Skip if in code block
4697            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4698                continue;
4699            }
4700
4701            // Skip if already in angle brackets or markdown links
4702            let preceding_char = if match_start > 0 {
4703                content.chars().nth(match_start - 1)
4704            } else {
4705                None
4706            };
4707            let following_char = content.chars().nth(match_end);
4708
4709            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4710                continue;
4711            }
4712            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4713                continue;
4714            }
4715
4716            let email = full_match.as_str();
4717
4718            // Find which line this email is on
4719            let mut line_num = 1;
4720            let mut col_start = match_start;
4721            let mut col_end = match_end;
4722            for (idx, line_info) in lines.iter().enumerate() {
4723                if match_start >= line_info.byte_offset {
4724                    line_num = idx + 1;
4725                    col_start = match_start - line_info.byte_offset;
4726                    col_end = match_end - line_info.byte_offset;
4727                } else {
4728                    break;
4729                }
4730            }
4731
4732            bare_urls.push(BareUrl {
4733                line: line_num,
4734                start_col: col_start,
4735                end_col: col_end,
4736                byte_offset: match_start,
4737                byte_end: match_end,
4738                url: email.to_string(),
4739                url_type: "email".to_string(),
4740            });
4741        }
4742
4743        bare_urls
4744    }
4745
4746    /// Get an iterator over valid CommonMark headings
4747    ///
4748    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4749    /// that should be flagged by MD018 but should not be processed by other heading rules.
4750    ///
4751    /// # Examples
4752    ///
4753    /// ```rust
4754    /// use rumdl_lib::lint_context::LintContext;
4755    /// use rumdl_lib::config::MarkdownFlavor;
4756    ///
4757    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4758    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4759    ///
4760    /// for heading in ctx.valid_headings() {
4761    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4762    /// }
4763    /// // Only prints valid headings, skips `#NoSpace`
4764    /// ```
4765    #[must_use]
4766    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4767        ValidHeadingsIter::new(&self.lines)
4768    }
4769
4770    /// Check if the document contains any valid CommonMark headings
4771    ///
4772    /// Returns `true` if there is at least one heading with proper space after `#`.
4773    #[must_use]
4774    pub fn has_valid_headings(&self) -> bool {
4775        self.lines
4776            .iter()
4777            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4778    }
4779}
4780
4781/// Merge adjacent list blocks that should be treated as one
4782fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4783    if list_blocks.len() < 2 {
4784        return;
4785    }
4786
4787    let mut merger = ListBlockMerger::new(content, lines);
4788    *list_blocks = merger.merge(list_blocks);
4789}
4790
4791/// Helper struct to manage the complex logic of merging list blocks
4792struct ListBlockMerger<'a> {
4793    content: &'a str,
4794    lines: &'a [LineInfo],
4795}
4796
4797impl<'a> ListBlockMerger<'a> {
4798    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4799        Self { content, lines }
4800    }
4801
4802    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4803        let mut merged = Vec::with_capacity(list_blocks.len());
4804        let mut current = list_blocks[0].clone();
4805
4806        for next in list_blocks.iter().skip(1) {
4807            if self.should_merge_blocks(&current, next) {
4808                current = self.merge_two_blocks(current, next);
4809            } else {
4810                merged.push(current);
4811                current = next.clone();
4812            }
4813        }
4814
4815        merged.push(current);
4816        merged
4817    }
4818
4819    /// Determine if two adjacent list blocks should be merged
4820    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4821        // Basic compatibility checks
4822        if !self.blocks_are_compatible(current, next) {
4823            return false;
4824        }
4825
4826        // Check spacing and content between blocks
4827        let spacing = self.analyze_spacing_between(current, next);
4828        match spacing {
4829            BlockSpacing::Consecutive => true,
4830            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4831            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4832                self.can_merge_with_content_between(current, next)
4833            }
4834        }
4835    }
4836
4837    /// Check if blocks have compatible structure for merging
4838    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4839        current.is_ordered == next.is_ordered
4840            && current.blockquote_prefix == next.blockquote_prefix
4841            && current.nesting_level == next.nesting_level
4842    }
4843
4844    /// Analyze the spacing between two list blocks
4845    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4846        let gap = next.start_line - current.end_line;
4847
4848        match gap {
4849            1 => BlockSpacing::Consecutive,
4850            2 => BlockSpacing::SingleBlank,
4851            _ if gap > 2 => {
4852                if self.has_only_blank_lines_between(current, next) {
4853                    BlockSpacing::MultipleBlanks
4854                } else {
4855                    BlockSpacing::ContentBetween
4856                }
4857            }
4858            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4859        }
4860    }
4861
4862    /// Check if unordered lists can be merged with a single blank line between
4863    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4864        // Check if there are structural separators between the blocks
4865        // If has_meaningful_content_between returns true, it means there are structural separators
4866        if has_meaningful_content_between(self.content, current, next, self.lines) {
4867            return false; // Structural separators prevent merging
4868        }
4869
4870        // Only merge unordered lists with same marker across single blank
4871        !current.is_ordered && current.marker == next.marker
4872    }
4873
4874    /// Check if ordered lists can be merged when there's content between them
4875    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4876        // Do not merge lists if there are structural separators between them
4877        if has_meaningful_content_between(self.content, current, next, self.lines) {
4878            return false; // Structural separators prevent merging
4879        }
4880
4881        // Only consider merging ordered lists if there's no structural content between
4882        current.is_ordered && next.is_ordered
4883    }
4884
4885    /// Check if there are only blank lines between blocks
4886    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4887        for line_num in (current.end_line + 1)..next.start_line {
4888            if let Some(line_info) = self.lines.get(line_num - 1)
4889                && !line_info.content(self.content).trim().is_empty()
4890            {
4891                return false;
4892            }
4893        }
4894        true
4895    }
4896
4897    /// Merge two compatible list blocks into one
4898    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4899        current.end_line = next.end_line;
4900        current.item_lines.extend_from_slice(&next.item_lines);
4901
4902        // Update max marker width
4903        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4904
4905        // Handle marker consistency for unordered lists
4906        if !current.is_ordered && self.markers_differ(&current, next) {
4907            current.marker = None; // Mixed markers
4908        }
4909
4910        current
4911    }
4912
4913    /// Check if two blocks have different markers
4914    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4915        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4916    }
4917}
4918
/// How far apart two neighbouring list blocks are.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks
    Consecutive,
    /// One blank line between blocks
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks
    ContentBetween,
}
4927
4928/// Check if there's meaningful content (not just blank lines) between two list blocks
4929fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4930    // Check lines between current.end_line and next.start_line
4931    for line_num in (current.end_line + 1)..next.start_line {
4932        if let Some(line_info) = lines.get(line_num - 1) {
4933            // Convert to 0-indexed
4934            let trimmed = line_info.content(content).trim();
4935
4936            // Skip empty lines
4937            if trimmed.is_empty() {
4938                continue;
4939            }
4940
4941            // Check for structural separators that should separate lists (CommonMark compliant)
4942
4943            // Headings separate lists
4944            if line_info.heading.is_some() {
4945                return true; // Has meaningful content - headings separate lists
4946            }
4947
4948            // Horizontal rules separate lists (---, ***, ___)
4949            if is_horizontal_rule(trimmed) {
4950                return true; // Has meaningful content - horizontal rules separate lists
4951            }
4952
4953            // Tables separate lists
4954            if crate::utils::skip_context::is_table_line(trimmed) {
4955                return true; // Has meaningful content - tables separate lists
4956            }
4957
4958            // Blockquotes separate lists
4959            if trimmed.starts_with('>') {
4960                return true; // Has meaningful content - blockquotes separate lists
4961            }
4962
4963            // Code block fences separate lists (unless properly indented as list content)
4964            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4965                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4966
4967                // Check if this code block is properly indented as list continuation
4968                let min_continuation_indent = if current.is_ordered {
4969                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4970                } else {
4971                    current.nesting_level + 2
4972                };
4973
4974                if line_indent < min_continuation_indent {
4975                    // This is a standalone code block that separates lists
4976                    return true; // Has meaningful content - standalone code blocks separate lists
4977                }
4978            }
4979
4980            // Check if this line has proper indentation for list continuation
4981            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4982
4983            // Calculate minimum indentation needed to be list continuation
4984            let min_indent = if current.is_ordered {
4985                current.nesting_level + current.max_marker_width
4986            } else {
4987                current.nesting_level + 2
4988            };
4989
4990            // If the line is not indented enough to be list continuation, it's meaningful content
4991            if line_indent < min_indent {
4992                return true; // Has meaningful content - content not indented as list continuation
4993            }
4994
4995            // If we reach here, the line is properly indented as list continuation
4996            // Continue checking other lines
4997        }
4998    }
4999
5000    // Only blank lines or properly indented list continuation content between blocks
5001    false
5002}
5003
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces between characters
/// - No other characters allowed
///
/// A tab anywhere in the leading indentation (e.g. `" \t---"`) is rejected as well:
/// with 4-column tab stops it always reaches column 4, which is code-block
/// indentation rather than a thematic break.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();
    // `after_spaces` starting with a tab covers both a tab at column 0 and a tab
    // that follows 1-3 leading spaces.
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(line.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// The input is expected to be already trimmed: it must start with `-`, `*` or `_`,
/// contain at least three of that character, and otherwise only spaces or tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    if trimmed.len() < 3 {
        return false;
    }

    // The first character fixes which marker the whole line must use.
    let Some(marker) = trimmed.chars().next().filter(|c| matches!(c, '-' | '*' | '_')) else {
        return false;
    };

    // Count markers while rejecting anything that is neither marker nor whitespace.
    let mut count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }
    count >= 3
}

/// Backwards-compatible alias for `is_horizontal_rule_content`
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
5049
/// Unit tests for this module.
5051#[cfg(test)]
5052mod tests {
5053    use super::*;
5054
5055    #[test]
5056    fn test_empty_content() {
5057        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5058        assert_eq!(ctx.content, "");
5059        assert_eq!(ctx.line_offsets, vec![0]);
5060        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
5061        assert_eq!(ctx.lines.len(), 0);
5062    }
5063
5064    #[test]
5065    fn test_single_line() {
5066        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
5067        assert_eq!(ctx.content, "# Hello");
5068        assert_eq!(ctx.line_offsets, vec![0]);
5069        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
5070        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
5071    }
5072
5073    #[test]
5074    fn test_multi_line() {
5075        let content = "# Title\n\nSecond line\nThird line";
5076        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5077        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
5078        // Test offset to line/col
5079        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
5080        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
5081        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
5082        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
5083        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
5084    }
5085
5086    #[test]
5087    fn test_line_info() {
5088        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
5089        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5090
5091        // Test line info
5092        assert_eq!(ctx.lines.len(), 7);
5093
5094        // Line 1: "# Title"
5095        let line1 = &ctx.lines[0];
5096        assert_eq!(line1.content(ctx.content), "# Title");
5097        assert_eq!(line1.byte_offset, 0);
5098        assert_eq!(line1.indent, 0);
5099        assert!(!line1.is_blank);
5100        assert!(!line1.in_code_block);
5101        assert!(line1.list_item.is_none());
5102
5103        // Line 2: "    indented"
5104        let line2 = &ctx.lines[1];
5105        assert_eq!(line2.content(ctx.content), "    indented");
5106        assert_eq!(line2.byte_offset, 8);
5107        assert_eq!(line2.indent, 4);
5108        assert!(!line2.is_blank);
5109
5110        // Line 3: "" (blank)
5111        let line3 = &ctx.lines[2];
5112        assert_eq!(line3.content(ctx.content), "");
5113        assert!(line3.is_blank);
5114
5115        // Test helper methods
5116        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
5117        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
5118        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
5119        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
5120    }
5121
5122    #[test]
5123    fn test_list_item_detection() {
5124        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
5125        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5126
5127        // Line 1: "- Unordered item"
5128        let line1 = &ctx.lines[0];
5129        assert!(line1.list_item.is_some());
5130        let list1 = line1.list_item.as_ref().unwrap();
5131        assert_eq!(list1.marker, "-");
5132        assert!(!list1.is_ordered);
5133        assert_eq!(list1.marker_column, 0);
5134        assert_eq!(list1.content_column, 2);
5135
5136        // Line 2: "  * Nested item"
5137        let line2 = &ctx.lines[1];
5138        assert!(line2.list_item.is_some());
5139        let list2 = line2.list_item.as_ref().unwrap();
5140        assert_eq!(list2.marker, "*");
5141        assert_eq!(list2.marker_column, 2);
5142
5143        // Line 3: "1. Ordered item"
5144        let line3 = &ctx.lines[2];
5145        assert!(line3.list_item.is_some());
5146        let list3 = line3.list_item.as_ref().unwrap();
5147        assert_eq!(list3.marker, "1.");
5148        assert!(list3.is_ordered);
5149        assert_eq!(list3.number, Some(1));
5150
5151        // Line 6: "Not a list"
5152        let line6 = &ctx.lines[5];
5153        assert!(line6.list_item.is_none());
5154    }
5155
5156    #[test]
5157    fn test_offset_to_line_col_edge_cases() {
5158        let content = "a\nb\nc";
5159        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5160        // line_offsets: [0, 2, 4]
5161        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
5162        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
5163        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
5164        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
5165        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
5166        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
5167    }
5168
5169    #[test]
5170    fn test_mdx_esm_blocks() {
5171        let content = r##"import {Chart} from './snowfall.js'
5172export const year = 2023
5173
5174# Last year's snowfall
5175
5176In {year}, the snowfall was above average.
5177It was followed by a warm spring which caused
5178flood conditions in many of the nearby rivers.
5179
5180<Chart color="#fcb32c" year={year} />
5181"##;
5182
5183        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
5184
5185        // Check that lines 1 and 2 are marked as ESM blocks
5186        assert_eq!(ctx.lines.len(), 10);
5187        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
5188        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
5189        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
5190        assert!(
5191            !ctx.lines[3].in_esm_block,
5192            "Line 4 (heading) should NOT be in_esm_block"
5193        );
5194        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
5195        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
5196    }
5197
5198    #[test]
5199    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
5200        let content = r#"import {Chart} from './snowfall.js'
5201export const year = 2023
5202
5203# Last year's snowfall
5204"#;
5205
5206        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5207
5208        // ESM blocks should NOT be detected in Standard flavor
5209        assert!(
5210            !ctx.lines[0].in_esm_block,
5211            "Line 1 should NOT be in_esm_block in Standard flavor"
5212        );
5213        assert!(
5214            !ctx.lines[1].in_esm_block,
5215            "Line 2 should NOT be in_esm_block in Standard flavor"
5216        );
5217    }
5218
5219    #[test]
5220    fn test_blockquote_with_indented_content() {
5221        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
5222        // The content inside the blockquote may also be detected as a code block (which is correct),
5223        // but for MD046 purposes, we need to know the line is inside a blockquote.
5224        let content = r#"# Heading
5225
5226>      -S socket-path
5227>                    More text
5228"#;
5229        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5230
5231        // Line 3 (index 2) should be detected as blockquote
5232        assert!(
5233            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
5234            "Line 3 should be a blockquote"
5235        );
5236        // Line 4 (index 3) should also be blockquote
5237        assert!(
5238            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
5239            "Line 4 should be a blockquote"
5240        );
5241
5242        // Verify blockquote content is correctly parsed
5243        // Note: spaces_after includes the spaces between `>` and content
5244        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
5245        assert_eq!(bq3.content, "-S socket-path");
5246        assert_eq!(bq3.nesting_level, 1);
5247        // 6 spaces after the `>` marker
5248        assert!(bq3.has_multiple_spaces_after_marker);
5249
5250        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
5251        assert_eq!(bq4.content, "More text");
5252        assert_eq!(bq4.nesting_level, 1);
5253    }
5254
5255    #[test]
5256    fn test_footnote_definitions_not_parsed_as_reference_defs() {
5257        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
5258        let content = r#"# Title
5259
5260A footnote[^1].
5261
5262[^1]: This is the footnote content.
5263
5264[^note]: Another footnote with [link](https://example.com).
5265
5266[regular]: ./path.md "A real reference definition"
5267"#;
5268        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5269
5270        // Should only have one reference definition (the regular one)
5271        assert_eq!(
5272            ctx.reference_defs.len(),
5273            1,
5274            "Footnotes should not be parsed as reference definitions"
5275        );
5276
5277        // The only reference def should be the regular one
5278        assert_eq!(ctx.reference_defs[0].id, "regular");
5279        assert_eq!(ctx.reference_defs[0].url, "./path.md");
5280        assert_eq!(
5281            ctx.reference_defs[0].title,
5282            Some("A real reference definition".to_string())
5283        );
5284    }
5285
5286    #[test]
5287    fn test_footnote_with_inline_link_not_misidentified() {
5288        // Regression test for issue #286: footnote containing an inline link
5289        // was incorrectly parsed as a reference definition with URL "[link](url)"
5290        let content = r#"# Title
5291
5292A footnote[^1].
5293
5294[^1]: [link](https://www.google.com).
5295"#;
5296        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5297
5298        // Should have no reference definitions
5299        assert!(
5300            ctx.reference_defs.is_empty(),
5301            "Footnote with inline link should not create a reference definition"
5302        );
5303    }
5304
5305    #[test]
5306    fn test_various_footnote_formats_excluded() {
5307        // Test various footnote ID formats are all excluded
5308        let content = r#"[^1]: Numeric footnote
5309[^note]: Named footnote
5310[^a]: Single char footnote
5311[^long-footnote-name]: Long named footnote
5312[^123abc]: Mixed alphanumeric
5313
5314[ref1]: ./file1.md
5315[ref2]: ./file2.md
5316"#;
5317        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5318
5319        // Should only have the two regular reference definitions
5320        assert_eq!(
5321            ctx.reference_defs.len(),
5322            2,
5323            "Only regular reference definitions should be parsed"
5324        );
5325
5326        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5327        assert!(ids.contains(&"ref1"));
5328        assert!(ids.contains(&"ref2"));
5329        assert!(!ids.iter().any(|id| id.starts_with('^')));
5330    }
5331
5332    // =========================================================================
5333    // Tests for has_char and char_count methods
5334    // =========================================================================
5335
5336    #[test]
5337    fn test_has_char_tracked_characters() {
5338        // Test all 12 tracked characters
5339        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5340        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5341
5342        // All tracked characters should be detected
5343        assert!(ctx.has_char('#'), "Should detect hash");
5344        assert!(ctx.has_char('*'), "Should detect asterisk");
5345        assert!(ctx.has_char('_'), "Should detect underscore");
5346        assert!(ctx.has_char('-'), "Should detect hyphen");
5347        assert!(ctx.has_char('+'), "Should detect plus");
5348        assert!(ctx.has_char('>'), "Should detect gt");
5349        assert!(ctx.has_char('|'), "Should detect pipe");
5350        assert!(ctx.has_char('['), "Should detect bracket");
5351        assert!(ctx.has_char('`'), "Should detect backtick");
5352        assert!(ctx.has_char('<'), "Should detect lt");
5353        assert!(ctx.has_char('!'), "Should detect exclamation");
5354        assert!(ctx.has_char('\n'), "Should detect newline");
5355    }
5356
5357    #[test]
5358    fn test_has_char_absent_characters() {
5359        let content = "Simple text without special chars";
5360        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5361
5362        // None of the tracked characters should be present
5363        assert!(!ctx.has_char('#'), "Should not detect hash");
5364        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5365        assert!(!ctx.has_char('_'), "Should not detect underscore");
5366        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5367        assert!(!ctx.has_char('+'), "Should not detect plus");
5368        assert!(!ctx.has_char('>'), "Should not detect gt");
5369        assert!(!ctx.has_char('|'), "Should not detect pipe");
5370        assert!(!ctx.has_char('['), "Should not detect bracket");
5371        assert!(!ctx.has_char('`'), "Should not detect backtick");
5372        assert!(!ctx.has_char('<'), "Should not detect lt");
5373        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5374        // Note: single line content has no newlines
5375        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5376    }
5377
5378    #[test]
5379    fn test_has_char_fallback_for_untracked() {
5380        let content = "Text with @mention and $dollar and %percent";
5381        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5382
5383        // Untracked characters should fall back to content.contains()
5384        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5385        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5386        assert!(ctx.has_char('%'), "Should detect % via fallback");
5387        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5388    }
5389
5390    #[test]
5391    fn test_char_count_tracked_characters() {
5392        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5393        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5394
5395        // Count each tracked character
5396        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5397        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5398        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5399        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5400        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5401        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5402        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5403        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5404        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5405        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5406        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5407        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5408    }
5409
5410    #[test]
5411    fn test_char_count_zero_for_absent() {
5412        let content = "Plain text";
5413        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5414
5415        assert_eq!(ctx.char_count('#'), 0);
5416        assert_eq!(ctx.char_count('*'), 0);
5417        assert_eq!(ctx.char_count('_'), 0);
5418        assert_eq!(ctx.char_count('\n'), 0);
5419    }
5420
5421    #[test]
5422    fn test_char_count_fallback_for_untracked() {
5423        let content = "@@@ $$ %%%";
5424        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5425
5426        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5427        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5428        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5429        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5430    }
5431
5432    #[test]
5433    fn test_char_count_empty_content() {
5434        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5435
5436        assert_eq!(ctx.char_count('#'), 0);
5437        assert_eq!(ctx.char_count('*'), 0);
5438        assert_eq!(ctx.char_count('@'), 0);
5439        assert!(!ctx.has_char('#'));
5440        assert!(!ctx.has_char('@'));
5441    }
5442
5443    // =========================================================================
5444    // Tests for is_in_html_tag method
5445    // =========================================================================
5446
/// Probes `is_in_html_tag` over `<div>content</div>`: bytes of both tags are
/// expected inside a tag, bytes of the text between them are not.
#[test]
fn test_is_in_html_tag_simple() {
    let html = "<div>content</div>";
    let lint_ctx = LintContext::new(html, MarkdownFlavor::Standard, None);

    // Opening `<div>` covers bytes 0..=4.
    let opening_tag = [
        (0, "Position 0 (<) should be in tag"),
        (1, "Position 1 (d) should be in tag"),
        (4, "Position 4 (>) should be in tag"),
    ];
    for (pos, why) in opening_tag {
        assert!(lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // `content` sits between the two tags.
    let between_tags = [
        (5, "Position 5 (c) should not be in tag"),
        (10, "Position 10 (t) should not be in tag"),
    ];
    for (pos, why) in between_tags {
        assert!(!lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // Closing `</div>` covers bytes 12..=17.
    let closing_tag = [
        (12, "Position 12 (<) should be in tag"),
        (17, "Position 17 (>) should be in tag"),
    ];
    for (pos, why) in closing_tag {
        assert!(lint_ctx.is_in_html_tag(pos), "{why}");
    }
}
5465
/// Probes `is_in_html_tag` around the self-closing `<br/>` in
/// `"Text <br/> more text"` (the tag covers bytes 5..=9).
#[test]
fn test_is_in_html_tag_self_closing() {
    let text = "Text <br/> more text";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Leading text, up to and including the space before the tag.
    for (pos, why) in [
        (0, "Position 0 should not be in tag"),
        (4, "Position 4 (space) should not be in tag"),
    ] {
        assert!(!lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // The tag itself.
    for (pos, why) in [
        (5, "Position 5 (<) should be in tag"),
        (8, "Position 8 (/) should be in tag"),
        (9, "Position 9 (>) should be in tag"),
    ] {
        assert!(lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // First byte following the tag.
    assert!(
        !lint_ctx.is_in_html_tag(10),
        "Position 10 (space) should not be in tag"
    );
}
5483
/// Probes `is_in_html_tag` for an anchor element carrying two attributes:
/// the whole opening tag (bytes 0..=26) and the start of the closing tag are
/// expected inside a tag, the link text is not.
#[test]
fn test_is_in_html_tag_with_attributes() {
    let html = r#"<a href="url" class="link">text</a>"#;
    let lint_ctx = LintContext::new(html, MarkdownFlavor::Standard, None);

    // Positions spread over the opening tag, attribute values included.
    let inside_opening = [
        (0, "Start of tag"),
        (10, "Inside href attribute"),
        (20, "Inside class attribute"),
        (26, "End of opening tag"),
    ];
    for (pos, why) in inside_opening {
        assert!(lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // The word `text` between the tags.
    let link_text = [(27, "Start of content"), (30, "End of content")];
    for (pos, why) in link_text {
        assert!(!lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // `<` of `</a>`.
    assert!(lint_ctx.is_in_html_tag(31), "Start of closing tag");
}
5502
/// Probes `is_in_html_tag` for an opening tag whose attribute and closing `>`
/// are spread over several lines.
#[test]
fn test_is_in_html_tag_multiline() {
    let html = "<div\n  class=\"test\"\n>\ncontent\n</div>";
    let lint_ctx = LintContext::new(html, MarkdownFlavor::Standard, None);

    // Positions on the first and second line of the opening tag.
    for (pos, why) in [
        (0, "Start of multiline tag"),
        (5, "After first newline in tag"),
        (15, "Inside attribute"),
    ] {
        assert!(lint_ctx.is_in_html_tag(pos), "{why}");
    }

    // Skip the `>` that ends the opening tag and the newline after it to
    // land on the first byte of `content`.
    let first_content_byte = html.find(">\n").unwrap() + 2;
    assert!(
        !lint_ctx.is_in_html_tag(first_content_byte),
        "Content after tag"
    );
}
5517
/// With no HTML in the document, `is_in_html_tag` must be false for every
/// byte offset.
#[test]
fn test_is_in_html_tag_no_tags() {
    let plain = "Plain text without any HTML";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    (0..plain.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_html_tag(i), "Position {i} should not be in tag");
    });
}
5528
5529    // =========================================================================
5530    // Tests for is_in_jinja_range method
5531    // =========================================================================
5532
/// Probes `is_in_jinja_range` around the `{{ name }}` expression in
/// `"Hello {{ name }}!"` (the expression covers bytes 6..=15).
#[test]
fn test_is_in_jinja_range_expression() {
    let template = "Hello {{ name }}!";
    let lint_ctx = LintContext::new(template, MarkdownFlavor::Standard, None);

    // Text in front of the expression.
    for (pos, why) in [
        (0, "H should not be in Jinja"),
        (5, "Space before Jinja should not be in Jinja"),
    ] {
        assert!(!lint_ctx.is_in_jinja_range(pos), "{why}");
    }

    // Delimiters and payload of the expression.
    for (pos, why) in [
        (6, "First brace should be in Jinja"),
        (7, "Second brace should be in Jinja"),
        (10, "name should be in Jinja"),
        (14, "Closing brace should be in Jinja"),
        (15, "Second closing brace should be in Jinja"),
    ] {
        assert!(lint_ctx.is_in_jinja_range(pos), "{why}");
    }

    // The trailing `!`.
    assert!(!lint_ctx.is_in_jinja_range(16), "! should not be in Jinja");
}
5552
/// Probes `is_in_jinja_range` for `{% ... %}` statements: both the `if` and
/// the `endif` statement are expected in range, the text between them is not.
#[test]
fn test_is_in_jinja_range_statement() {
    let template = "{% if condition %}content{% endif %}";
    let lint_ctx = LintContext::new(template, MarkdownFlavor::Standard, None);

    // `{% if condition %}` covers bytes 0..=17.
    for (pos, why) in [
        (0, "Start of Jinja statement"),
        (5, "condition should be in Jinja"),
        (17, "End of opening statement"),
    ] {
        assert!(lint_ctx.is_in_jinja_range(pos), "{why}");
    }

    // First byte of the literal `content`.
    assert!(
        !lint_ctx.is_in_jinja_range(18),
        "content should not be in Jinja"
    );

    // `{% endif %}` starts at byte 25.
    for (pos, why) in [(25, "Start of endif"), (32, "endif should be in Jinja")] {
        assert!(lint_ctx.is_in_jinja_range(pos), "{why}");
    }
}
5570
/// Two Jinja expressions in one line: both must be in range, while the
/// ` and ` joining them must not.
#[test]
fn test_is_in_jinja_range_multiple() {
    let template = "{{ a }} and {{ b }}";
    let lint_ctx = LintContext::new(template, MarkdownFlavor::Standard, None);

    // `{{ a }}` covers bytes 0..=6.
    for pos in [0, 3, 6] {
        assert!(lint_ctx.is_in_jinja_range(pos));
    }

    // Inside the ` and ` separator.
    for pos in [8, 11] {
        assert!(!lint_ctx.is_in_jinja_range(pos));
    }

    // `{{ b }}` covers bytes 12..=18.
    for pos in [12, 15, 18] {
        assert!(lint_ctx.is_in_jinja_range(pos));
    }
}
5590
/// A document without Jinja delimiters: `is_in_jinja_range` must be false for
/// every byte offset.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let plain = "Plain text with single braces but not Jinja";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    (0..plain.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    });
}
5601
5602    // =========================================================================
5603    // Tests for is_in_link_title method
5604    // =========================================================================
5605
/// A reference definition with a double-quoted title: `is_in_link_title` must
/// be true from `title_byte_start` up to (but excluding) `title_byte_end`,
/// and false inside the URL.
#[test]
fn test_is_in_link_title_with_title() {
    let markdown = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Exactly one definition is expected, and it must carry a title span.
    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_some());
    assert!(definition.title_byte_end.is_some());

    let title_start = definition.title_byte_start.unwrap();
    let title_end = definition.title_byte_end.unwrap();

    // Byte 10 lies inside the URL.
    assert!(!lint_ctx.is_in_link_title(10), "URL should not be in title");

    // First, middle and last byte of the title span.
    let inside_title = [
        (title_start, "Title start should be in title"),
        (title_start + 5, "Middle of title should be in title"),
        (title_end - 1, "End of title should be in title"),
    ];
    for (pos, why) in inside_title {
        assert!(lint_ctx.is_in_link_title(pos), "{why}");
    }

    // The end offset itself is excluded.
    assert!(
        !lint_ctx.is_in_link_title(title_end),
        "After title end should not be in title"
    );
}
5639
/// A reference definition without a title: both title offsets are `None` and
/// `is_in_link_title` is false for every byte offset.
#[test]
fn test_is_in_link_title_without_title() {
    let markdown = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // One definition, carrying no title span.
    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_none());
    assert!(definition.title_byte_end.is_none());

    (0..markdown.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i), "Position {i} should not be in title");
    });
}
5656
/// Three reference definitions, of which the first and third have titles:
/// checks the recorded title offsets and `is_in_link_title` for each.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let markdown = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // `ref1` is titled.
    let first = lint_ctx.reference_defs.iter().find(|d| d.id == "ref1").unwrap();
    assert!(first.title_byte_start.is_some());

    // `ref2` is untitled.
    let second = lint_ctx.reference_defs.iter().find(|d| d.id == "ref2").unwrap();
    assert!(second.title_byte_start.is_none());

    // `ref3` is titled.
    let third = lint_ctx.reference_defs.iter().find(|d| d.id == "ref3").unwrap();
    assert!(third.title_byte_start.is_some());

    // Only probe positions when both offsets of a title were recorded.
    if let Some((start, end)) = first.title_byte_start.zip(first.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
        assert!(!lint_ctx.is_in_link_title(end + 5));
    }

    if let Some((start, _)) = third.title_byte_start.zip(third.title_byte_end) {
        assert!(lint_ctx.is_in_link_title(start + 1));
    }
}
5691
/// A reference definition with a single-quoted title. The position checks
/// only run when both title offsets were recorded by the parser.
#[test]
fn test_is_in_link_title_single_quotes() {
    let markdown = "[ref]: /url 'Single quoted title'\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];

    let title_span = definition.title_byte_start.zip(definition.title_byte_end);
    if let Some((start, end)) = title_span {
        assert!(lint_ctx.is_in_link_title(start));
        assert!(lint_ctx.is_in_link_title(start + 5));
        assert!(!lint_ctx.is_in_link_title(end));
    }
}
5706
/// A reference definition with a parenthesized title. The parser may or may
/// not recognise such titles, so this test only requires `is_in_link_title`
/// to be consistent with whatever was parsed.
#[test]
fn test_is_in_link_title_parentheses() {
    let markdown = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Title span of the first definition, if a definition exists and both of
    // its title offsets were recorded.
    let title_span = lint_ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match title_span {
        Some((start, end)) => {
            assert!(lint_ctx.is_in_link_title(start));
            assert!(lint_ctx.is_in_link_title(start + 5));
            assert!(!lint_ctx.is_in_link_title(end));
        }
        None => {
            // No definition or no title was parsed: nothing may count as a title.
            for i in 0..markdown.len() {
                assert!(!lint_ctx.is_in_link_title(i));
            }
        }
    }
}
5735
/// Without any reference definition, `reference_defs` is empty and
/// `is_in_link_title` is false for every byte offset.
#[test]
fn test_is_in_link_title_no_refs() {
    let plain = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    (0..plain.len()).for_each(|i| {
        assert!(!lint_ctx.is_in_link_title(i));
    });
}
5747
5748    // =========================================================================
5749    // Math span tests (Issue #289)
5750    // =========================================================================
5751
/// A single `$...$` span whose content looks like a Markdown link must be
/// reported as one inline math span with the content between the dollars.
#[test]
fn test_math_spans_inline() {
    let markdown = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let only_span = &spans[0];
    assert!(!only_span.is_display, "Should be inline math, not display");
    assert_eq!(only_span.content, "[f](x)", "Content should be extracted correctly");
}
5764
/// A `$$...$$` block on a single line must be reported as one display math
/// span that still contains the link-like `[x](\zeta)` text.
#[test]
fn test_math_spans_display_single_line() {
    let markdown = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let only_span = &spans[0];
    assert!(only_span.is_display, "Should be display math");

    let keeps_link_like_text = only_span.content.contains("[x](\\zeta)");
    assert!(keeps_link_like_text, "Content should contain the link-like pattern");
}
5780
/// A `$$` block whose delimiters sit on their own lines must be reported as
/// exactly one display math span.
#[test]
fn test_math_spans_display_multiline() {
    let markdown = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");
    assert!(spans[0].is_display, "Should be display math");
}
5792
/// `is_in_math_span` must be true for offsets between the two `$` delimiters
/// and false before the span and past its end.
#[test]
fn test_is_in_math_span() {
    let markdown = "Text $[f](x)$ more text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Offset of the opening `$`, and the offset one past the closing `$`.
    let opening_dollar = markdown.find('$').unwrap();
    let past_closing_dollar = markdown.rfind('$').unwrap() + 1;

    // Two offsets inside the span.
    for delta in [1, 3] {
        assert!(
            lint_ctx.is_in_math_span(opening_dollar + delta),
            "Position inside math span should return true"
        );
    }

    // Offsets on either side of the span.
    assert!(
        !lint_ctx.is_in_math_span(0),
        "Position before math span should return false"
    );
    assert!(
        !lint_ctx.is_in_math_span(past_closing_dollar + 1),
        "Position after math span should return false"
    );
}
5818
/// One inline math span and one inline code span in the same line must each
/// be reported once, with their delimiters stripped from the content.
#[test]
fn test_math_spans_mixed_with_code() {
    let markdown = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Content between the `$` delimiters.
    assert_eq!(math[0].content, "[f](x)");
    // Content between the backticks.
    assert_eq!(code[0].content, "[g](y)");
}
5835
/// Text without `$` delimiters must yield no math spans.
#[test]
fn test_math_spans_no_math() {
    let plain = "Regular text without any math at all.";
    let lint_ctx = LintContext::new(plain, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.math_spans().is_empty(), "Should have no math spans");
}
5844
/// Two inline spans followed by one display span must be reported as three
/// spans with the matching inline/display split.
#[test]
fn test_math_spans_multiple() {
    let markdown = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Every span is either display or inline, so the inline count is the
    // remainder once display spans are counted.
    let display_count = spans.iter().filter(|span| span.is_display).count();
    let inline_count = spans.len() - display_count;

    assert_eq!(inline_count, 2, "Should have two inline math spans");
    assert_eq!(display_count, 1, "Should have one display math span");
}
5860
/// Boundary behaviour of `is_in_math_span` for the 8-byte document
/// `$[f](x)$`: the span's `byte_offset` is inside, `byte_end` is outside.
#[test]
fn test_is_in_math_span_boundary_positions() {
    let markdown = "$[f](x)$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let first_byte = spans[0].byte_offset;
    let end_exclusive = spans[0].byte_end;

    // Offsets expected inside: the start, the byte after it, and the last
    // byte before the exclusive end.
    let inside = [
        (first_byte, "Start position should be in span"),
        (first_byte + 1, "Position after start should be in span"),
        (end_exclusive - 1, "Position at end-1 should be in span"),
    ];
    for (pos, why) in inside {
        assert!(lint_ctx.is_in_math_span(pos), "{why}");
    }

    // The end offset itself is not part of the span.
    assert!(
        !lint_ctx.is_in_math_span(end_exclusive),
        "Position at byte_end should NOT be in span (exclusive)"
    );
}
5897
/// A math span that opens the document must start at byte offset 0.
#[test]
fn test_math_spans_at_document_start() {
    let markdown = "$x$ text";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let start = spans[0].byte_offset;
    assert_eq!(start, 0, "Math should start at byte 0");
}
5907
/// A math span that closes the document must end at the document length.
#[test]
fn test_math_spans_at_document_end() {
    let markdown = "text $x$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let end = spans[0].byte_end;
    assert_eq!(end, markdown.len(), "Math should end at document end");
}
5917
/// Two inline math spans with no text between them: at least one span must be
/// detected and every byte of the document must lie inside some math span.
#[test]
fn test_math_spans_consecutive() {
    let markdown = "$a$$b$";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // The exact number of spans is left open; only "not zero" is required.
    assert!(
        !lint_ctx.math_spans().is_empty(),
        "Should detect at least one math span"
    );

    (0..markdown.len()).for_each(|i| {
        assert!(lint_ctx.is_in_math_span(i), "Position {i} should be in a math span");
    });
}
5932
/// A lone `$` used as a currency sign must not produce a math span whose
/// content contains the amount.
#[test]
fn test_math_spans_currency_not_math() {
    let markdown = "Price is $100";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Holds trivially when no span exists at all; otherwise no span may
    // contain the text `100`.
    let amount_is_not_math = lint_ctx
        .math_spans()
        .iter()
        .all(|span| !span.content.contains("100"));
    assert!(
        amount_is_not_math,
        "Unbalanced $ should not create math span containing 100"
    );
}
5947
5948    // =========================================================================
5949    // Tests for O(1) reference definition lookups via HashMap
5950    // =========================================================================
5951
/// `get_reference_url` must resolve each of the three definitions regardless
/// of the letter case used in the query, and return `None` for unknown ids.
#[test]
fn test_reference_lookup_o1_basic() {
    let markdown = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // (queried id, expected URL)
    let lookups = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")), // differs from the definition only in case
        ("Ref1", Some("/url1")), // differs from the definition only in case
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];
    for (id, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), expected_url);
    }
}
5973
/// `get_reference_def` must return the definition (URL and title) for a known
/// id in any letter case, and `None` for an unknown id.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let markdown = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Query with the id exactly as written in the document.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Query with the id upper-cased.
    let upper = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(upper.url, "https://example.com");

    // Unknown id.
    assert!(lint_ctx.get_reference_def("nonexistent").is_none());
}
5992
/// `has_reference_def` must be true for defined ids in any letter case and
/// false for an id that was never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let markdown = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // Defined ids, queried both as written and with different casing.
    for id in ["foo", "FOO", "bar", "Bar"] {
        assert!(lint_ctx.has_reference_def(id));
    }

    // Never defined.
    assert!(!lint_ctx.has_reference_def("baz"));
}
6007
/// With no reference definitions in the document, every lookup helper must
/// report "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let lint_ctx = LintContext::new("No references here.", MarkdownFlavor::Standard, None);
    let missing_id = "anything";

    assert!(lint_ctx.reference_defs.is_empty());
    assert_eq!(lint_ctx.get_reference_url(missing_id), None);
    assert!(lint_ctx.get_reference_def(missing_id).is_none());
    assert!(!lint_ctx.has_reference_def(missing_id));
}
6018
/// Reference ids containing `-`, `_` or `.` must resolve to their URLs.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let markdown = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (queried id, expected URL)
    let lookups = [
        ("ref-with-dash", Some("/url1")),
        ("ref_with_underscore", Some("/url2")),
        ("ref.with.dots", Some("/url3")),
    ];
    for (id, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), expected_url);
    }
}
6031
/// Non-ASCII reference ids must resolve to their URLs, including when the
/// query is the upper-cased form of an accented id.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let markdown = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::Standard, None);

    // (queried id, expected URL)
    let lookups = [
        ("日本語", Some("/japanese")),
        ("émoji", Some("/emoji")),
        ("ÉMOJI", Some("/emoji")), // upper-cased query
    ];
    for (id, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), expected_url);
    }
}
6043}