Skip to main content

rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Times a section of code and optionally reports how long it took.
///
/// Arguments: a display name, a boolean "profiling enabled" expression, and the
/// expression to run. The macro evaluates to the value of that expression.
/// On non-WASM targets the elapsed time is written to stderr when profiling is
/// enabled; the timestamp is taken before the section runs either way.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            let elapsed = timer.elapsed();
            eprintln!("[PROFILE] {}: {:?}", $name, elapsed);
        }
        output
    }};
}

/// WASM variant: no timing, the section expression is evaluated as-is and the
/// name/profile arguments are ignored.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        $code
    }};
}
31
// Comprehensive link pattern that captures both inline and reference links.
// Flags: (?s) makes `.` match newlines; (?x) enables verbose mode, so the
// whitespace and `#` comments inside the pattern text are not matched literally.
// Capture groups:
//   1 = link text (escaped characters allowed, no nested brackets)
//   2 = URL written in angle brackets, 3 = bare URL
//   4 = double-quoted title, 5 = single-quoted title
//   6 = reference ID (second bracket pair of a reference link)
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern: same shape as the link pattern but requires a leading `!`.
// Flags: (?s) makes `.` match newlines; (?x) enables verbose mode, so the
// whitespace and `#` comments inside the pattern text are not matched literally.
// Capture groups:
//   1 = alt text (escaped characters allowed, no nested brackets)
//   2 = URL written in angle brackets, 3 = bare URL
//   4 = double-quoted title, 5 = single-quoted title
//   6 = reference ID (second bracket pair of a reference image)
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[id]: url "title"` on a single line.
// (?m) makes `^`/`$` match at line boundaries. Up to three leading spaces are allowed.
// Capture groups: 1 = reference ID, 2 = URL (run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title (title is optional).
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs have no local pattern here: they use the centralized URL regex imported from regex_cache.
65
// Pattern for email addresses.
// Unanchored: matches `local@domain.tld` anywhere in the haystack, where the
// final label after the last dot is at least two ASCII letters.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the haystack; group 1 captures the whole prefix:
// optional leading whitespace, one or more `>` markers, and any whitespace after them.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a line
///
/// Instances are stored in `LintContext::lines`, where index `i` describes the
/// line with 1-indexed line number `i + 1`. The line text itself is not stored:
/// `byte_offset` and `byte_len` locate it inside the source document, and
/// `LineInfo::content` slices it back out.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (set for every line after the first line of a code span that spans several lines)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
    pub in_obsidian_comment: bool,
}
132
133impl LineInfo {
134    /// Get the line content as a string slice from the source document
135    pub fn content<'a>(&self, source: &'a str) -> &'a str {
136        &source[self.byte_offset..self.byte_offset + self.byte_len]
137    }
138
139    /// Check if this line is inside MkDocs-specific indented content (admonitions or tabs).
140    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
141    /// but in MkDocs flavor it's actually container content that should be preserved.
142    #[inline]
143    pub fn in_mkdocs_container(&self) -> bool {
144        self.in_admonition || self.in_content_tab
145    }
146}
147
/// Information about a list item
///
/// Attached to the line that starts the item via `LineInfo::list_item`.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    /// NOTE(review): presumably `None` for unordered items — confirm against the parser
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    pub content_column: usize,
}
162
/// Heading style type
///
/// Recorded on each `HeadingInfo` to tell apart `#`-prefixed headings from the
/// two underline (Setext) forms. The enum carries no data, so in addition to
/// `PartialEq` it derives `Eq` and `Hash`, which lets it be used in
/// `Eq`-bounded generics and as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline
    Setext1,
    /// Setext style heading with - underline
    Setext2,
}
173
/// Parsed link information
///
/// Collected into `LintContext::links`. The `'a` lifetime is the lifetime of the
/// document text held by `LintContext`, so the `Cow` fields can hold borrowed
/// text with that lifetime instead of owned copies.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
198
/// Information about a broken link reported by pulldown-cmark
///
/// Collected into `LintContext::broken_links` by the same parsing step that
/// produces the link list.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
207
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Collected into `LintContext::footnote_refs` by the same parsing step that
/// produces the link list.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
220
/// Parsed image information
///
/// Collected into `LintContext::images`. The `'a` lifetime is the lifetime of the
/// document text held by `LintContext`, so the `Cow` fields can hold borrowed
/// text with that lifetime instead of owned copies. The field layout mirrors
/// `ParsedLink`, with `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
245
/// Reference definition [ref]: url "title"
///
/// Collected into `LintContext::reference_defs`; `LintContext` also keeps a
/// lookup map from the lowercased `id` to the index of the definition.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
266
/// Parsed code span information
///
/// A span may cover several lines: when `end_line > line`, `LintContext::new`
/// flags every line after the first as a code-span continuation.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
287
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Math spans are not computed by `LintContext::new`; the cache that holds them
/// starts out empty and is filled on first access.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
308
/// Information about a heading
///
/// Attached to the heading's line via `LineInfo::heading`. Malformed headings are
/// recorded too, with `is_valid` set to `false`; `ValidHeadingsIter` skips those.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
336
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; both references borrow
/// from the slice of `LineInfo` the iterator walks.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
350
351/// Iterator over valid CommonMark headings in a document
352///
353/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
354/// but should not be processed by other heading rules.
355pub struct ValidHeadingsIter<'a> {
356    lines: &'a [LineInfo],
357    current_index: usize,
358}
359
360impl<'a> ValidHeadingsIter<'a> {
361    fn new(lines: &'a [LineInfo]) -> Self {
362        Self {
363            lines,
364            current_index: 0,
365        }
366    }
367}
368
369impl<'a> Iterator for ValidHeadingsIter<'a> {
370    type Item = ValidHeading<'a>;
371
372    fn next(&mut self) -> Option<Self::Item> {
373        while self.current_index < self.lines.len() {
374            let idx = self.current_index;
375            self.current_index += 1;
376
377            let line_info = &self.lines[idx];
378            if let Some(heading) = &line_info.heading
379                && heading.is_valid
380            {
381                return Some(ValidHeading {
382                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
383                    heading,
384                    line_info,
385                });
386            }
387        }
388        None
389    }
390}
391
/// Information about a blockquote line
///
/// Attached to the line via `LineInfo::blockquote`.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
412
/// Information about a list block
///
/// Collected into `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    /// NOTE(review): presumably 1-indexed line numbers like `start_line` — confirm
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
433
434use std::sync::{Arc, OnceLock};
435
/// Lookup table keyed by a line's starting byte offset. Each value describes the
/// list item on that line as
/// `(is_ordered, marker, marker_column, content_column, number)`.
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// List of `(usize, usize)` byte-range pairs, used when detecting JSX expressions
/// and MDX comments.
type ByteRanges = Vec<(usize, usize)>;
441
/// Character frequency data for fast content analysis
///
/// Stored in `LintContext::char_frequency`. The derived `Default` yields a value
/// with every counter at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
470
/// Pre-parsed HTML tag information
///
/// HTML tags are not computed by `LintContext::new`; the cache that holds them
/// starts out empty.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
493
/// Pre-parsed emphasis span information
///
/// Emphasis spans are produced together with the basic line information in
/// `LintContext::new` and stored in the context's emphasis-span cache.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
514
/// Pre-parsed table row information
///
/// Held by `LintContext` in its table-row cache.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one string per column
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
527
/// Pre-parsed bare URL information (not in links)
///
/// Held by `LintContext` in its bare-URL cache. Email addresses are represented
/// here as well, with `url_type` set to "email".
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
546
/// Pre-parsed view of one Markdown document, shared by the lint rules.
///
/// `LintContext::new` scans the document once and stores the results here:
/// per-line information, links, images, reference definitions, list and table
/// blocks, and byte ranges for flavor-specific constructs. Some derived data is
/// kept behind `OnceLock` caches; of those, the code-span and emphasis-span
/// caches are already filled by `new`, while the math-span and HTML-tag caches
/// start out empty.
pub struct LintContext<'a> {
    pub content: &'a str, // The document text being linted
    pub line_offsets: Vec<usize>, // Byte offset of the start of each line (0, then the position after every '\n')
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (already populated by `new`)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Emphasis spans (already populated by `new`)
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
}
580
/// The four consecutive pieces of a blockquote line, each borrowed from the
/// original line: `indent + markers + spaces_after + content == line`.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Split a line into its blockquote components without using a regex.
///
/// The line is consumed left to right: leading spaces/tabs (`indent`), a run of
/// consecutive `>` characters (`markers`), the spaces/tabs that follow
/// (`spaces_after`), and everything else (`content`). Returns `None` when no `>`
/// follows the indentation. Only an unbroken run of `>` counts as markers, so in
/// `"> > x"` the second `>` is part of `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    fn is_blank(c: char) -> bool {
        matches!(c, ' ' | '\t')
    }

    // Each step trims one kind of prefix; the length difference gives the piece.
    let after_indent = line.trim_start_matches(is_blank);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());
    if markers.is_empty() {
        // Not a blockquote line: the first non-blank character is not '>'.
        return None;
    }

    let content = after_markers.trim_start_matches(is_blank);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
625
626impl<'a> LintContext<'a> {
627    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
628        #[cfg(not(target_arch = "wasm32"))]
629        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
630        #[cfg(target_arch = "wasm32")]
631        let profile = false;
632
633        let line_offsets = profile_section!("Line offsets", profile, {
634            let mut offsets = vec![0];
635            for (i, c) in content.char_indices() {
636                if c == '\n' {
637                    offsets.push(i + 1);
638                }
639            }
640            offsets
641        });
642
643        // Detect code blocks and code spans once and cache them
644        let (code_blocks, code_span_ranges) = profile_section!(
645            "Code blocks",
646            profile,
647            CodeBlockUtils::detect_code_blocks_and_spans(content)
648        );
649
650        // Pre-compute HTML comment ranges ONCE for all operations
651        let html_comment_ranges = profile_section!(
652            "HTML comment ranges",
653            profile,
654            crate::utils::skip_context::compute_html_comment_ranges(content)
655        );
656
657        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
658        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
659            if flavor == MarkdownFlavor::MkDocs {
660                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
661            } else {
662                Vec::new()
663            }
664        });
665
666        // Pre-compute Quarto div block ranges for Quarto flavor
667        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
668            if flavor == MarkdownFlavor::Quarto {
669                crate::utils::quarto_divs::detect_div_block_ranges(content)
670            } else {
671                Vec::new()
672            }
673        });
674
675        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
676        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
677        let (mut lines, emphasis_spans) = profile_section!(
678            "Basic line info",
679            profile,
680            Self::compute_basic_line_info(
681                content,
682                &line_offsets,
683                &code_blocks,
684                flavor,
685                &html_comment_ranges,
686                &autodoc_ranges,
687                &quarto_div_ranges,
688            )
689        );
690
691        // Detect HTML blocks BEFORE heading detection
692        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
693
694        // Detect ESM import/export blocks in MDX files BEFORE heading detection
695        profile_section!(
696            "ESM blocks",
697            profile,
698            Self::detect_esm_blocks(content, &mut lines, flavor)
699        );
700
701        // Detect JSX expressions and MDX comments in MDX files
702        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
703            "JSX/MDX detection",
704            profile,
705            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
706        );
707
708        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
709        profile_section!(
710            "MkDocs constructs",
711            profile,
712            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
713        );
714
715        // Detect Obsidian comments (%%...%%) in Obsidian flavor
716        let obsidian_comment_ranges = profile_section!(
717            "Obsidian comments",
718            profile,
719            Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
720        );
721
722        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
723        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
724
725        // Now detect headings and blockquotes
726        profile_section!(
727            "Headings & blockquotes",
728            profile,
729            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
730        );
731
732        // Parse code spans early so we can exclude them from link/image parsing
733        let code_spans = profile_section!(
734            "Code spans",
735            profile,
736            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
737        );
738
739        // Mark lines that are continuations of multi-line code spans
740        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
741        for span in &code_spans {
742            if span.end_line > span.line {
743                // Mark lines after the first line as continuations
744                for line_num in (span.line + 1)..=span.end_line {
745                    if let Some(line_info) = lines.get_mut(line_num - 1) {
746                        line_info.in_code_span_continuation = true;
747                    }
748                }
749            }
750        }
751
752        // Parse links, images, references, and list blocks
753        let (links, broken_links, footnote_refs) = profile_section!(
754            "Links",
755            profile,
756            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
757        );
758
759        let images = profile_section!(
760            "Images",
761            profile,
762            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
763        );
764
765        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
766
767        // Build O(1) lookup map for reference definitions by lowercase ID
768        let reference_defs_map: HashMap<String, usize> = reference_defs
769            .iter()
770            .enumerate()
771            .map(|(idx, def)| (def.id.to_lowercase(), idx))
772            .collect();
773
774        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
775
776        // Compute character frequency for fast content analysis
777        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
778
779        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
780        let table_blocks = profile_section!(
781            "Table blocks",
782            profile,
783            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
784                content,
785                &code_blocks,
786                &code_spans,
787                &html_comment_ranges,
788            )
789        );
790
791        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
792        let line_index = profile_section!(
793            "Line index",
794            profile,
795            crate::utils::range_utils::LineIndex::new(content)
796        );
797
798        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
799        let jinja_ranges = profile_section!(
800            "Jinja ranges",
801            profile,
802            crate::utils::jinja_utils::find_jinja_ranges(content)
803        );
804
805        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
806        let citation_ranges = profile_section!("Citation ranges", profile, {
807            if flavor == MarkdownFlavor::Quarto {
808                crate::utils::quarto_divs::find_citation_ranges(content)
809            } else {
810                Vec::new()
811            }
812        });
813
814        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
815        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
816            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
817            let mut ranges = Vec::new();
818            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
819                ranges.push((mat.start(), mat.end()));
820            }
821            ranges
822        });
823
824        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
825
826        Self {
827            content,
828            line_offsets,
829            code_blocks,
830            lines,
831            links,
832            images,
833            broken_links,
834            footnote_refs,
835            reference_defs,
836            reference_defs_map,
837            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
838            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
839            list_blocks,
840            char_frequency,
841            html_tags_cache: OnceLock::new(),
842            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
843            table_rows_cache: OnceLock::new(),
844            bare_urls_cache: OnceLock::new(),
845            has_mixed_list_nesting_cache: OnceLock::new(),
846            html_comment_ranges,
847            table_blocks,
848            line_index,
849            jinja_ranges,
850            flavor,
851            source_file,
852            jsx_expression_ranges,
853            mdx_comment_ranges,
854            citation_ranges,
855            shortcode_ranges,
856            inline_config,
857            obsidian_comment_ranges,
858        }
859    }
860
861    /// Check if a rule is disabled at a specific line number (1-indexed)
862    ///
863    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
864    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
865    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
866        self.inline_config.is_rule_disabled(rule_name, line_number)
867    }
868
869    /// Get code spans - computed lazily on first access
870    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
871        Arc::clone(
872            self.code_spans_cache
873                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
874        )
875    }
876
877    /// Get math spans - computed lazily on first access
878    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
879        Arc::clone(
880            self.math_spans_cache
881                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
882        )
883    }
884
885    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
886    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
887        let math_spans = self.math_spans();
888        math_spans
889            .iter()
890            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
891    }
892
893    /// Get HTML comment ranges - pre-computed during LintContext construction
894    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
895        &self.html_comment_ranges
896    }
897
898    /// Get Obsidian comment ranges - pre-computed during LintContext construction
899    /// Returns empty slice for non-Obsidian flavors
900    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
901        &self.obsidian_comment_ranges
902    }
903
904    /// Check if a byte position is inside an Obsidian comment
905    ///
906    /// Returns false for non-Obsidian flavors.
907    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
908        self.obsidian_comment_ranges
909            .iter()
910            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
911    }
912
913    /// Check if a line/column position is inside an Obsidian comment
914    ///
915    /// Line number is 1-indexed, column is 1-indexed.
916    /// Returns false for non-Obsidian flavors.
917    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
918        if self.obsidian_comment_ranges.is_empty() {
919            return false;
920        }
921
922        // Convert line/column (1-indexed, char-based) to byte position
923        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
924        self.is_in_obsidian_comment(byte_pos)
925    }
926
927    /// Get HTML tags - computed lazily on first access
928    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
929        Arc::clone(self.html_tags_cache.get_or_init(|| {
930            Arc::new(Self::parse_html_tags(
931                self.content,
932                &self.lines,
933                &self.code_blocks,
934                self.flavor,
935            ))
936        }))
937    }
938
939    /// Get emphasis spans - pre-computed during construction
940    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
941        Arc::clone(
942            self.emphasis_spans_cache
943                .get()
944                .expect("emphasis_spans_cache initialized during construction"),
945        )
946    }
947
948    /// Get table rows - computed lazily on first access
949    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
950        Arc::clone(
951            self.table_rows_cache
952                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
953        )
954    }
955
956    /// Get bare URLs - computed lazily on first access
957    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
958        Arc::clone(
959            self.bare_urls_cache
960                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
961        )
962    }
963
964    /// Check if document has mixed ordered/unordered list nesting.
965    /// Result is cached after first computation (document-level invariant).
966    /// This is used by MD007 for smart style auto-detection.
967    pub fn has_mixed_list_nesting(&self) -> bool {
968        *self
969            .has_mixed_list_nesting_cache
970            .get_or_init(|| self.compute_mixed_list_nesting())
971    }
972
973    /// Internal computation for mixed list nesting (only called once per LintContext).
974    fn compute_mixed_list_nesting(&self) -> bool {
975        // Track parent list items by their marker position and type
976        // Using marker_column instead of indent because it works correctly
977        // for blockquoted content where indent doesn't account for the prefix
978        // Stack stores: (marker_column, is_ordered)
979        let mut stack: Vec<(usize, bool)> = Vec::new();
980        let mut last_was_blank = false;
981
982        for line_info in &self.lines {
983            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
984            if line_info.in_code_block
985                || line_info.in_front_matter
986                || line_info.in_mkdocstrings
987                || line_info.in_html_comment
988                || line_info.in_esm_block
989            {
990                continue;
991            }
992
993            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
994            if line_info.is_blank {
995                last_was_blank = true;
996                continue;
997            }
998
999            if let Some(list_item) = &line_info.list_item {
1000                // Normalize column 1 to column 0 (consistent with MD007 check function)
1001                let current_pos = if list_item.marker_column == 1 {
1002                    0
1003                } else {
1004                    list_item.marker_column
1005                };
1006
1007                // If there was a blank line and this item is at root level, reset stack
1008                if last_was_blank && current_pos == 0 {
1009                    stack.clear();
1010                }
1011                last_was_blank = false;
1012
1013                // Pop items at same or greater position (they're siblings or deeper, not parents)
1014                while let Some(&(pos, _)) = stack.last() {
1015                    if pos >= current_pos {
1016                        stack.pop();
1017                    } else {
1018                        break;
1019                    }
1020                }
1021
1022                // Check if immediate parent has different type - this is mixed nesting
1023                if let Some(&(_, parent_is_ordered)) = stack.last()
1024                    && parent_is_ordered != list_item.is_ordered
1025                {
1026                    return true; // Found mixed nesting - early exit
1027                }
1028
1029                stack.push((current_pos, list_item.is_ordered));
1030            } else {
1031                // Non-list line (but not blank) - could be paragraph or other content
1032                last_was_blank = false;
1033            }
1034        }
1035
1036        false
1037    }
1038
1039    /// Map a byte offset to (line, column)
1040    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1041        match self.line_offsets.binary_search(&offset) {
1042            Ok(line) => (line + 1, 1),
1043            Err(line) => {
1044                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1045                (line, offset - line_start + 1)
1046            }
1047        }
1048    }
1049
1050    /// Check if a position is within a code block or code span
1051    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1052        // Check code blocks first
1053        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1054            return true;
1055        }
1056
1057        // Check inline code spans (lazy load if needed)
1058        self.code_spans()
1059            .iter()
1060            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1061    }
1062
1063    /// Get line information by line number (1-indexed)
1064    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1065        if line_num > 0 {
1066            self.lines.get(line_num - 1)
1067        } else {
1068            None
1069        }
1070    }
1071
1072    /// Get byte offset for a line number (1-indexed)
1073    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1074        self.line_info(line_num).map(|info| info.byte_offset)
1075    }
1076
1077    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1078    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1079        let normalized_id = ref_id.to_lowercase();
1080        self.reference_defs_map
1081            .get(&normalized_id)
1082            .map(|&idx| self.reference_defs[idx].url.as_str())
1083    }
1084
1085    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1086    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1087        let normalized_id = ref_id.to_lowercase();
1088        self.reference_defs_map
1089            .get(&normalized_id)
1090            .map(|&idx| &self.reference_defs[idx])
1091    }
1092
1093    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1094    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1095        let normalized_id = ref_id.to_lowercase();
1096        self.reference_defs_map.contains_key(&normalized_id)
1097    }
1098
1099    /// Check if a line is part of a list block
1100    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1101        self.list_blocks
1102            .iter()
1103            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1104    }
1105
1106    /// Get the list block containing a specific line
1107    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1108        self.list_blocks
1109            .iter()
1110            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1111    }
1112
1113    // Compatibility methods for DocumentStructure migration
1114
1115    /// Check if a line is within a code block
1116    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1117        if line_num == 0 || line_num > self.lines.len() {
1118            return false;
1119        }
1120        self.lines[line_num - 1].in_code_block
1121    }
1122
1123    /// Check if a line is within front matter
1124    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1125        if line_num == 0 || line_num > self.lines.len() {
1126            return false;
1127        }
1128        self.lines[line_num - 1].in_front_matter
1129    }
1130
1131    /// Check if a line is within an HTML block
1132    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1133        if line_num == 0 || line_num > self.lines.len() {
1134            return false;
1135        }
1136        self.lines[line_num - 1].in_html_block
1137    }
1138
1139    /// Check if a line and column is within a code span
1140    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1141        if line_num == 0 || line_num > self.lines.len() {
1142            return false;
1143        }
1144
1145        // Use the code spans cache to check
1146        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1147        // Convert col to 0-indexed for comparison
1148        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1149        let code_spans = self.code_spans();
1150        code_spans.iter().any(|span| {
1151            // Check if line is within the span's line range
1152            if line_num < span.line || line_num > span.end_line {
1153                return false;
1154            }
1155
1156            if span.line == span.end_line {
1157                // Single-line span: check column bounds
1158                col_0indexed >= span.start_col && col_0indexed < span.end_col
1159            } else if line_num == span.line {
1160                // First line of multi-line span: anything after start_col is in span
1161                col_0indexed >= span.start_col
1162            } else if line_num == span.end_line {
1163                // Last line of multi-line span: anything before end_col is in span
1164                col_0indexed < span.end_col
1165            } else {
1166                // Middle line of multi-line span: entire line is in span
1167                true
1168            }
1169        })
1170    }
1171
1172    /// Check if a byte offset is within a code span
1173    #[inline]
1174    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1175        let code_spans = self.code_spans();
1176        code_spans
1177            .iter()
1178            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1179    }
1180
1181    /// Check if a byte position is within a reference definition
1182    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1183    #[inline]
1184    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1185        self.reference_defs
1186            .iter()
1187            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1188    }
1189
1190    /// Check if a byte position is within an HTML comment
1191    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1192    /// where k is the number of HTML comments (typically very small)
1193    #[inline]
1194    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1195        self.html_comment_ranges
1196            .iter()
1197            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1198    }
1199
1200    /// Check if a byte position is within an HTML tag (including multiline tags)
1201    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1202    #[inline]
1203    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1204        self.html_tags()
1205            .iter()
1206            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1207    }
1208
1209    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1210    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1211        self.jinja_ranges
1212            .iter()
1213            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214    }
1215
1216    /// Check if a byte position is within a JSX expression (MDX: {expression})
1217    #[inline]
1218    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1219        self.jsx_expression_ranges
1220            .iter()
1221            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222    }
1223
1224    /// Check if a byte position is within an MDX comment ({/* ... */})
1225    #[inline]
1226    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1227        self.mdx_comment_ranges
1228            .iter()
1229            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1230    }
1231
1232    /// Get all JSX expression byte ranges
1233    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1234        &self.jsx_expression_ranges
1235    }
1236
1237    /// Get all MDX comment byte ranges
1238    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1239        &self.mdx_comment_ranges
1240    }
1241
1242    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1243    /// Only active in Quarto flavor
1244    #[inline]
1245    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1246        self.citation_ranges
1247            .iter()
1248            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1249    }
1250
1251    /// Get all citation byte ranges (Quarto flavor only)
1252    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1253        &self.citation_ranges
1254    }
1255
1256    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1257    #[inline]
1258    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1259        self.shortcode_ranges
1260            .iter()
1261            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1262    }
1263
1264    /// Get all shortcode byte ranges
1265    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1266        &self.shortcode_ranges
1267    }
1268
1269    /// Check if a byte position is within a link reference definition title
1270    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1271        self.reference_defs.iter().any(|def| {
1272            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1273                byte_pos >= start && byte_pos < end
1274            } else {
1275                false
1276            }
1277        })
1278    }
1279
1280    /// Check if content has any instances of a specific character (fast)
1281    pub fn has_char(&self, ch: char) -> bool {
1282        match ch {
1283            '#' => self.char_frequency.hash_count > 0,
1284            '*' => self.char_frequency.asterisk_count > 0,
1285            '_' => self.char_frequency.underscore_count > 0,
1286            '-' => self.char_frequency.hyphen_count > 0,
1287            '+' => self.char_frequency.plus_count > 0,
1288            '>' => self.char_frequency.gt_count > 0,
1289            '|' => self.char_frequency.pipe_count > 0,
1290            '[' => self.char_frequency.bracket_count > 0,
1291            '`' => self.char_frequency.backtick_count > 0,
1292            '<' => self.char_frequency.lt_count > 0,
1293            '!' => self.char_frequency.exclamation_count > 0,
1294            '\n' => self.char_frequency.newline_count > 0,
1295            _ => self.content.contains(ch), // Fallback for other characters
1296        }
1297    }
1298
1299    /// Get count of a specific character (fast)
1300    pub fn char_count(&self, ch: char) -> usize {
1301        match ch {
1302            '#' => self.char_frequency.hash_count,
1303            '*' => self.char_frequency.asterisk_count,
1304            '_' => self.char_frequency.underscore_count,
1305            '-' => self.char_frequency.hyphen_count,
1306            '+' => self.char_frequency.plus_count,
1307            '>' => self.char_frequency.gt_count,
1308            '|' => self.char_frequency.pipe_count,
1309            '[' => self.char_frequency.bracket_count,
1310            '`' => self.char_frequency.backtick_count,
1311            '<' => self.char_frequency.lt_count,
1312            '!' => self.char_frequency.exclamation_count,
1313            '\n' => self.char_frequency.newline_count,
1314            _ => self.content.matches(ch).count(), // Fallback for other characters
1315        }
1316    }
1317
1318    /// Check if content likely contains headings (fast)
1319    pub fn likely_has_headings(&self) -> bool {
1320        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1321    }
1322
1323    /// Check if content likely contains lists (fast)
1324    pub fn likely_has_lists(&self) -> bool {
1325        self.char_frequency.asterisk_count > 0
1326            || self.char_frequency.hyphen_count > 0
1327            || self.char_frequency.plus_count > 0
1328    }
1329
1330    /// Check if content likely contains emphasis (fast)
1331    pub fn likely_has_emphasis(&self) -> bool {
1332        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1333    }
1334
1335    /// Check if content likely contains tables (fast)
1336    pub fn likely_has_tables(&self) -> bool {
1337        self.char_frequency.pipe_count > 2
1338    }
1339
1340    /// Check if content likely contains blockquotes (fast)
1341    pub fn likely_has_blockquotes(&self) -> bool {
1342        self.char_frequency.gt_count > 0
1343    }
1344
1345    /// Check if content likely contains code (fast)
1346    pub fn likely_has_code(&self) -> bool {
1347        self.char_frequency.backtick_count > 0
1348    }
1349
1350    /// Check if content likely contains links or images (fast)
1351    pub fn likely_has_links_or_images(&self) -> bool {
1352        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1353    }
1354
1355    /// Check if content likely contains HTML (fast)
1356    pub fn likely_has_html(&self) -> bool {
1357        self.char_frequency.lt_count > 0
1358    }
1359
1360    /// Get the blockquote prefix for inserting a blank line at the given line index.
1361    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1362    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1363    /// Returns an empty string if the line is not inside a blockquote.
1364    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1365        if let Some(line_info) = self.lines.get(line_idx)
1366            && let Some(ref bq) = line_info.blockquote
1367        {
1368            bq.prefix.trim_end().to_string()
1369        } else {
1370            String::new()
1371        }
1372    }
1373
1374    /// Get HTML tags on a specific line
1375    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1376        self.html_tags()
1377            .iter()
1378            .filter(|tag| tag.line == line_num)
1379            .cloned()
1380            .collect()
1381    }
1382
1383    /// Get emphasis spans on a specific line
1384    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1385        self.emphasis_spans()
1386            .iter()
1387            .filter(|span| span.line == line_num)
1388            .cloned()
1389            .collect()
1390    }
1391
1392    /// Get table rows on a specific line
1393    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1394        self.table_rows()
1395            .iter()
1396            .filter(|row| row.line == line_num)
1397            .cloned()
1398            .collect()
1399    }
1400
1401    /// Get bare URLs on a specific line
1402    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1403        self.bare_urls()
1404            .iter()
1405            .filter(|url| url.line == line_num)
1406            .cloned()
1407            .collect()
1408    }
1409
1410    /// Find the line index for a given byte offset using binary search.
1411    /// Returns (line_index, line_number, column) where:
1412    /// - line_index is the 0-based index in the lines array
1413    /// - line_number is the 1-based line number
1414    /// - column is the byte offset within that line
1415    #[inline]
1416    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1417        // Binary search to find the line containing this byte offset
1418        let idx = match lines.binary_search_by(|line| {
1419            if byte_offset < line.byte_offset {
1420                std::cmp::Ordering::Greater
1421            } else if byte_offset > line.byte_offset + line.byte_len {
1422                std::cmp::Ordering::Less
1423            } else {
1424                std::cmp::Ordering::Equal
1425            }
1426        }) {
1427            Ok(idx) => idx,
1428            Err(idx) => idx.saturating_sub(1),
1429        };
1430
1431        let line = &lines[idx];
1432        let line_num = idx + 1;
1433        let col = byte_offset.saturating_sub(line.byte_offset);
1434
1435        (idx, line_num, col)
1436    }
1437
1438    /// Check if a byte offset is within a code span using binary search
1439    #[inline]
1440    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1441        // Since spans are sorted by byte_offset, use partition_point for binary search
1442        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1443
1444        // Check the span that starts at or before our offset
1445        if idx > 0 {
1446            let span = &code_spans[idx - 1];
1447            if offset >= span.byte_offset && offset < span.byte_end {
1448                return true;
1449            }
1450        }
1451
1452        false
1453    }
1454
1455    /// Collect byte ranges of all links using pulldown-cmark
1456    /// This is used to skip heading detection for lines that fall within link syntax
1457    /// (e.g., multiline links like `[text](url\n#fragment)`)
1458    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1459        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1460
1461        let mut link_ranges = Vec::new();
1462        let mut options = Options::empty();
1463        options.insert(Options::ENABLE_WIKILINKS);
1464        options.insert(Options::ENABLE_FOOTNOTES);
1465
1466        let parser = Parser::new_ext(content, options).into_offset_iter();
1467        let mut link_stack: Vec<usize> = Vec::new();
1468
1469        for (event, range) in parser {
1470            match event {
1471                Event::Start(Tag::Link { .. }) => {
1472                    link_stack.push(range.start);
1473                }
1474                Event::End(TagEnd::Link) => {
1475                    if let Some(start_pos) = link_stack.pop() {
1476                        link_ranges.push((start_pos, range.end));
1477                    }
1478                }
1479                _ => {}
1480            }
1481        }
1482
1483        link_ranges
1484    }
1485
1486    /// Parse all links in the content
1487    fn parse_links(
1488        content: &'a str,
1489        lines: &[LineInfo],
1490        code_blocks: &[(usize, usize)],
1491        code_spans: &[CodeSpan],
1492        flavor: MarkdownFlavor,
1493        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1494    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1495        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1496        use std::collections::HashSet;
1497
1498        let mut links = Vec::with_capacity(content.len() / 500);
1499        let mut broken_links = Vec::new();
1500        let mut footnote_refs = Vec::new();
1501
1502        // Track byte positions of links found by pulldown-cmark
1503        let mut found_positions = HashSet::new();
1504
1505        // Use pulldown-cmark's streaming parser with BrokenLink callback
1506        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1507        // This automatically handles:
1508        // - Escaped links (won't generate events)
1509        // - Links in code blocks/spans (won't generate Link events)
1510        // - Images (generates Tag::Image instead)
1511        // - Reference resolution (dest_url is already resolved!)
1512        // - Broken references (callback is invoked)
1513        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1514        let mut options = Options::empty();
1515        options.insert(Options::ENABLE_WIKILINKS);
1516        options.insert(Options::ENABLE_FOOTNOTES);
1517
1518        let parser = Parser::new_with_broken_link_callback(
1519            content,
1520            options,
1521            Some(|link: BrokenLink<'_>| {
1522                broken_links.push(BrokenLinkInfo {
1523                    reference: link.reference.to_string(),
1524                    span: link.span.clone(),
1525                });
1526                None
1527            }),
1528        )
1529        .into_offset_iter();
1530
1531        let mut link_stack: Vec<(
1532            usize,
1533            usize,
1534            pulldown_cmark::CowStr<'a>,
1535            LinkType,
1536            pulldown_cmark::CowStr<'a>,
1537        )> = Vec::new();
1538        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1539
1540        for (event, range) in parser {
1541            match event {
1542                Event::Start(Tag::Link {
1543                    link_type,
1544                    dest_url,
1545                    id,
1546                    ..
1547                }) => {
1548                    // Link start - record position, URL, and reference ID
1549                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1550                    text_chunks.clear();
1551                }
1552                Event::Text(text) if !link_stack.is_empty() => {
1553                    // Track text content with its byte range
1554                    text_chunks.push((text.to_string(), range.start, range.end));
1555                }
1556                Event::Code(code) if !link_stack.is_empty() => {
1557                    // Include inline code in link text (with backticks)
1558                    let code_text = format!("`{code}`");
1559                    text_chunks.push((code_text, range.start, range.end));
1560                }
1561                Event::End(TagEnd::Link) => {
1562                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1563                        // Skip if in HTML comment
1564                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1565                            text_chunks.clear();
1566                            continue;
1567                        }
1568
1569                        // Find line and column information
1570                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1571
1572                        // Skip if this link is on a MkDocs snippet line
1573                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1574                            text_chunks.clear();
1575                            continue;
1576                        }
1577
1578                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1579
1580                        let is_reference = matches!(
1581                            link_type,
1582                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1583                        );
1584
1585                        // Extract link text directly from source bytes to preserve escaping
1586                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1587                        let link_text = if start_pos < content.len() {
1588                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1589
1590                            // Find MATCHING ] by tracking bracket depth for nested brackets
1591                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1592                            // Brackets inside code spans (between backticks) should be ignored
1593                            let mut close_pos = None;
1594                            let mut depth = 0;
1595                            let mut in_code_span = false;
1596
1597                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1598                                // Count preceding backslashes
1599                                let mut backslash_count = 0;
1600                                let mut j = i;
1601                                while j > 0 && link_bytes[j - 1] == b'\\' {
1602                                    backslash_count += 1;
1603                                    j -= 1;
1604                                }
1605                                let is_escaped = backslash_count % 2 != 0;
1606
1607                                // Track code spans - backticks toggle in/out of code
1608                                if byte == b'`' && !is_escaped {
1609                                    in_code_span = !in_code_span;
1610                                }
1611
1612                                // Only count brackets when NOT in a code span
1613                                if !is_escaped && !in_code_span {
1614                                    if byte == b'[' {
1615                                        depth += 1;
1616                                    } else if byte == b']' {
1617                                        if depth == 0 {
1618                                            // Found the matching closing bracket
1619                                            close_pos = Some(i);
1620                                            break;
1621                                        } else {
1622                                            depth -= 1;
1623                                        }
1624                                    }
1625                                }
1626                            }
1627
1628                            if let Some(pos) = close_pos {
1629                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1630                            } else {
1631                                Cow::Borrowed("")
1632                            }
1633                        } else {
1634                            Cow::Borrowed("")
1635                        };
1636
1637                        // For reference links, use the actual reference ID from pulldown-cmark
1638                        let reference_id = if is_reference && !ref_id.is_empty() {
1639                            Some(Cow::Owned(ref_id.to_lowercase()))
1640                        } else if is_reference {
1641                            // For collapsed/shortcut references without explicit ID, use the link text
1642                            Some(Cow::Owned(link_text.to_lowercase()))
1643                        } else {
1644                            None
1645                        };
1646
1647                        // Track this position as found
1648                        found_positions.insert(start_pos);
1649
1650                        links.push(ParsedLink {
1651                            line: line_num,
1652                            start_col: col_start,
1653                            end_col: col_end,
1654                            byte_offset: start_pos,
1655                            byte_end: range.end,
1656                            text: link_text,
1657                            url: Cow::Owned(url.to_string()),
1658                            is_reference,
1659                            reference_id,
1660                            link_type,
1661                        });
1662
1663                        text_chunks.clear();
1664                    }
1665                }
1666                Event::FootnoteReference(footnote_id) => {
1667                    // Capture footnote references like [^1], [^note]
1668                    // Skip if in HTML comment
1669                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1670                        continue;
1671                    }
1672
1673                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1674                    footnote_refs.push(FootnoteRef {
1675                        id: footnote_id.to_string(),
1676                        line: line_num,
1677                        byte_offset: range.start,
1678                        byte_end: range.end,
1679                    });
1680                }
1681                _ => {}
1682            }
1683        }
1684
1685        // Also find undefined references using regex
1686        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1687        // because the reference is undefined
1688        for cap in LINK_PATTERN.captures_iter(content) {
1689            let full_match = cap.get(0).unwrap();
1690            let match_start = full_match.start();
1691            let match_end = full_match.end();
1692
1693            // Skip if this was already found by pulldown-cmark (it's a valid link)
1694            if found_positions.contains(&match_start) {
1695                continue;
1696            }
1697
1698            // Skip if escaped
1699            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1700                continue;
1701            }
1702
1703            // Skip if it's an image
1704            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1705                continue;
1706            }
1707
1708            // Skip if in code block
1709            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1710                continue;
1711            }
1712
1713            // Skip if in code span
1714            if Self::is_offset_in_code_span(code_spans, match_start) {
1715                continue;
1716            }
1717
1718            // Skip if in HTML comment
1719            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1720                continue;
1721            }
1722
1723            // Find line and column information
1724            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1725
1726            // Skip if this link is on a MkDocs snippet line
1727            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1728                continue;
1729            }
1730
1731            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1732
1733            let text = cap.get(1).map_or("", |m| m.as_str());
1734
1735            // Only process reference links (group 6)
1736            if let Some(ref_id) = cap.get(6) {
1737                let ref_id_str = ref_id.as_str();
1738                let normalized_ref = if ref_id_str.is_empty() {
1739                    Cow::Owned(text.to_lowercase()) // Implicit reference
1740                } else {
1741                    Cow::Owned(ref_id_str.to_lowercase())
1742                };
1743
1744                // This is an undefined reference (pulldown-cmark didn't parse it)
1745                links.push(ParsedLink {
1746                    line: line_num,
1747                    start_col: col_start,
1748                    end_col: col_end,
1749                    byte_offset: match_start,
1750                    byte_end: match_end,
1751                    text: Cow::Borrowed(text),
1752                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1753                    is_reference: true,
1754                    reference_id: Some(normalized_ref),
1755                    link_type: LinkType::Reference, // Undefined references are reference-style
1756                });
1757            }
1758        }
1759
1760        (links, broken_links, footnote_refs)
1761    }
1762
1763    /// Parse all images in the content
1764    fn parse_images(
1765        content: &'a str,
1766        lines: &[LineInfo],
1767        code_blocks: &[(usize, usize)],
1768        code_spans: &[CodeSpan],
1769        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1770    ) -> Vec<ParsedImage<'a>> {
1771        use crate::utils::skip_context::is_in_html_comment_ranges;
1772        use std::collections::HashSet;
1773
1774        // Pre-size based on a heuristic: images are less common than links
1775        let mut images = Vec::with_capacity(content.len() / 1000);
1776        let mut found_positions = HashSet::new();
1777
1778        // Use pulldown-cmark for parsing - more accurate and faster
1779        let parser = Parser::new(content).into_offset_iter();
1780        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1781            Vec::new();
1782        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1783
1784        for (event, range) in parser {
1785            match event {
1786                Event::Start(Tag::Image {
1787                    link_type,
1788                    dest_url,
1789                    id,
1790                    ..
1791                }) => {
1792                    image_stack.push((range.start, dest_url, link_type, id));
1793                    text_chunks.clear();
1794                }
1795                Event::Text(text) if !image_stack.is_empty() => {
1796                    text_chunks.push((text.to_string(), range.start, range.end));
1797                }
1798                Event::Code(code) if !image_stack.is_empty() => {
1799                    let code_text = format!("`{code}`");
1800                    text_chunks.push((code_text, range.start, range.end));
1801                }
1802                Event::End(TagEnd::Image) => {
1803                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1804                        // Skip if in code block
1805                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1806                            continue;
1807                        }
1808
1809                        // Skip if in code span
1810                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1811                            continue;
1812                        }
1813
1814                        // Skip if in HTML comment
1815                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1816                            continue;
1817                        }
1818
1819                        // Find line and column using binary search
1820                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1821                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1822
1823                        let is_reference = matches!(
1824                            link_type,
1825                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1826                        );
1827
1828                        // Extract alt text directly from source bytes to preserve escaping
1829                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1830                        let alt_text = if start_pos < content.len() {
1831                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1832
1833                            // Find MATCHING ] by tracking bracket depth for nested brackets
1834                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1835                            let mut close_pos = None;
1836                            let mut depth = 0;
1837
1838                            if image_bytes.len() > 2 {
1839                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1840                                    // Count preceding backslashes
1841                                    let mut backslash_count = 0;
1842                                    let mut j = i;
1843                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1844                                        backslash_count += 1;
1845                                        j -= 1;
1846                                    }
1847                                    let is_escaped = backslash_count % 2 != 0;
1848
1849                                    if !is_escaped {
1850                                        if byte == b'[' {
1851                                            depth += 1;
1852                                        } else if byte == b']' {
1853                                            if depth == 0 {
1854                                                // Found the matching closing bracket
1855                                                close_pos = Some(i);
1856                                                break;
1857                                            } else {
1858                                                depth -= 1;
1859                                            }
1860                                        }
1861                                    }
1862                                }
1863                            }
1864
1865                            if let Some(pos) = close_pos {
1866                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1867                            } else {
1868                                Cow::Borrowed("")
1869                            }
1870                        } else {
1871                            Cow::Borrowed("")
1872                        };
1873
1874                        let reference_id = if is_reference && !ref_id.is_empty() {
1875                            Some(Cow::Owned(ref_id.to_lowercase()))
1876                        } else if is_reference {
1877                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1878                        } else {
1879                            None
1880                        };
1881
1882                        found_positions.insert(start_pos);
1883                        images.push(ParsedImage {
1884                            line: line_num,
1885                            start_col: col_start,
1886                            end_col: col_end,
1887                            byte_offset: start_pos,
1888                            byte_end: range.end,
1889                            alt_text,
1890                            url: Cow::Owned(url.to_string()),
1891                            is_reference,
1892                            reference_id,
1893                            link_type,
1894                        });
1895                    }
1896                }
1897                _ => {}
1898            }
1899        }
1900
1901        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1902        for cap in IMAGE_PATTERN.captures_iter(content) {
1903            let full_match = cap.get(0).unwrap();
1904            let match_start = full_match.start();
1905            let match_end = full_match.end();
1906
1907            // Skip if already found by pulldown-cmark
1908            if found_positions.contains(&match_start) {
1909                continue;
1910            }
1911
1912            // Skip if the ! is escaped
1913            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1914                continue;
1915            }
1916
1917            // Skip if in code block, code span, or HTML comment
1918            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1919                || Self::is_offset_in_code_span(code_spans, match_start)
1920                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1921            {
1922                continue;
1923            }
1924
1925            // Only process reference images (undefined references not found by pulldown-cmark)
1926            if let Some(ref_id) = cap.get(6) {
1927                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1928                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1929                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1930                let ref_id_str = ref_id.as_str();
1931                let normalized_ref = if ref_id_str.is_empty() {
1932                    Cow::Owned(alt_text.to_lowercase())
1933                } else {
1934                    Cow::Owned(ref_id_str.to_lowercase())
1935                };
1936
1937                images.push(ParsedImage {
1938                    line: line_num,
1939                    start_col: col_start,
1940                    end_col: col_end,
1941                    byte_offset: match_start,
1942                    byte_end: match_end,
1943                    alt_text: Cow::Borrowed(alt_text),
1944                    url: Cow::Borrowed(""),
1945                    is_reference: true,
1946                    reference_id: Some(normalized_ref),
1947                    link_type: LinkType::Reference, // Undefined references are reference-style
1948                });
1949            }
1950        }
1951
1952        images
1953    }
1954
1955    /// Parse reference definitions
1956    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1957        // Pre-size based on lines count as reference definitions are line-based
1958        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1959
1960        for (line_idx, line_info) in lines.iter().enumerate() {
1961            // Skip lines in code blocks
1962            if line_info.in_code_block {
1963                continue;
1964            }
1965
1966            let line = line_info.content(content);
1967            let line_num = line_idx + 1;
1968
1969            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1970                let id_raw = cap.get(1).unwrap().as_str();
1971
1972                // Skip footnote definitions - they use [^id]: syntax and are semantically
1973                // different from reference link definitions
1974                if id_raw.starts_with('^') {
1975                    continue;
1976                }
1977
1978                let id = id_raw.to_lowercase();
1979                let url = cap.get(2).unwrap().as_str().to_string();
1980                let title_match = cap.get(3).or_else(|| cap.get(4));
1981                let title = title_match.map(|m| m.as_str().to_string());
1982
1983                // Calculate byte positions
1984                // The match starts at the beginning of the line (0) and extends to the end
1985                let match_obj = cap.get(0).unwrap();
1986                let byte_offset = line_info.byte_offset + match_obj.start();
1987                let byte_end = line_info.byte_offset + match_obj.end();
1988
1989                // Calculate title byte positions (includes the quote character before content)
1990                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1991                    // The match is the content inside quotes, so we include the quote before
1992                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1993                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1994                    (Some(start), Some(end))
1995                } else {
1996                    (None, None)
1997                };
1998
1999                refs.push(ReferenceDef {
2000                    line: line_num,
2001                    id,
2002                    url,
2003                    title,
2004                    byte_offset,
2005                    byte_end,
2006                    title_byte_start,
2007                    title_byte_end,
2008                });
2009            }
2010        }
2011
2012        refs
2013    }
2014
2015    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
2016    /// Handles nested blockquotes like `> > > content`
2017    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
2018    #[inline]
2019    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2020        let trimmed_start = line.trim_start();
2021        if !trimmed_start.starts_with('>') {
2022            return None;
2023        }
2024
2025        // Track total prefix length to handle nested blockquotes
2026        let mut remaining = line;
2027        let mut total_prefix_len = 0;
2028
2029        loop {
2030            let trimmed = remaining.trim_start();
2031            if !trimmed.starts_with('>') {
2032                break;
2033            }
2034
2035            // Add leading whitespace + '>' to prefix
2036            let leading_ws_len = remaining.len() - trimmed.len();
2037            total_prefix_len += leading_ws_len + 1;
2038
2039            let after_gt = &trimmed[1..];
2040
2041            // Handle optional whitespace after '>' (space or tab)
2042            if let Some(stripped) = after_gt.strip_prefix(' ') {
2043                total_prefix_len += 1;
2044                remaining = stripped;
2045            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2046                total_prefix_len += 1;
2047                remaining = stripped;
2048            } else {
2049                remaining = after_gt;
2050            }
2051        }
2052
2053        Some((&line[..total_prefix_len], remaining))
2054    }
2055
2056    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2057    ///
2058    /// Returns a HashMap keyed by line byte offset, containing:
2059    /// `(is_ordered, marker, marker_column, content_column, number)`
2060    ///
2061    /// ## Why pulldown-cmark?
2062    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2063    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2064    /// This fixes issue #253 where continuation lines were falsely detected.
2065    ///
2066    /// ## Tab indentation quirk
2067    /// Pulldown-cmark reports nested list items at the newline character position
2068    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2069    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2070    /// We detect this and advance to the correct line.
2071    ///
2072    /// ## HashMap key strategy
2073    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2074    /// that resolve to the same line (after newline adjustment). The first event
2075    /// for each line is authoritative.
2076    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2077    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2078    /// This avoids a separate parse for emphasis detection.
2079    fn detect_list_items_and_emphasis_with_pulldown(
2080        content: &str,
2081        line_offsets: &[usize],
2082        flavor: MarkdownFlavor,
2083        front_matter_end: usize,
2084        code_blocks: &[(usize, usize)],
2085    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2086        use std::collections::HashMap;
2087
2088        let mut list_items = HashMap::new();
2089        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2090
2091        let mut options = Options::empty();
2092        options.insert(Options::ENABLE_TABLES);
2093        options.insert(Options::ENABLE_FOOTNOTES);
2094        options.insert(Options::ENABLE_STRIKETHROUGH);
2095        options.insert(Options::ENABLE_TASKLISTS);
2096        // Always enable GFM features for consistency with existing behavior
2097        options.insert(Options::ENABLE_GFM);
2098
2099        // Suppress unused variable warning
2100        let _ = flavor;
2101
2102        let parser = Parser::new_ext(content, options).into_offset_iter();
2103        let mut list_depth: usize = 0;
2104        let mut list_stack: Vec<bool> = Vec::new();
2105
2106        for (event, range) in parser {
2107            match event {
2108                // Capture emphasis spans (for MD030's emphasis detection)
2109                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2110                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2111                        2
2112                    } else {
2113                        1
2114                    };
2115                    let match_start = range.start;
2116                    let match_end = range.end;
2117
2118                    // Skip if in code block
2119                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2120                        // Determine marker character by looking at the content at the start
2121                        let marker = content[match_start..].chars().next().unwrap_or('*');
2122                        if marker == '*' || marker == '_' {
2123                            // Extract content between markers
2124                            let content_start = match_start + marker_count;
2125                            let content_end = if match_end >= marker_count {
2126                                match_end - marker_count
2127                            } else {
2128                                match_end
2129                            };
2130                            let content_part = if content_start < content_end && content_end <= content.len() {
2131                                &content[content_start..content_end]
2132                            } else {
2133                                ""
2134                            };
2135
2136                            // Find which line this emphasis is on using line_offsets
2137                            let line_idx = match line_offsets.binary_search(&match_start) {
2138                                Ok(idx) => idx,
2139                                Err(idx) => idx.saturating_sub(1),
2140                            };
2141                            let line_num = line_idx + 1;
2142                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2143                            let col_start = match_start - line_start;
2144                            let col_end = match_end - line_start;
2145
2146                            emphasis_spans.push(EmphasisSpan {
2147                                line: line_num,
2148                                start_col: col_start,
2149                                end_col: col_end,
2150                                byte_offset: match_start,
2151                                byte_end: match_end,
2152                                marker,
2153                                marker_count,
2154                                content: content_part.to_string(),
2155                            });
2156                        }
2157                    }
2158                }
2159                Event::Start(Tag::List(start_number)) => {
2160                    list_depth += 1;
2161                    list_stack.push(start_number.is_some());
2162                }
2163                Event::End(TagEnd::List(_)) => {
2164                    list_depth = list_depth.saturating_sub(1);
2165                    list_stack.pop();
2166                }
2167                Event::Start(Tag::Item) if list_depth > 0 => {
2168                    // Get the ordered state for the CURRENT (innermost) list
2169                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2170                    // Find which line this byte offset corresponds to
2171                    let item_start = range.start;
2172
2173                    // Binary search to find the line number
2174                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2175                        Ok(idx) => idx,
2176                        Err(idx) => idx.saturating_sub(1),
2177                    };
2178
2179                    // Pulldown-cmark reports nested list items at the newline before the item
2180                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2181                    // Advance to the actual content line in this case.
2182                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2183                        line_idx += 1;
2184                    }
2185
2186                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2187                    if front_matter_end > 0 && line_idx < front_matter_end {
2188                        continue;
2189                    }
2190
2191                    if line_idx < line_offsets.len() {
2192                        let line_start_byte = line_offsets[line_idx];
2193                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2194                        let line = &content[line_start_byte..line_end.min(content.len())];
2195
2196                        // Strip trailing newline
2197                        let line = line
2198                            .strip_suffix('\n')
2199                            .or_else(|| line.strip_suffix("\r\n"))
2200                            .unwrap_or(line);
2201
2202                        // Strip blockquote prefix if present
2203                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2204                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2205                            (prefix.len(), content)
2206                        } else {
2207                            (0, line)
2208                        };
2209
2210                        // Parse the list marker from the actual line
2211                        if current_list_is_ordered {
2212                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2213                                Self::parse_ordered_list(line_to_parse)
2214                            {
2215                                let marker = format!("{number_str}{delimiter}");
2216                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2217                                let content_column = marker_column + marker.len() + spacing.len();
2218                                let number = number_str.parse().ok();
2219
2220                                list_items.entry(line_start_byte).or_insert((
2221                                    true,
2222                                    marker,
2223                                    marker_column,
2224                                    content_column,
2225                                    number,
2226                                ));
2227                            }
2228                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2229                            Self::parse_unordered_list(line_to_parse)
2230                        {
2231                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2232                            let content_column = marker_column + 1 + spacing.len();
2233
2234                            list_items.entry(line_start_byte).or_insert((
2235                                false,
2236                                marker.to_string(),
2237                                marker_column,
2238                                content_column,
2239                                None,
2240                            ));
2241                        }
2242                    }
2243                }
2244                _ => {}
2245            }
2246        }
2247
2248        (list_items, emphasis_spans)
2249    }
2250
2251    /// Fast unordered list parser - replaces regex for 5-10x speedup
2252    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2253    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2254    #[inline]
2255    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2256        let bytes = line.as_bytes();
2257        let mut i = 0;
2258
2259        // Skip leading whitespace
2260        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2261            i += 1;
2262        }
2263
2264        // Check for marker
2265        if i >= bytes.len() {
2266            return None;
2267        }
2268        let marker = bytes[i] as char;
2269        if marker != '-' && marker != '*' && marker != '+' {
2270            return None;
2271        }
2272        let marker_pos = i;
2273        i += 1;
2274
2275        // Collect spacing after marker (space or tab only)
2276        let spacing_start = i;
2277        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2278            i += 1;
2279        }
2280
2281        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2282    }
2283
2284    /// Fast ordered list parser - replaces regex for 5-10x speedup
2285    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2286    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2287    #[inline]
2288    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2289        let bytes = line.as_bytes();
2290        let mut i = 0;
2291
2292        // Skip leading whitespace
2293        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2294            i += 1;
2295        }
2296
2297        // Collect digits
2298        let number_start = i;
2299        while i < bytes.len() && bytes[i].is_ascii_digit() {
2300            i += 1;
2301        }
2302        if i == number_start {
2303            return None; // No digits found
2304        }
2305
2306        // Check for delimiter
2307        if i >= bytes.len() {
2308            return None;
2309        }
2310        let delimiter = bytes[i] as char;
2311        if delimiter != '.' && delimiter != ')' {
2312            return None;
2313        }
2314        let delimiter_pos = i;
2315        i += 1;
2316
2317        // Collect spacing after delimiter (space or tab only)
2318        let spacing_start = i;
2319        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2320            i += 1;
2321        }
2322
2323        Some((
2324            &line[..number_start],
2325            &line[number_start..delimiter_pos],
2326            delimiter,
2327            &line[spacing_start..i],
2328            &line[i..],
2329        ))
2330    }
2331
2332    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2333    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2334    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2335        let num_lines = line_offsets.len();
2336        let mut in_code_block = vec![false; num_lines];
2337
2338        // For each code block, mark all lines within it
2339        for &(start, end) in code_blocks {
2340            // Ensure we're at valid UTF-8 boundaries
2341            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2342                let mut boundary = start;
2343                while boundary > 0 && !content.is_char_boundary(boundary) {
2344                    boundary -= 1;
2345                }
2346                boundary
2347            } else {
2348                start
2349            };
2350
2351            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2352                let mut boundary = end;
2353                while boundary < content.len() && !content.is_char_boundary(boundary) {
2354                    boundary += 1;
2355                }
2356                boundary
2357            } else {
2358                end.min(content.len())
2359            };
2360
2361            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2362            // That function now has proper list context awareness (see code_block_utils.rs)
2363            // and correctly distinguishes between:
2364            // - Fenced code blocks (``` or ~~~)
2365            // - Indented code blocks at document level (4 spaces + blank line before)
2366            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2367            //
2368            // We no longer need to re-validate here. The original validation logic
2369            // was causing false positives by marking list continuation paragraphs as
2370            // code blocks when they have 4 spaces of indentation.
2371
2372            // Use binary search to find the first and last line indices
2373            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2374            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2375            //
2376            // Find the line that CONTAINS safe_start: the line with the largest
2377            // start offset that is <= safe_start. partition_point gives us the
2378            // first line that starts AFTER safe_start, so we subtract 1.
2379            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2380            let first_line = first_line_after.saturating_sub(1);
2381            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2382
2383            // Mark all lines in the range at once
2384            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2385                *flag = true;
2386            }
2387        }
2388
2389        in_code_block
2390    }
2391
2392    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2393    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2394    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2395        let content_lines: Vec<&str> = content.lines().collect();
2396        let num_lines = content_lines.len();
2397        let mut in_math_block = vec![false; num_lines];
2398
2399        let mut inside_math = false;
2400
2401        for (i, line) in content_lines.iter().enumerate() {
2402            // Skip lines that are in code blocks - math delimiters inside code are literal
2403            if code_block_map.get(i).copied().unwrap_or(false) {
2404                continue;
2405            }
2406
2407            let trimmed = line.trim();
2408
2409            // Check for math block delimiter ($$)
2410            // A line with just $$ toggles the math block state
2411            if trimmed == "$$" {
2412                if inside_math {
2413                    // Closing delimiter - this line is still part of the math block
2414                    in_math_block[i] = true;
2415                    inside_math = false;
2416                } else {
2417                    // Opening delimiter - this line starts the math block
2418                    in_math_block[i] = true;
2419                    inside_math = true;
2420                }
2421            } else if inside_math {
2422                // Content inside math block
2423                in_math_block[i] = true;
2424            }
2425        }
2426
2427        in_math_block
2428    }
2429
2430    /// Pre-compute basic line information (without headings/blockquotes)
2431    /// Also returns emphasis spans detected during the pulldown-cmark parse
2432    fn compute_basic_line_info(
2433        content: &str,
2434        line_offsets: &[usize],
2435        code_blocks: &[(usize, usize)],
2436        flavor: MarkdownFlavor,
2437        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2438        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2439        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2440    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2441        let content_lines: Vec<&str> = content.lines().collect();
2442        let mut lines = Vec::with_capacity(content_lines.len());
2443
2444        // Pre-compute which lines are in code blocks
2445        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2446
2447        // Pre-compute which lines are in math blocks ($$ ... $$)
2448        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2449
2450        // Detect front matter boundaries FIRST, before any other parsing
2451        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2452        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2453
2454        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2455        // (context-aware, eliminates false positives)
2456        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2457            content,
2458            line_offsets,
2459            flavor,
2460            front_matter_end,
2461            code_blocks,
2462        );
2463
2464        for (i, line) in content_lines.iter().enumerate() {
2465            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2466            let indent = line.len() - line.trim_start().len();
2467            // Compute visual indent with proper CommonMark tab expansion
2468            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2469
2470            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2471            let blockquote_parse = Self::parse_blockquote_prefix(line);
2472
2473            // For blank detection, consider blockquote context
2474            let is_blank = if let Some((_, content)) = blockquote_parse {
2475                // In blockquote context, check if content after prefix is blank
2476                content.trim().is_empty()
2477            } else {
2478                line.trim().is_empty()
2479            };
2480
2481            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2482            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2483
2484            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2485            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2486                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2487            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2488            // This ensures content after `-->` on the same line is not incorrectly skipped
2489            let line_end_offset = byte_offset + line.len();
2490            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2491                html_comment_ranges,
2492                byte_offset,
2493                line_end_offset,
2494            );
2495            // Use pulldown-cmark's list detection for context-aware parsing
2496            // This eliminates false positives on continuation lines (issue #253)
2497            let list_item =
2498                list_item_map
2499                    .get(&byte_offset)
2500                    .map(
2501                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2502                            marker: marker.clone(),
2503                            is_ordered: *is_ordered,
2504                            number: *number,
2505                            marker_column: *marker_column,
2506                            content_column: *content_column,
2507                        },
2508                    );
2509
2510            // Detect horizontal rules (only outside code blocks and frontmatter)
2511            // Uses CommonMark-compliant check including leading indentation validation
2512            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2513            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2514
2515            // Get math block status for this line
2516            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2517
2518            // Check if line is inside a Quarto div block
2519            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2520                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2521
2522            lines.push(LineInfo {
2523                byte_offset,
2524                byte_len: line.len(),
2525                indent,
2526                visual_indent,
2527                is_blank,
2528                in_code_block,
2529                in_front_matter,
2530                in_html_block: false, // Will be populated after line creation
2531                in_html_comment,
2532                list_item,
2533                heading: None,    // Will be populated in second pass for Setext headings
2534                blockquote: None, // Will be populated after line creation
2535                in_mkdocstrings,
2536                in_esm_block: false, // Will be populated after line creation for MDX files
2537                in_code_span_continuation: false, // Will be populated after code spans are parsed
2538                is_horizontal_rule: is_hr,
2539                in_math_block,
2540                in_quarto_div,
2541                in_jsx_expression: false,   // Will be populated for MDX files
2542                in_mdx_comment: false,      // Will be populated for MDX files
2543                in_jsx_component: false,    // Will be populated for MDX files
2544                in_jsx_fragment: false,     // Will be populated for MDX files
2545                in_admonition: false,       // Will be populated for MkDocs files
2546                in_content_tab: false,      // Will be populated for MkDocs files
2547                in_definition_list: false,  // Will be populated for MkDocs files
2548                in_obsidian_comment: false, // Will be populated for Obsidian files
2549            });
2550        }
2551
2552        (lines, emphasis_spans)
2553    }
2554
2555    /// Detect headings and blockquotes (called after HTML block detection)
2556    fn detect_headings_and_blockquotes(
2557        content: &str,
2558        lines: &mut [LineInfo],
2559        flavor: MarkdownFlavor,
2560        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2561        link_byte_ranges: &[(usize, usize)],
2562    ) {
2563        // Regex for heading detection
2564        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2565            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2566        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2567            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2568
2569        let content_lines: Vec<&str> = content.lines().collect();
2570
2571        // Detect front matter boundaries to skip those lines
2572        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2573
2574        // Detect headings (including Setext which needs look-ahead) and blockquotes
2575        for i in 0..lines.len() {
2576            let line = content_lines[i];
2577
2578            // Detect blockquotes FIRST, before any skip conditions.
2579            // A line can be both a blockquote AND contain a code block inside it.
2580            // We need to know about the blockquote marker regardless of code block status.
2581            // Skip only frontmatter lines - those are never blockquotes.
2582            if !(front_matter_end > 0 && i < front_matter_end)
2583                && let Some(bq) = parse_blockquote_detailed(line)
2584            {
2585                let nesting_level = bq.markers.len();
2586                let marker_column = bq.indent.len();
2587                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2588                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2589                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2590                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2591
2592                lines[i].blockquote = Some(BlockquoteInfo {
2593                    nesting_level,
2594                    indent: bq.indent.to_string(),
2595                    marker_column,
2596                    prefix,
2597                    content: bq.content.to_string(),
2598                    has_no_space_after_marker: has_no_space,
2599                    has_multiple_spaces_after_marker: has_multiple_spaces,
2600                    needs_md028_fix,
2601                });
2602
2603                // Update is_horizontal_rule for blockquote content
2604                // The original detection doesn't strip blockquote prefix, so we need to check here
2605                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2606                    lines[i].is_horizontal_rule = true;
2607                }
2608            }
2609
2610            // Now apply skip conditions for heading detection
2611            if lines[i].in_code_block {
2612                continue;
2613            }
2614
2615            // Skip lines in front matter
2616            if front_matter_end > 0 && i < front_matter_end {
2617                continue;
2618            }
2619
2620            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2621            if lines[i].in_html_block {
2622                continue;
2623            }
2624
2625            // Skip heading detection for blank lines
2626            if lines[i].is_blank {
2627                continue;
2628            }
2629
2630            // Check for ATX headings (but skip MkDocs snippet lines)
2631            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2632            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2633                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2634                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2635            } else {
2636                false
2637            };
2638
2639            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2640                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2641                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2642                    continue;
2643                }
2644                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2645                // This prevents false positives where `#fragment` is detected as a heading
2646                let line_offset = lines[i].byte_offset;
2647                if link_byte_ranges
2648                    .iter()
2649                    .any(|&(start, end)| line_offset > start && line_offset < end)
2650                {
2651                    continue;
2652                }
2653                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2654                let hashes = caps.get(2).map_or("", |m| m.as_str());
2655                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2656                let rest = caps.get(4).map_or("", |m| m.as_str());
2657
2658                let level = hashes.len() as u8;
2659                let marker_column = leading_spaces.len();
2660
2661                // Check for closing sequence, but handle custom IDs that might come after
2662                let (text, has_closing, closing_seq) = {
2663                    // First check if there's a custom ID at the end
2664                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2665                        // Check if this looks like a valid custom ID (ends with })
2666                        if rest[id_start..].trim_end().ends_with('}') {
2667                            // Split off the custom ID
2668                            (&rest[..id_start], &rest[id_start..])
2669                        } else {
2670                            (rest, "")
2671                        }
2672                    } else {
2673                        (rest, "")
2674                    };
2675
2676                    // Now look for closing hashes in the part before the custom ID
2677                    let trimmed_rest = rest_without_id.trim_end();
2678                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2679                        // Find the start of the hash sequence by walking backwards
2680                        // Use char_indices to get byte positions at char boundaries
2681                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2682
2683                        // Find which char index corresponds to last_hash_byte_pos
2684                        let last_hash_char_idx = char_positions
2685                            .iter()
2686                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2687
2688                        if let Some(mut char_idx) = last_hash_char_idx {
2689                            // Walk backwards to find start of hash sequence
2690                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2691                                char_idx -= 1;
2692                            }
2693
2694                            // Get the byte position of the start of hashes
2695                            let start_of_hashes = char_positions[char_idx].0;
2696
2697                            // Check if there's at least one space before the closing hashes
2698                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2699
2700                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2701                            let potential_closing = &trimmed_rest[start_of_hashes..];
2702                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2703
2704                            if is_all_hashes && has_space_before {
2705                                // This is a closing sequence
2706                                let closing_hashes = potential_closing.to_string();
2707                                // The text is everything before the closing hashes
2708                                // Don't include the custom ID here - it will be extracted later
2709                                let text_part = if !custom_id_part.is_empty() {
2710                                    // If we have a custom ID, append it back to get the full rest
2711                                    // This allows the extract_header_id function to handle it properly
2712                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2713                                } else {
2714                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2715                                };
2716                                (text_part, true, closing_hashes)
2717                            } else {
2718                                // Not a valid closing sequence, return the full content
2719                                (rest.to_string(), false, String::new())
2720                            }
2721                        } else {
2722                            // Couldn't find char boundary, return the full content
2723                            (rest.to_string(), false, String::new())
2724                        }
2725                    } else {
2726                        // No hashes found, return the full content
2727                        (rest.to_string(), false, String::new())
2728                    }
2729                };
2730
2731                let content_column = marker_column + hashes.len() + spaces_after.len();
2732
2733                // Extract custom header ID if present
2734                let raw_text = text.trim().to_string();
2735                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2736
2737                // If no custom ID was found on the header line, check the next line for standalone attr-list
2738                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2739                    let next_line = content_lines[i + 1];
2740                    if !lines[i + 1].in_code_block
2741                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2742                        && let Some(next_line_id) =
2743                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2744                    {
2745                        custom_id = Some(next_line_id);
2746                    }
2747                }
2748
2749                // ATX heading is "valid" for processing by heading rules if:
2750                // 1. Has space after # (CommonMark compliant): `# Heading`
2751                // 2. Is empty (just hashes): `#`
2752                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2753                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2754                //
2755                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2756                // - `#tag` - single # with lowercase (social hashtag)
2757                // - `#123` - single # with number (GitHub issue ref)
2758                let is_valid = !spaces_after.is_empty()
2759                    || rest.is_empty()
2760                    || level > 1
2761                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2762
2763                lines[i].heading = Some(HeadingInfo {
2764                    level,
2765                    style: HeadingStyle::ATX,
2766                    marker: hashes.to_string(),
2767                    marker_column,
2768                    content_column,
2769                    text: clean_text,
2770                    custom_id,
2771                    raw_text,
2772                    has_closing_sequence: has_closing,
2773                    closing_sequence: closing_seq,
2774                    is_valid,
2775                });
2776            }
2777            // Check for Setext headings (need to look at next line)
2778            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2779                let next_line = content_lines[i + 1];
2780                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2781                    // Skip if next line is front matter delimiter
2782                    if front_matter_end > 0 && i < front_matter_end {
2783                        continue;
2784                    }
2785
2786                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2787                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2788                    {
2789                        continue;
2790                    }
2791
2792                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2793                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2794                    let content_line = line.trim();
2795
2796                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2797                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2798                        continue;
2799                    }
2800
2801                    // Skip underscore thematic breaks (___)
2802                    if content_line.starts_with('_') {
2803                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2804                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2805                            continue;
2806                        }
2807                    }
2808
2809                    // Skip numbered lists (1. Item, 2. Item, etc.)
2810                    if let Some(first_char) = content_line.chars().next()
2811                        && first_char.is_ascii_digit()
2812                    {
2813                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2814                        if num_end < content_line.len() {
2815                            let next = content_line.chars().nth(num_end);
2816                            if next == Some('.') || next == Some(')') {
2817                                continue;
2818                            }
2819                        }
2820                    }
2821
2822                    // Skip ATX headings
2823                    if ATX_HEADING_REGEX.is_match(line) {
2824                        continue;
2825                    }
2826
2827                    // Skip blockquotes
2828                    if content_line.starts_with('>') {
2829                        continue;
2830                    }
2831
2832                    // Skip code fences
2833                    let trimmed_start = line.trim_start();
2834                    if trimmed_start.len() >= 3 {
2835                        let first_three: String = trimmed_start.chars().take(3).collect();
2836                        if first_three == "```" || first_three == "~~~" {
2837                            continue;
2838                        }
2839                    }
2840
2841                    // Skip HTML blocks
2842                    if content_line.starts_with('<') {
2843                        continue;
2844                    }
2845
2846                    let underline = next_line.trim();
2847
2848                    let level = if underline.starts_with('=') { 1 } else { 2 };
2849                    let style = if level == 1 {
2850                        HeadingStyle::Setext1
2851                    } else {
2852                        HeadingStyle::Setext2
2853                    };
2854
2855                    // Extract custom header ID if present
2856                    let raw_text = line.trim().to_string();
2857                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2858
2859                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2860                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2861                        let attr_line = content_lines[i + 2];
2862                        if !lines[i + 2].in_code_block
2863                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2864                            && let Some(attr_line_id) =
2865                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2866                        {
2867                            custom_id = Some(attr_line_id);
2868                        }
2869                    }
2870
2871                    lines[i].heading = Some(HeadingInfo {
2872                        level,
2873                        style,
2874                        marker: underline.to_string(),
2875                        marker_column: next_line.len() - next_line.trim_start().len(),
2876                        content_column: lines[i].indent,
2877                        text: clean_text,
2878                        custom_id,
2879                        raw_text,
2880                        has_closing_sequence: false,
2881                        closing_sequence: String::new(),
2882                        is_valid: true, // Setext headings are always valid
2883                    });
2884                }
2885            }
2886        }
2887    }
2888
2889    /// Detect HTML blocks in the content
2890    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2891        // HTML block elements that trigger block context
2892        // Includes HTML5 media, embedded content, and interactive elements
2893        const BLOCK_ELEMENTS: &[&str] = &[
2894            "address",
2895            "article",
2896            "aside",
2897            "audio",
2898            "blockquote",
2899            "canvas",
2900            "details",
2901            "dialog",
2902            "dd",
2903            "div",
2904            "dl",
2905            "dt",
2906            "embed",
2907            "fieldset",
2908            "figcaption",
2909            "figure",
2910            "footer",
2911            "form",
2912            "h1",
2913            "h2",
2914            "h3",
2915            "h4",
2916            "h5",
2917            "h6",
2918            "header",
2919            "hr",
2920            "iframe",
2921            "li",
2922            "main",
2923            "menu",
2924            "nav",
2925            "noscript",
2926            "object",
2927            "ol",
2928            "p",
2929            "picture",
2930            "pre",
2931            "script",
2932            "search",
2933            "section",
2934            "source",
2935            "style",
2936            "summary",
2937            "svg",
2938            "table",
2939            "tbody",
2940            "td",
2941            "template",
2942            "textarea",
2943            "tfoot",
2944            "th",
2945            "thead",
2946            "tr",
2947            "track",
2948            "ul",
2949            "video",
2950        ];
2951
2952        let mut i = 0;
2953        while i < lines.len() {
2954            // Skip if already in code block or front matter
2955            if lines[i].in_code_block || lines[i].in_front_matter {
2956                i += 1;
2957                continue;
2958            }
2959
2960            let trimmed = lines[i].content(content).trim_start();
2961
2962            // Check if line starts with an HTML tag
2963            if trimmed.starts_with('<') && trimmed.len() > 1 {
2964                // Extract tag name safely
2965                let after_bracket = &trimmed[1..];
2966                let is_closing = after_bracket.starts_with('/');
2967                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2968
2969                // Extract tag name (stop at space, >, /, or end of string)
2970                let tag_name = tag_start
2971                    .chars()
2972                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2973                    .collect::<String>()
2974                    .to_lowercase();
2975
2976                // Check if it's a block element
2977                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2978                    // Mark this line as in HTML block
2979                    lines[i].in_html_block = true;
2980
2981                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2982                    // This avoids complex nesting logic that might cause infinite loops
2983                    // Only search for closing tag on subsequent lines if the opening tag
2984                    // does NOT have its closing tag on the same line
2985                    if !is_closing {
2986                        let closing_tag = format!("</{tag_name}>");
2987
2988                        // Check if closing tag is on the same line as opening tag
2989                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
2990                        let same_line_close = lines[i].content(content).contains(&closing_tag);
2991
2992                        // Only search subsequent lines if the tag isn't self-closed on this line
2993                        if !same_line_close {
2994                            // style and script tags can contain blank lines (CSS/JS formatting)
2995                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
2996                            let mut j = i + 1;
2997                            let mut found_closing_tag = false;
2998                            while j < lines.len() && j < i + 100 {
2999                                // Limit search to 100 lines
3000                                // Stop at blank lines (except for style/script tags)
3001                                if !allow_blank_lines && lines[j].is_blank {
3002                                    break;
3003                                }
3004
3005                                lines[j].in_html_block = true;
3006
3007                                // Check if this line contains the closing tag
3008                                if lines[j].content(content).contains(&closing_tag) {
3009                                    found_closing_tag = true;
3010                                }
3011
3012                                // After finding closing tag, continue marking lines as
3013                                // in_html_block until blank line (per CommonMark spec)
3014                                if found_closing_tag {
3015                                    j += 1;
3016                                    // Continue marking subsequent lines until blank
3017                                    while j < lines.len() && j < i + 100 {
3018                                        if lines[j].is_blank {
3019                                            break;
3020                                        }
3021                                        lines[j].in_html_block = true;
3022                                        j += 1;
3023                                    }
3024                                    break;
3025                                }
3026                                j += 1;
3027                            }
3028                        }
3029                    }
3030                }
3031            }
3032
3033            i += 1;
3034        }
3035    }
3036
3037    /// Detect ESM import/export blocks anywhere in MDX files
3038    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
3039    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3040        // Only process MDX files
3041        if !flavor.supports_esm_blocks() {
3042            return;
3043        }
3044
3045        let mut in_multiline_import = false;
3046
3047        for line in lines.iter_mut() {
3048            // Skip code blocks, front matter, and HTML comments
3049            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3050                in_multiline_import = false;
3051                continue;
3052            }
3053
3054            let line_content = line.content(content);
3055            let trimmed = line_content.trim();
3056
3057            // Handle continuation of multi-line import/export
3058            if in_multiline_import {
3059                line.in_esm_block = true;
3060                // Check if this line completes the statement
3061                // Multi-line import ends when we see the closing quote + optional semicolon
3062                if trimmed.ends_with('\'')
3063                    || trimmed.ends_with('"')
3064                    || trimmed.ends_with("';")
3065                    || trimmed.ends_with("\";")
3066                    || line_content.contains(';')
3067                {
3068                    in_multiline_import = false;
3069                }
3070                continue;
3071            }
3072
3073            // Skip blank lines
3074            if line.is_blank {
3075                continue;
3076            }
3077
3078            // Check if line starts with import or export
3079            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3080                line.in_esm_block = true;
3081
3082                // Determine if this is a complete single-line statement or starts a multi-line one
3083                // Multi-line imports look like:
3084                //   import {
3085                //     Foo,
3086                //     Bar
3087                //   } from 'module'
3088                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3089                let is_import = trimmed.starts_with("import ");
3090
3091                // Check for simple complete statements
3092                let is_complete =
3093                    // Ends with semicolon
3094                    trimmed.ends_with(';')
3095                    // import/export with from clause that ends with quote
3096                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3097                    // Simple export (export const/let/var/function/class without from)
3098                    || (!is_import && !trimmed.contains(" from ") && (
3099                        trimmed.starts_with("export const ")
3100                        || trimmed.starts_with("export let ")
3101                        || trimmed.starts_with("export var ")
3102                        || trimmed.starts_with("export function ")
3103                        || trimmed.starts_with("export class ")
3104                        || trimmed.starts_with("export default ")
3105                    ));
3106
3107                if !is_complete && is_import {
3108                    // Only imports can span multiple lines in the typical case
3109                    // Check if it looks like the start of a multi-line import
3110                    // e.g., "import {" or "import type {"
3111                    if trimmed.contains('{') && !trimmed.contains('}') {
3112                        in_multiline_import = true;
3113                    }
3114                }
3115            }
3116        }
3117    }
3118
3119    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3120    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3121    fn detect_jsx_and_mdx_comments(
3122        content: &str,
3123        lines: &mut [LineInfo],
3124        flavor: MarkdownFlavor,
3125        code_blocks: &[(usize, usize)],
3126    ) -> (ByteRanges, ByteRanges) {
3127        // Only process MDX files
3128        if !flavor.supports_jsx() {
3129            return (Vec::new(), Vec::new());
3130        }
3131
3132        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3133        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3134
3135        // Quick check - if no braces, no JSX expressions or MDX comments
3136        if !content.contains('{') {
3137            return (jsx_expression_ranges, mdx_comment_ranges);
3138        }
3139
3140        let bytes = content.as_bytes();
3141        let mut i = 0;
3142
3143        while i < bytes.len() {
3144            if bytes[i] == b'{' {
3145                // Check if we're in a code block
3146                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3147                    i += 1;
3148                    continue;
3149                }
3150
3151                let start = i;
3152
3153                // Check if it's an MDX comment: {/* ... */}
3154                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3155                    // Find the closing */}
3156                    let mut j = i + 3;
3157                    while j + 2 < bytes.len() {
3158                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3159                            let end = j + 3;
3160                            mdx_comment_ranges.push((start, end));
3161
3162                            // Mark lines as in MDX comment
3163                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3164                                line.in_mdx_comment = true;
3165                            });
3166
3167                            i = end;
3168                            break;
3169                        }
3170                        j += 1;
3171                    }
3172                    if j + 2 >= bytes.len() {
3173                        // Unclosed MDX comment - mark rest as comment
3174                        mdx_comment_ranges.push((start, bytes.len()));
3175                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3176                            line.in_mdx_comment = true;
3177                        });
3178                        break;
3179                    }
3180                } else {
3181                    // Regular JSX expression: { ... }
3182                    // Need to handle nested braces
3183                    let mut brace_depth = 1;
3184                    let mut j = i + 1;
3185                    let mut in_string = false;
3186                    let mut string_char = b'"';
3187
3188                    while j < bytes.len() && brace_depth > 0 {
3189                        let c = bytes[j];
3190
3191                        // Handle strings to avoid counting braces inside them
3192                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3193                            in_string = true;
3194                            string_char = c;
3195                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3196                            in_string = false;
3197                        } else if !in_string {
3198                            if c == b'{' {
3199                                brace_depth += 1;
3200                            } else if c == b'}' {
3201                                brace_depth -= 1;
3202                            }
3203                        }
3204                        j += 1;
3205                    }
3206
3207                    if brace_depth == 0 {
3208                        let end = j;
3209                        jsx_expression_ranges.push((start, end));
3210
3211                        // Mark lines as in JSX expression
3212                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3213                            line.in_jsx_expression = true;
3214                        });
3215
3216                        i = end;
3217                    } else {
3218                        i += 1;
3219                    }
3220                }
3221            } else {
3222                i += 1;
3223            }
3224        }
3225
3226        (jsx_expression_ranges, mdx_comment_ranges)
3227    }
3228
3229    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3230    /// and populate the corresponding fields in LineInfo
3231    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3232        if flavor != MarkdownFlavor::MkDocs {
3233            return;
3234        }
3235
3236        use crate::utils::mkdocs_admonitions;
3237        use crate::utils::mkdocs_definition_lists;
3238        use crate::utils::mkdocs_tabs;
3239
3240        let content_lines: Vec<&str> = content.lines().collect();
3241
3242        // Track admonition context
3243        let mut in_admonition = false;
3244        let mut admonition_indent = 0;
3245
3246        // Track tab context
3247        let mut in_tab = false;
3248        let mut tab_indent = 0;
3249
3250        // Track definition list context
3251        let mut in_definition = false;
3252
3253        for (i, line) in content_lines.iter().enumerate() {
3254            if i >= lines.len() {
3255                break;
3256            }
3257
3258            // Check for admonition markers first - even on lines marked as code blocks
3259            // Pulldown-cmark marks 4-space indented content as indented code blocks,
3260            // but in MkDocs this is admonition/tab content, not code.
3261            if mkdocs_admonitions::is_admonition_start(line) {
3262                in_admonition = true;
3263                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3264                lines[i].in_admonition = true;
3265            } else if in_admonition {
3266                // Check if still in admonition content
3267                if line.trim().is_empty() {
3268                    // Blank lines are part of admonitions
3269                    lines[i].in_admonition = true;
3270                    // Override code block detection for blank lines inside admonitions
3271                    lines[i].in_code_block = false;
3272                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3273                    lines[i].in_admonition = true;
3274                    // Override code block detection - this is admonition content, not code
3275                    lines[i].in_code_block = false;
3276                } else {
3277                    // End of admonition
3278                    in_admonition = false;
3279                    // Check if this line starts a new admonition
3280                    if mkdocs_admonitions::is_admonition_start(line) {
3281                        in_admonition = true;
3282                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3283                        lines[i].in_admonition = true;
3284                    }
3285                }
3286            }
3287
3288            // Check for tab markers - also before the code block skip
3289            // Tab content also uses 4-space indentation which pulldown-cmark treats as code
3290            if mkdocs_tabs::is_tab_marker(line) {
3291                in_tab = true;
3292                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3293                lines[i].in_content_tab = true;
3294            } else if in_tab {
3295                // Check if still in tab content
3296                if line.trim().is_empty() {
3297                    // Blank lines are part of tabs
3298                    lines[i].in_content_tab = true;
3299                    lines[i].in_code_block = false;
3300                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3301                    lines[i].in_content_tab = true;
3302                    // Override code block detection - this is tab content, not code
3303                    lines[i].in_code_block = false;
3304                } else {
3305                    // End of tab content
3306                    in_tab = false;
3307                    // Check if this line starts a new tab
3308                    if mkdocs_tabs::is_tab_marker(line) {
3309                        in_tab = true;
3310                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3311                        lines[i].in_content_tab = true;
3312                    }
3313                }
3314            }
3315
3316            // Skip remaining detection for lines in actual code blocks
3317            if lines[i].in_code_block {
3318                continue;
3319            }
3320
3321            // Check for definition list items
3322            if mkdocs_definition_lists::is_definition_line(line) {
3323                in_definition = true;
3324                lines[i].in_definition_list = true;
3325            } else if in_definition {
3326                // Check if continuation
3327                if mkdocs_definition_lists::is_definition_continuation(line) {
3328                    lines[i].in_definition_list = true;
3329                } else if line.trim().is_empty() {
3330                    // Blank line might continue definition
3331                    lines[i].in_definition_list = true;
3332                } else if mkdocs_definition_lists::could_be_term_line(line) {
3333                    // This could be a new term - check if followed by definition
3334                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3335                    {
3336                        lines[i].in_definition_list = true;
3337                    } else {
3338                        in_definition = false;
3339                    }
3340                } else {
3341                    in_definition = false;
3342                }
3343            } else if mkdocs_definition_lists::could_be_term_line(line) {
3344                // Check if this is a term followed by a definition
3345                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3346                    lines[i].in_definition_list = true;
3347                    in_definition = true;
3348                }
3349            }
3350        }
3351    }
3352
3353    /// Detect Obsidian comment blocks (%%...%%) in Obsidian flavor
3354    ///
3355    /// Obsidian comments use `%%` as delimiters:
3356    /// - Inline: `text %%hidden%% text`
3357    /// - Block: `%%\nmulti-line\n%%`
3358    ///
3359    /// Comments do NOT nest - the first `%%` after an opening `%%` closes the comment.
3360    /// Comments are NOT detected inside code blocks or HTML comments.
3361    ///
3362    /// Returns the computed comment ranges for use by rules that need position-level checking.
3363    fn detect_obsidian_comments(
3364        content: &str,
3365        lines: &mut [LineInfo],
3366        flavor: MarkdownFlavor,
3367        code_span_ranges: &[(usize, usize)],
3368    ) -> Vec<(usize, usize)> {
3369        // Only process Obsidian files
3370        if flavor != MarkdownFlavor::Obsidian {
3371            return Vec::new();
3372        }
3373
3374        // Compute Obsidian comment ranges (byte ranges)
3375        let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3376
3377        // Mark lines that fall within comment ranges
3378        for range in &comment_ranges {
3379            for line in lines.iter_mut() {
3380                // Skip lines in code blocks or HTML comments - they take precedence
3381                if line.in_code_block || line.in_html_comment {
3382                    continue;
3383                }
3384
3385                let line_start = line.byte_offset;
3386                let line_end = line.byte_offset + line.byte_len;
3387
3388                // Check if this line is entirely within a comment
3389                // A line is "in" a comment if it starts within or after the comment start
3390                // AND ends within or before the comment end
3391                if line_start >= range.0 && line_end <= range.1 {
3392                    line.in_obsidian_comment = true;
3393                } else if line_start < range.1 && line_end > range.0 {
3394                    // Line partially overlaps with comment - check if the overlap is significant
3395                    // For inline comments on a line, we still mark the line if any part is in comment
3396                    // However, for the filtered_lines API, we only skip lines entirely within comments
3397                    // This matches the behavior of HTML comments
3398
3399                    // Check if the ENTIRE line content (excluding leading/trailing whitespace)
3400                    // is within the comment range
3401                    let line_content_start = line_start;
3402                    let line_content_end = line_end;
3403
3404                    if line_content_start >= range.0 && line_content_end <= range.1 {
3405                        line.in_obsidian_comment = true;
3406                    }
3407                }
3408            }
3409        }
3410
3411        comment_ranges
3412    }
3413
3414    /// Compute byte ranges for all Obsidian comments in the content
3415    ///
3416    /// Returns a vector of (start, end) byte offset pairs for each comment.
3417    /// Comments do not nest - first `%%` after an opening `%%` closes it.
3418    fn compute_obsidian_comment_ranges(
3419        content: &str,
3420        lines: &[LineInfo],
3421        code_span_ranges: &[(usize, usize)],
3422    ) -> Vec<(usize, usize)> {
3423        let mut ranges = Vec::new();
3424
3425        // Quick check - if no %% at all, no comments
3426        if !content.contains("%%") {
3427            return ranges;
3428        }
3429
3430        // Build skip ranges for code blocks, HTML comments, and inline code spans
3431        // to avoid detecting %% inside those regions.
3432        let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3433        for line in lines {
3434            if line.in_code_block || line.in_html_comment {
3435                skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3436            }
3437        }
3438        skip_ranges.extend(code_span_ranges.iter().copied());
3439
3440        if !skip_ranges.is_empty() {
3441            // Sort and merge overlapping ranges for efficient scanning
3442            skip_ranges.sort_by_key(|(start, _)| *start);
3443            let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3444            for (start, end) in skip_ranges {
3445                if let Some((_, last_end)) = merged.last_mut()
3446                    && start <= *last_end
3447                {
3448                    *last_end = (*last_end).max(end);
3449                    continue;
3450                }
3451                merged.push((start, end));
3452            }
3453            skip_ranges = merged;
3454        }
3455
3456        let content_bytes = content.as_bytes();
3457        let len = content.len();
3458        let mut i = 0;
3459        let mut in_comment = false;
3460        let mut comment_start = 0;
3461        let mut skip_idx = 0;
3462
3463        while i < len.saturating_sub(1) {
3464            // Fast-skip any ranges we should ignore (code blocks, HTML comments, code spans)
3465            if skip_idx < skip_ranges.len() {
3466                let (skip_start, skip_end) = skip_ranges[skip_idx];
3467                if i >= skip_end {
3468                    skip_idx += 1;
3469                    continue;
3470                }
3471                if i >= skip_start {
3472                    i = skip_end;
3473                    continue;
3474                }
3475            }
3476
3477            // Check for %%
3478            if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3479                if !in_comment {
3480                    // Opening %%
3481                    in_comment = true;
3482                    comment_start = i;
3483                    i += 2;
3484                } else {
3485                    // Closing %%
3486                    let comment_end = i + 2;
3487                    ranges.push((comment_start, comment_end));
3488                    in_comment = false;
3489                    i += 2;
3490                }
3491            } else {
3492                i += 1;
3493            }
3494        }
3495
3496        // Handle unclosed comment - extends to end of document
3497        if in_comment {
3498            ranges.push((comment_start, len));
3499        }
3500
3501        ranges
3502    }
3503
3504    /// Helper to mark lines within a byte range
3505    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3506    where
3507        F: FnMut(&mut LineInfo),
3508    {
3509        // Find lines that overlap with the range
3510        for line in lines.iter_mut() {
3511            let line_start = line.byte_offset;
3512            let line_end = line.byte_offset + line.byte_len;
3513
3514            // Check if this line overlaps with the range
3515            if line_start < end && line_end > start {
3516                f(line);
3517            }
3518        }
3519
3520        // Silence unused warning for content (needed for signature consistency)
3521        let _ = content;
3522    }
3523
3524    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3525    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3526        // Quick check - if no backticks, no code spans
3527        if !content.contains('`') {
3528            return Vec::new();
3529        }
3530
3531        // Use pulldown-cmark's streaming parser with byte offsets
3532        let parser = Parser::new(content).into_offset_iter();
3533        let mut ranges = Vec::new();
3534
3535        for (event, range) in parser {
3536            if let Event::Code(_) = event {
3537                ranges.push((range.start, range.end));
3538            }
3539        }
3540
3541        Self::build_code_spans_from_ranges(content, lines, &ranges)
3542    }
3543
3544    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3545        let mut code_spans = Vec::new();
3546        if ranges.is_empty() {
3547            return code_spans;
3548        }
3549
3550        for &(start_pos, end_pos) in ranges {
3551            // The range includes the backticks, extract the actual content
3552            let full_span = &content[start_pos..end_pos];
3553            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3554
3555            // Extract content between backticks, preserving spaces
3556            let content_start = start_pos + backtick_count;
3557            let content_end = end_pos - backtick_count;
3558            let span_content = if content_start < content_end {
3559                content[content_start..content_end].to_string()
3560            } else {
3561                String::new()
3562            };
3563
3564            // Use binary search to find line number - O(log n) instead of O(n)
3565            // Find the rightmost line whose byte_offset <= start_pos
3566            let line_idx = lines
3567                .partition_point(|line| line.byte_offset <= start_pos)
3568                .saturating_sub(1);
3569            let line_num = line_idx + 1;
3570            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3571
3572            // Find end column using binary search
3573            let end_line_idx = lines
3574                .partition_point(|line| line.byte_offset <= end_pos)
3575                .saturating_sub(1);
3576            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3577
3578            // Convert byte offsets to character positions for correct Unicode handling
3579            // This ensures consistency with warning.column which uses character positions
3580            let line_content = lines[line_idx].content(content);
3581            let col_start = if byte_col_start <= line_content.len() {
3582                line_content[..byte_col_start].chars().count()
3583            } else {
3584                line_content.chars().count()
3585            };
3586
3587            let end_line_content = lines[end_line_idx].content(content);
3588            let col_end = if byte_col_end <= end_line_content.len() {
3589                end_line_content[..byte_col_end].chars().count()
3590            } else {
3591                end_line_content.chars().count()
3592            };
3593
3594            code_spans.push(CodeSpan {
3595                line: line_num,
3596                end_line: end_line_idx + 1,
3597                start_col: col_start,
3598                end_col: col_end,
3599                byte_offset: start_pos,
3600                byte_end: end_pos,
3601                backtick_count,
3602                content: span_content,
3603            });
3604        }
3605
3606        // Sort by position to ensure consistent ordering
3607        code_spans.sort_by_key(|span| span.byte_offset);
3608
3609        code_spans
3610    }
3611
3612    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3613    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3614        let mut math_spans = Vec::new();
3615
3616        // Quick check - if no $ signs, no math spans
3617        if !content.contains('$') {
3618            return math_spans;
3619        }
3620
3621        // Use pulldown-cmark with ENABLE_MATH option
3622        let mut options = Options::empty();
3623        options.insert(Options::ENABLE_MATH);
3624        let parser = Parser::new_ext(content, options).into_offset_iter();
3625
3626        for (event, range) in parser {
3627            let (is_display, math_content) = match &event {
3628                Event::InlineMath(text) => (false, text.as_ref()),
3629                Event::DisplayMath(text) => (true, text.as_ref()),
3630                _ => continue,
3631            };
3632
3633            let start_pos = range.start;
3634            let end_pos = range.end;
3635
3636            // Use binary search to find line number - O(log n) instead of O(n)
3637            let line_idx = lines
3638                .partition_point(|line| line.byte_offset <= start_pos)
3639                .saturating_sub(1);
3640            let line_num = line_idx + 1;
3641            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3642
3643            // Find end column using binary search
3644            let end_line_idx = lines
3645                .partition_point(|line| line.byte_offset <= end_pos)
3646                .saturating_sub(1);
3647            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3648
3649            // Convert byte offsets to character positions for correct Unicode handling
3650            let line_content = lines[line_idx].content(content);
3651            let col_start = if byte_col_start <= line_content.len() {
3652                line_content[..byte_col_start].chars().count()
3653            } else {
3654                line_content.chars().count()
3655            };
3656
3657            let end_line_content = lines[end_line_idx].content(content);
3658            let col_end = if byte_col_end <= end_line_content.len() {
3659                end_line_content[..byte_col_end].chars().count()
3660            } else {
3661                end_line_content.chars().count()
3662            };
3663
3664            math_spans.push(MathSpan {
3665                line: line_num,
3666                end_line: end_line_idx + 1,
3667                start_col: col_start,
3668                end_col: col_end,
3669                byte_offset: start_pos,
3670                byte_end: end_pos,
3671                is_display,
3672                content: math_content.to_string(),
3673            });
3674        }
3675
3676        // Sort by position to ensure consistent ordering
3677        math_spans.sort_by_key(|span| span.byte_offset);
3678
3679        math_spans
3680    }
3681
3682    /// Parse all list blocks in the content (legacy line-by-line approach)
3683    ///
3684    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3685    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3686    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3687    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3688    ///   treated as list continuation (based on the list marker width)
3689    ///
3690    /// When a new list item is encountered, we check if list-breaking content was seen
3691    /// since the last item. If so, we start a new list block.
3692    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3693        // Minimum indentation for unordered list continuation per CommonMark spec
3694        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3695
3696        /// Initialize or reset the forward-scanning tracking state.
3697        /// This helper eliminates code duplication across three initialization sites.
3698        #[inline]
3699        fn reset_tracking_state(
3700            list_item: &ListItemInfo,
3701            has_list_breaking_content: &mut bool,
3702            min_continuation: &mut usize,
3703        ) {
3704            *has_list_breaking_content = false;
3705            let marker_width = if list_item.is_ordered {
3706                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3707            } else {
3708                list_item.marker.len()
3709            };
3710            *min_continuation = if list_item.is_ordered {
3711                marker_width
3712            } else {
3713                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3714            };
3715        }
3716
3717        // Pre-size based on lines that could be list items
3718        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3719        let mut current_block: Option<ListBlock> = None;
3720        let mut last_list_item_line = 0;
3721        let mut current_indent_level = 0;
3722        let mut last_marker_width = 0;
3723
3724        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3725        let mut has_list_breaking_content_since_last_item = false;
3726        let mut min_continuation_for_tracking = 0;
3727
3728        for (line_idx, line_info) in lines.iter().enumerate() {
3729            let line_num = line_idx + 1;
3730
3731            // Enhanced code block handling using Design #3's context analysis
3732            if line_info.in_code_block {
3733                if let Some(ref mut block) = current_block {
3734                    // Calculate minimum indentation for list continuation
3735                    let min_continuation_indent =
3736                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3737
3738                    // Analyze code block context using the three-tier classification
3739                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3740
3741                    match context {
3742                        CodeBlockContext::Indented => {
3743                            // Code block is properly indented - continues the list
3744                            block.end_line = line_num;
3745                            continue;
3746                        }
3747                        CodeBlockContext::Standalone => {
3748                            // Code block separates lists - end current block
3749                            let completed_block = current_block.take().unwrap();
3750                            list_blocks.push(completed_block);
3751                            continue;
3752                        }
3753                        CodeBlockContext::Adjacent => {
3754                            // Edge case - use conservative behavior (continue list)
3755                            block.end_line = line_num;
3756                            continue;
3757                        }
3758                    }
3759                } else {
3760                    // No current list block - skip code block lines
3761                    continue;
3762                }
3763            }
3764
3765            // Extract blockquote prefix if any
3766            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3767                caps.get(0).unwrap().as_str().to_string()
3768            } else {
3769                String::new()
3770            };
3771
3772            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3773            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3774            if let Some(ref block) = current_block
3775                && line_info.list_item.is_none()
3776                && !line_info.is_blank
3777                && !line_info.in_code_span_continuation
3778            {
3779                let line_content = line_info.content(content).trim();
3780
3781                // Check for structural separators that break lists
3782                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3783                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3784                // as they indicate improper indentation rather than lazy continuation.
3785                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3786
3787                // Check if blockquote context changes (different prefix than current block)
3788                // Lines within the SAME blockquote context don't break lists
3789                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3790
3791                let breaks_list = line_info.heading.is_some()
3792                    || line_content.starts_with("---")
3793                    || line_content.starts_with("***")
3794                    || line_content.starts_with("___")
3795                    || crate::utils::skip_context::is_table_line(line_content)
3796                    || blockquote_prefix_changes
3797                    || (line_info.indent > 0
3798                        && line_info.indent < min_continuation_for_tracking
3799                        && !is_lazy_continuation);
3800
3801                if breaks_list {
3802                    has_list_breaking_content_since_last_item = true;
3803                }
3804            }
3805
3806            // If this line is a code span continuation within an active list block,
3807            // extend the block's end_line to include this line (maintains list continuity)
3808            if line_info.in_code_span_continuation
3809                && line_info.list_item.is_none()
3810                && let Some(ref mut block) = current_block
3811            {
3812                block.end_line = line_num;
3813            }
3814
3815            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3816            // properly indented lines within the list). This ensures the workaround at line 2448
3817            // works correctly when there are multiple continuation lines before a nested list item.
3818            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3819            // For blockquote lines, compute effective indent after stripping the prefix
3820            let effective_continuation_indent = if let Some(ref block) = current_block {
3821                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3822                let line_content = line_info.content(content);
3823                let line_bq_level = line_content
3824                    .chars()
3825                    .take_while(|c| *c == '>' || c.is_whitespace())
3826                    .filter(|&c| c == '>')
3827                    .count();
3828                if line_bq_level > 0 && line_bq_level == block_bq_level {
3829                    // Compute indent after blockquote markers
3830                    let mut pos = 0;
3831                    let mut found_markers = 0;
3832                    for c in line_content.chars() {
3833                        pos += c.len_utf8();
3834                        if c == '>' {
3835                            found_markers += 1;
3836                            if found_markers == line_bq_level {
3837                                if line_content.get(pos..pos + 1) == Some(" ") {
3838                                    pos += 1;
3839                                }
3840                                break;
3841                            }
3842                        }
3843                    }
3844                    let after_bq = &line_content[pos..];
3845                    after_bq.len() - after_bq.trim_start().len()
3846                } else {
3847                    line_info.indent
3848                }
3849            } else {
3850                line_info.indent
3851            };
3852            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3853                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3854                if block_bq_level > 0 {
3855                    if block.is_ordered { last_marker_width } else { 2 }
3856                } else {
3857                    min_continuation_for_tracking
3858                }
3859            } else {
3860                min_continuation_for_tracking
3861            };
3862            // Lazy continuation allows unindented text to continue a list item,
3863            // but NOT structural elements like headings, code fences, or horizontal rules
3864            let is_structural_element = line_info.heading.is_some()
3865                || line_info.content(content).trim().starts_with("```")
3866                || line_info.content(content).trim().starts_with("~~~");
3867            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3868                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3869
3870            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3871                eprintln!(
3872                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3873                    line_num,
3874                    effective_continuation_indent,
3875                    adjusted_min_continuation_for_tracking,
3876                    is_valid_continuation,
3877                    line_info.in_code_span_continuation,
3878                    line_info.in_code_block,
3879                    current_block.is_some()
3880                );
3881            }
3882
3883            if !line_info.in_code_span_continuation
3884                && line_info.list_item.is_none()
3885                && !line_info.is_blank
3886                && !line_info.in_code_block
3887                && is_valid_continuation
3888                && let Some(ref mut block) = current_block
3889            {
3890                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3891                    eprintln!(
3892                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3893                        line_num, block.end_line, line_num
3894                    );
3895                }
3896                block.end_line = line_num;
3897            }
3898
3899            // Check if this line is a list item
3900            if let Some(list_item) = &line_info.list_item {
3901                // Calculate nesting level based on indentation
3902                let item_indent = list_item.marker_column;
3903                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3904
3905                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3906                    eprintln!(
3907                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3908                        line_num, list_item.marker, item_indent
3909                    );
3910                }
3911
3912                if let Some(ref mut block) = current_block {
3913                    // Check if this continues the current block
3914                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3915                    // or a continuation at the same or lower level
3916                    let is_nested = nesting > block.nesting_level;
3917                    let same_type =
3918                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3919                    let same_context = block.blockquote_prefix == blockquote_prefix;
3920                    // Allow one blank line after last item, or lines immediately after block content
3921                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3922
3923                    // For unordered lists, also check marker consistency
3924                    let marker_compatible =
3925                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3926
3927                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3928                    // This eliminates the quadratic bottleneck from issue #148
3929                    let has_non_list_content = has_list_breaking_content_since_last_item;
3930
3931                    // A list continues if:
3932                    // 1. It's a nested item (indented more than the parent), OR
3933                    // 2. It's the same type at the same level with reasonable distance
3934                    let mut continues_list = if is_nested {
3935                        // Nested items always continue the list if they're in the same context
3936                        same_context && reasonable_distance && !has_non_list_content
3937                    } else {
3938                        // Same-level items need to match type and markers
3939                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3940                    };
3941
3942                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3943                        eprintln!(
3944                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3945                            line_num,
3946                            continues_list,
3947                            is_nested,
3948                            same_type,
3949                            same_context,
3950                            reasonable_distance,
3951                            marker_compatible,
3952                            has_non_list_content,
3953                            last_list_item_line,
3954                            block.end_line
3955                        );
3956                    }
3957
3958                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3959                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3960                    // Apply for: nested items (different types OK), OR same-level same-type items
3961                    if !continues_list
3962                        && (is_nested || same_type)
3963                        && reasonable_distance
3964                        && line_num > 0
3965                        && block.end_line == line_num - 1
3966                    {
3967                        // Check if the previous line was a list item or a continuation of a list item
3968                        // (including lazy continuation lines)
3969                        if block.item_lines.contains(&(line_num - 1)) {
3970                            // They're consecutive list items - force them to be in the same list
3971                            continues_list = true;
3972                        } else {
3973                            // Previous line is a continuation line within this block
3974                            // (e.g., lazy continuation with indent=0)
3975                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3976                            continues_list = true;
3977                        }
3978                    }
3979
3980                    if continues_list {
3981                        // Extend current block
3982                        block.end_line = line_num;
3983                        block.item_lines.push(line_num);
3984
3985                        // Update max marker width
3986                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3987                            list_item.marker.len() + 1
3988                        } else {
3989                            list_item.marker.len()
3990                        });
3991
3992                        // Update marker consistency for unordered lists
3993                        if !block.is_ordered
3994                            && block.marker.is_some()
3995                            && block.marker.as_ref() != Some(&list_item.marker)
3996                        {
3997                            // Mixed markers, clear the marker field
3998                            block.marker = None;
3999                        }
4000
4001                        // Reset tracked state for issue #148 optimization
4002                        reset_tracking_state(
4003                            list_item,
4004                            &mut has_list_breaking_content_since_last_item,
4005                            &mut min_continuation_for_tracking,
4006                        );
4007                    } else {
4008                        // End current block and start a new one
4009                        // When a different list type starts AT THE SAME LEVEL (not nested),
4010                        // trim back lazy continuation lines (they become part of the gap, not the list)
4011                        // For nested items, different types are fine - they're sub-lists
4012                        if !same_type
4013                            && !is_nested
4014                            && let Some(&last_item) = block.item_lines.last()
4015                        {
4016                            block.end_line = last_item;
4017                        }
4018
4019                        list_blocks.push(block.clone());
4020
4021                        *block = ListBlock {
4022                            start_line: line_num,
4023                            end_line: line_num,
4024                            is_ordered: list_item.is_ordered,
4025                            marker: if list_item.is_ordered {
4026                                None
4027                            } else {
4028                                Some(list_item.marker.clone())
4029                            },
4030                            blockquote_prefix: blockquote_prefix.clone(),
4031                            item_lines: vec![line_num],
4032                            nesting_level: nesting,
4033                            max_marker_width: if list_item.is_ordered {
4034                                list_item.marker.len() + 1
4035                            } else {
4036                                list_item.marker.len()
4037                            },
4038                        };
4039
4040                        // Initialize tracked state for new block (issue #148 optimization)
4041                        reset_tracking_state(
4042                            list_item,
4043                            &mut has_list_breaking_content_since_last_item,
4044                            &mut min_continuation_for_tracking,
4045                        );
4046                    }
4047                } else {
4048                    // Start a new block
4049                    current_block = Some(ListBlock {
4050                        start_line: line_num,
4051                        end_line: line_num,
4052                        is_ordered: list_item.is_ordered,
4053                        marker: if list_item.is_ordered {
4054                            None
4055                        } else {
4056                            Some(list_item.marker.clone())
4057                        },
4058                        blockquote_prefix,
4059                        item_lines: vec![line_num],
4060                        nesting_level: nesting,
4061                        max_marker_width: list_item.marker.len(),
4062                    });
4063
4064                    // Initialize tracked state for new block (issue #148 optimization)
4065                    reset_tracking_state(
4066                        list_item,
4067                        &mut has_list_breaking_content_since_last_item,
4068                        &mut min_continuation_for_tracking,
4069                    );
4070                }
4071
4072                last_list_item_line = line_num;
4073                current_indent_level = item_indent;
4074                last_marker_width = if list_item.is_ordered {
4075                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
4076                } else {
4077                    list_item.marker.len()
4078                };
4079            } else if let Some(ref mut block) = current_block {
4080                // Not a list item - check if it continues the current block
4081                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4082                    eprintln!(
4083                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4084                        line_num, line_info.is_blank
4085                    );
4086                }
4087
4088                // For MD032 compatibility, we use a simple approach:
4089                // - Indented lines continue the list
4090                // - Blank lines followed by indented content continue the list
4091                // - Everything else ends the list
4092
4093                // Check if the last line in the list block ended with a backslash (hard line break)
4094                // This handles cases where list items use backslash for hard line breaks
4095                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4096                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4097                } else {
4098                    false
4099                };
4100
4101                // Calculate minimum indentation for list continuation
4102                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
4103                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
4104                let min_continuation_indent = if block.is_ordered {
4105                    current_indent_level + last_marker_width
4106                } else {
4107                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
4108                };
4109
4110                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4111                    // Indented line or backslash continuation continues the list
4112                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4113                        eprintln!(
4114                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4115                            line_num, line_info.indent, min_continuation_indent
4116                        );
4117                    }
4118                    block.end_line = line_num;
4119                } else if line_info.is_blank {
4120                    // Blank line - check if it's internal to the list or ending it
4121                    // We only include blank lines that are followed by more list content
4122                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4123                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4124                    }
4125                    let mut check_idx = line_idx + 1;
4126                    let mut found_continuation = false;
4127
4128                    // Skip additional blank lines
4129                    while check_idx < lines.len() && lines[check_idx].is_blank {
4130                        check_idx += 1;
4131                    }
4132
4133                    if check_idx < lines.len() {
4134                        let next_line = &lines[check_idx];
4135                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
4136                        let next_content = next_line.content(content);
4137                        // Use blockquote level (count of >) to compare, not the full prefix
4138                        // This avoids issues where the regex captures extra whitespace
4139                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4140                        let next_bq_level_for_indent = next_content
4141                            .chars()
4142                            .take_while(|c| *c == '>' || c.is_whitespace())
4143                            .filter(|&c| c == '>')
4144                            .count();
4145                        let effective_indent =
4146                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4147                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
4148                                // Find position after ">" and one space
4149                                let mut pos = 0;
4150                                let mut found_markers = 0;
4151                                for c in next_content.chars() {
4152                                    pos += c.len_utf8();
4153                                    if c == '>' {
4154                                        found_markers += 1;
4155                                        if found_markers == next_bq_level_for_indent {
4156                                            // Skip optional space after last >
4157                                            if next_content.get(pos..pos + 1) == Some(" ") {
4158                                                pos += 1;
4159                                            }
4160                                            break;
4161                                        }
4162                                    }
4163                                }
4164                                let after_blockquote_marker = &next_content[pos..];
4165                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4166                            } else {
4167                                next_line.indent
4168                            };
4169                        // Also adjust min_continuation_indent for blockquote lists
4170                        // The marker_column includes blockquote prefix, so subtract it
4171                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4172                            // For blockquote lists, the continuation is relative to blockquote content
4173                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
4174                            if block.is_ordered { last_marker_width } else { 2 }
4175                        } else {
4176                            min_continuation_indent
4177                        };
4178                        // Check if followed by indented content (list continuation)
4179                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4180                            eprintln!(
4181                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4182                                line_num,
4183                                check_idx + 1,
4184                                effective_indent,
4185                                adjusted_min_continuation,
4186                                next_line.list_item.is_some(),
4187                                next_line.in_code_block
4188                            );
4189                        }
4190                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4191                            found_continuation = true;
4192                        }
4193                        // Check if followed by another list item at the same level
4194                        else if !next_line.in_code_block
4195                            && next_line.list_item.is_some()
4196                            && let Some(item) = &next_line.list_item
4197                        {
4198                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4199                                .find(next_line.content(content))
4200                                .map_or(String::new(), |m| m.as_str().to_string());
4201                            if item.marker_column == current_indent_level
4202                                && item.is_ordered == block.is_ordered
4203                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4204                            {
4205                                // Check if there was meaningful content between the list items (unused now)
4206                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
4207                                // Pre-compute block's blockquote level for use in closures
4208                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4209                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4210                                    if let Some(between_line) = lines.get(idx) {
4211                                        let between_content = between_line.content(content);
4212                                        let trimmed = between_content.trim();
4213                                        // Skip empty lines
4214                                        if trimmed.is_empty() {
4215                                            return false;
4216                                        }
4217                                        // Check for meaningful content
4218                                        let line_indent = between_content.len() - between_content.trim_start().len();
4219
4220                                        // Check if blockquote level changed (not just if line starts with ">")
4221                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4222                                            .find(between_content)
4223                                            .map_or(String::new(), |m| m.as_str().to_string());
4224                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4225                                        let blockquote_level_changed =
4226                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4227
4228                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4229                                        if trimmed.starts_with("```")
4230                                            || trimmed.starts_with("~~~")
4231                                            || trimmed.starts_with("---")
4232                                            || trimmed.starts_with("***")
4233                                            || trimmed.starts_with("___")
4234                                            || blockquote_level_changed
4235                                            || crate::utils::skip_context::is_table_line(trimmed)
4236                                            || between_line.heading.is_some()
4237                                        {
4238                                            return true; // These are structural separators - meaningful content that breaks lists
4239                                        }
4240
4241                                        // Only properly indented content continues the list
4242                                        line_indent >= min_continuation_indent
4243                                    } else {
4244                                        false
4245                                    }
4246                                });
4247
4248                                if block.is_ordered {
4249                                    // For ordered lists: don't continue if there are structural separators
4250                                    // Check if there are structural separators between the list items
4251                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4252                                        if let Some(between_line) = lines.get(idx) {
4253                                            let between_content = between_line.content(content);
4254                                            let trimmed = between_content.trim();
4255                                            if trimmed.is_empty() {
4256                                                return false;
4257                                            }
4258                                            // Check if blockquote level changed (not just if line starts with ">")
4259                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4260                                                .find(between_content)
4261                                                .map_or(String::new(), |m| m.as_str().to_string());
4262                                            let between_bq_level =
4263                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4264                                            let blockquote_level_changed =
4265                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4266                                            // Check for structural separators that break lists
4267                                            trimmed.starts_with("```")
4268                                                || trimmed.starts_with("~~~")
4269                                                || trimmed.starts_with("---")
4270                                                || trimmed.starts_with("***")
4271                                                || trimmed.starts_with("___")
4272                                                || blockquote_level_changed
4273                                                || crate::utils::skip_context::is_table_line(trimmed)
4274                                                || between_line.heading.is_some()
4275                                        } else {
4276                                            false
4277                                        }
4278                                    });
4279                                    found_continuation = !has_structural_separators;
4280                                } else {
4281                                    // For unordered lists: also check for structural separators
4282                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4283                                        if let Some(between_line) = lines.get(idx) {
4284                                            let between_content = between_line.content(content);
4285                                            let trimmed = between_content.trim();
4286                                            if trimmed.is_empty() {
4287                                                return false;
4288                                            }
4289                                            // Check if blockquote level changed (not just if line starts with ">")
4290                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4291                                                .find(between_content)
4292                                                .map_or(String::new(), |m| m.as_str().to_string());
4293                                            let between_bq_level =
4294                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4295                                            let blockquote_level_changed =
4296                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4297                                            // Check for structural separators that break lists
4298                                            trimmed.starts_with("```")
4299                                                || trimmed.starts_with("~~~")
4300                                                || trimmed.starts_with("---")
4301                                                || trimmed.starts_with("***")
4302                                                || trimmed.starts_with("___")
4303                                                || blockquote_level_changed
4304                                                || crate::utils::skip_context::is_table_line(trimmed)
4305                                                || between_line.heading.is_some()
4306                                        } else {
4307                                            false
4308                                        }
4309                                    });
4310                                    found_continuation = !has_structural_separators;
4311                                }
4312                            }
4313                        }
4314                    }
4315
4316                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4317                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4318                    }
4319                    if found_continuation {
4320                        // Include the blank line in the block
4321                        block.end_line = line_num;
4322                    } else {
4323                        // Blank line ends the list - don't include it
4324                        list_blocks.push(block.clone());
4325                        current_block = None;
4326                    }
4327                } else {
4328                    // Check for lazy continuation - non-indented line immediately after a list item
4329                    // But only if the line has sufficient indentation for the list type
4330                    let min_required_indent = if block.is_ordered {
4331                        current_indent_level + last_marker_width
4332                    } else {
4333                        current_indent_level + 2
4334                    };
4335
4336                    // For lazy continuation to apply, the line must either:
4337                    // 1. Have no indentation (true lazy continuation)
4338                    // 2. Have sufficient indentation for the list type
4339                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4340                    let line_content = line_info.content(content).trim();
4341
4342                    // Check for table-like patterns
4343                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4344
4345                    // Check if blockquote level changed (not just if line starts with ">")
4346                    // Lines within the same blockquote level are NOT structural separators
4347                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4348                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4349                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4350
4351                    let is_structural_separator = line_info.heading.is_some()
4352                        || line_content.starts_with("```")
4353                        || line_content.starts_with("~~~")
4354                        || line_content.starts_with("---")
4355                        || line_content.starts_with("***")
4356                        || line_content.starts_with("___")
4357                        || blockquote_level_changed
4358                        || looks_like_table;
4359
4360                    // Allow lazy continuation if we're still within the same list block
4361                    // (not just immediately after a list item)
4362                    // Also treat code span continuations as valid continuations regardless of indent
4363                    let is_lazy_continuation = !is_structural_separator
4364                        && !line_info.is_blank
4365                        && (line_info.indent == 0
4366                            || line_info.indent >= min_required_indent
4367                            || line_info.in_code_span_continuation);
4368
4369                    if is_lazy_continuation {
4370                        // Per CommonMark, lazy continuation continues until a blank line
4371                        // or structural element, regardless of uppercase at line start
4372                        block.end_line = line_num;
4373                    } else {
4374                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4375                        list_blocks.push(block.clone());
4376                        current_block = None;
4377                    }
4378                }
4379            }
4380        }
4381
4382        // Don't forget the last block
4383        if let Some(block) = current_block {
4384            list_blocks.push(block);
4385        }
4386
4387        // Merge adjacent blocks that should be one
4388        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4389
4390        list_blocks
4391    }
4392
4393    /// Compute character frequency for fast content analysis
4394    fn compute_char_frequency(content: &str) -> CharFrequency {
4395        let mut frequency = CharFrequency::default();
4396
4397        for ch in content.chars() {
4398            match ch {
4399                '#' => frequency.hash_count += 1,
4400                '*' => frequency.asterisk_count += 1,
4401                '_' => frequency.underscore_count += 1,
4402                '-' => frequency.hyphen_count += 1,
4403                '+' => frequency.plus_count += 1,
4404                '>' => frequency.gt_count += 1,
4405                '|' => frequency.pipe_count += 1,
4406                '[' => frequency.bracket_count += 1,
4407                '`' => frequency.backtick_count += 1,
4408                '<' => frequency.lt_count += 1,
4409                '!' => frequency.exclamation_count += 1,
4410                '\n' => frequency.newline_count += 1,
4411                _ => {}
4412            }
4413        }
4414
4415        frequency
4416    }
4417
4418    /// Parse HTML tags in the content
4419    fn parse_html_tags(
4420        content: &str,
4421        lines: &[LineInfo],
4422        code_blocks: &[(usize, usize)],
4423        flavor: MarkdownFlavor,
4424    ) -> Vec<HtmlTag> {
4425        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4426            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4427
4428        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4429
4430        for cap in HTML_TAG_REGEX.captures_iter(content) {
4431            let full_match = cap.get(0).unwrap();
4432            let match_start = full_match.start();
4433            let match_end = full_match.end();
4434
4435            // Skip if in code block
4436            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4437                continue;
4438            }
4439
4440            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4441            let tag_name_original = cap.get(2).unwrap().as_str();
4442            let tag_name = tag_name_original.to_lowercase();
4443            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4444
4445            // Skip JSX components in MDX files (tags starting with uppercase letter)
4446            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4447            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4448                continue;
4449            }
4450
4451            // Find which line this tag is on
4452            let mut line_num = 1;
4453            let mut col_start = match_start;
4454            let mut col_end = match_end;
4455            for (idx, line_info) in lines.iter().enumerate() {
4456                if match_start >= line_info.byte_offset {
4457                    line_num = idx + 1;
4458                    col_start = match_start - line_info.byte_offset;
4459                    col_end = match_end - line_info.byte_offset;
4460                } else {
4461                    break;
4462                }
4463            }
4464
4465            html_tags.push(HtmlTag {
4466                line: line_num,
4467                start_col: col_start,
4468                end_col: col_end,
4469                byte_offset: match_start,
4470                byte_end: match_end,
4471                tag_name,
4472                is_closing,
4473                is_self_closing,
4474                raw_content: full_match.as_str().to_string(),
4475            });
4476        }
4477
4478        html_tags
4479    }
4480
4481    /// Parse table rows in the content
4482    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4483        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4484
4485        for (line_idx, line_info) in lines.iter().enumerate() {
4486            // Skip lines in code blocks or blank lines
4487            if line_info.in_code_block || line_info.is_blank {
4488                continue;
4489            }
4490
4491            let line = line_info.content(content);
4492            let line_num = line_idx + 1;
4493
4494            // Check if this line contains pipes (potential table row)
4495            if !line.contains('|') {
4496                continue;
4497            }
4498
4499            // Count columns by splitting on pipes
4500            let parts: Vec<&str> = line.split('|').collect();
4501            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4502
4503            // Check if this is a separator row
4504            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4505            let mut column_alignments = Vec::new();
4506
4507            if is_separator {
4508                for part in &parts[1..parts.len() - 1] {
4509                    // Skip first and last empty parts
4510                    let trimmed = part.trim();
4511                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4512                        "center".to_string()
4513                    } else if trimmed.ends_with(':') {
4514                        "right".to_string()
4515                    } else if trimmed.starts_with(':') {
4516                        "left".to_string()
4517                    } else {
4518                        "none".to_string()
4519                    };
4520                    column_alignments.push(alignment);
4521                }
4522            }
4523
4524            table_rows.push(TableRow {
4525                line: line_num,
4526                is_separator,
4527                column_count,
4528                column_alignments,
4529            });
4530        }
4531
4532        table_rows
4533    }
4534
4535    /// Parse bare URLs and emails in the content
4536    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4537        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4538
4539        // Check for bare URLs (not in angle brackets or markdown links)
4540        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4541            let full_match = cap.get(0).unwrap();
4542            let match_start = full_match.start();
4543            let match_end = full_match.end();
4544
4545            // Skip if in code block
4546            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4547                continue;
4548            }
4549
4550            // Skip if already in angle brackets or markdown links
4551            let preceding_char = if match_start > 0 {
4552                content.chars().nth(match_start - 1)
4553            } else {
4554                None
4555            };
4556            let following_char = content.chars().nth(match_end);
4557
4558            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4559                continue;
4560            }
4561            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4562                continue;
4563            }
4564
4565            let url = full_match.as_str();
4566            let url_type = if url.starts_with("https://") {
4567                "https"
4568            } else if url.starts_with("http://") {
4569                "http"
4570            } else if url.starts_with("ftp://") {
4571                "ftp"
4572            } else {
4573                "other"
4574            };
4575
4576            // Find which line this URL is on
4577            let mut line_num = 1;
4578            let mut col_start = match_start;
4579            let mut col_end = match_end;
4580            for (idx, line_info) in lines.iter().enumerate() {
4581                if match_start >= line_info.byte_offset {
4582                    line_num = idx + 1;
4583                    col_start = match_start - line_info.byte_offset;
4584                    col_end = match_end - line_info.byte_offset;
4585                } else {
4586                    break;
4587                }
4588            }
4589
4590            bare_urls.push(BareUrl {
4591                line: line_num,
4592                start_col: col_start,
4593                end_col: col_end,
4594                byte_offset: match_start,
4595                byte_end: match_end,
4596                url: url.to_string(),
4597                url_type: url_type.to_string(),
4598            });
4599        }
4600
4601        // Check for bare email addresses
4602        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4603            let full_match = cap.get(0).unwrap();
4604            let match_start = full_match.start();
4605            let match_end = full_match.end();
4606
4607            // Skip if in code block
4608            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4609                continue;
4610            }
4611
4612            // Skip if already in angle brackets or markdown links
4613            let preceding_char = if match_start > 0 {
4614                content.chars().nth(match_start - 1)
4615            } else {
4616                None
4617            };
4618            let following_char = content.chars().nth(match_end);
4619
4620            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4621                continue;
4622            }
4623            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4624                continue;
4625            }
4626
4627            let email = full_match.as_str();
4628
4629            // Find which line this email is on
4630            let mut line_num = 1;
4631            let mut col_start = match_start;
4632            let mut col_end = match_end;
4633            for (idx, line_info) in lines.iter().enumerate() {
4634                if match_start >= line_info.byte_offset {
4635                    line_num = idx + 1;
4636                    col_start = match_start - line_info.byte_offset;
4637                    col_end = match_end - line_info.byte_offset;
4638                } else {
4639                    break;
4640                }
4641            }
4642
4643            bare_urls.push(BareUrl {
4644                line: line_num,
4645                start_col: col_start,
4646                end_col: col_end,
4647                byte_offset: match_start,
4648                byte_end: match_end,
4649                url: email.to_string(),
4650                url_type: "email".to_string(),
4651            });
4652        }
4653
4654        bare_urls
4655    }
4656
4657    /// Get an iterator over valid CommonMark headings
4658    ///
4659    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4660    /// that should be flagged by MD018 but should not be processed by other heading rules.
4661    ///
4662    /// # Examples
4663    ///
4664    /// ```rust
4665    /// use rumdl_lib::lint_context::LintContext;
4666    /// use rumdl_lib::config::MarkdownFlavor;
4667    ///
4668    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4669    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4670    ///
4671    /// for heading in ctx.valid_headings() {
4672    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4673    /// }
4674    /// // Only prints valid headings, skips `#NoSpace`
4675    /// ```
4676    #[must_use]
4677    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4678        ValidHeadingsIter::new(&self.lines)
4679    }
4680
4681    /// Check if the document contains any valid CommonMark headings
4682    ///
4683    /// Returns `true` if there is at least one heading with proper space after `#`.
4684    #[must_use]
4685    pub fn has_valid_headings(&self) -> bool {
4686        self.lines
4687            .iter()
4688            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4689    }
4690}
4691
4692/// Merge adjacent list blocks that should be treated as one
4693fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4694    if list_blocks.len() < 2 {
4695        return;
4696    }
4697
4698    let mut merger = ListBlockMerger::new(content, lines);
4699    *list_blocks = merger.merge(list_blocks);
4700}
4701
/// Helper struct to manage the complex logic of merging list blocks
///
/// It only borrows its inputs: the document text and the per-line metadata that
/// the merge helpers consult when deciding whether two blocks belong together.
struct ListBlockMerger<'a> {
    /// Full document text; passed to `LineInfo::content` and to
    /// `has_meaningful_content_between` when inspecting the lines between blocks.
    content: &'a str,
    /// Per-line metadata; 1-based line numbers are looked up as `lines[line_num - 1]`.
    lines: &'a [LineInfo],
}
4707
4708impl<'a> ListBlockMerger<'a> {
4709    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4710        Self { content, lines }
4711    }
4712
4713    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4714        let mut merged = Vec::with_capacity(list_blocks.len());
4715        let mut current = list_blocks[0].clone();
4716
4717        for next in list_blocks.iter().skip(1) {
4718            if self.should_merge_blocks(&current, next) {
4719                current = self.merge_two_blocks(current, next);
4720            } else {
4721                merged.push(current);
4722                current = next.clone();
4723            }
4724        }
4725
4726        merged.push(current);
4727        merged
4728    }
4729
4730    /// Determine if two adjacent list blocks should be merged
4731    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4732        // Basic compatibility checks
4733        if !self.blocks_are_compatible(current, next) {
4734            return false;
4735        }
4736
4737        // Check spacing and content between blocks
4738        let spacing = self.analyze_spacing_between(current, next);
4739        match spacing {
4740            BlockSpacing::Consecutive => true,
4741            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4742            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4743                self.can_merge_with_content_between(current, next)
4744            }
4745        }
4746    }
4747
4748    /// Check if blocks have compatible structure for merging
4749    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4750        current.is_ordered == next.is_ordered
4751            && current.blockquote_prefix == next.blockquote_prefix
4752            && current.nesting_level == next.nesting_level
4753    }
4754
4755    /// Analyze the spacing between two list blocks
4756    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4757        let gap = next.start_line - current.end_line;
4758
4759        match gap {
4760            1 => BlockSpacing::Consecutive,
4761            2 => BlockSpacing::SingleBlank,
4762            _ if gap > 2 => {
4763                if self.has_only_blank_lines_between(current, next) {
4764                    BlockSpacing::MultipleBlanks
4765                } else {
4766                    BlockSpacing::ContentBetween
4767                }
4768            }
4769            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4770        }
4771    }
4772
4773    /// Check if unordered lists can be merged with a single blank line between
4774    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4775        // Check if there are structural separators between the blocks
4776        // If has_meaningful_content_between returns true, it means there are structural separators
4777        if has_meaningful_content_between(self.content, current, next, self.lines) {
4778            return false; // Structural separators prevent merging
4779        }
4780
4781        // Only merge unordered lists with same marker across single blank
4782        !current.is_ordered && current.marker == next.marker
4783    }
4784
4785    /// Check if ordered lists can be merged when there's content between them
4786    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4787        // Do not merge lists if there are structural separators between them
4788        if has_meaningful_content_between(self.content, current, next, self.lines) {
4789            return false; // Structural separators prevent merging
4790        }
4791
4792        // Only consider merging ordered lists if there's no structural content between
4793        current.is_ordered && next.is_ordered
4794    }
4795
4796    /// Check if there are only blank lines between blocks
4797    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4798        for line_num in (current.end_line + 1)..next.start_line {
4799            if let Some(line_info) = self.lines.get(line_num - 1)
4800                && !line_info.content(self.content).trim().is_empty()
4801            {
4802                return false;
4803            }
4804        }
4805        true
4806    }
4807
4808    /// Merge two compatible list blocks into one
4809    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4810        current.end_line = next.end_line;
4811        current.item_lines.extend_from_slice(&next.item_lines);
4812
4813        // Update max marker width
4814        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4815
4816        // Handle marker consistency for unordered lists
4817        if !current.is_ordered && self.markers_differ(&current, next) {
4818            current.marker = None; // Mixed markers
4819        }
4820
4821        current
4822    }
4823
4824    /// Check if two blocks have different markers
4825    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4826        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4827    }
4828}
4829
/// How two neighbouring list blocks are separated from each other.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between the blocks.
    Consecutive,
    /// Exactly one blank line between the blocks.
    SingleBlank,
    /// Several blank lines, but no content, between the blocks.
    MultipleBlanks,
    /// At least one non-blank line between the blocks.
    ContentBetween,
}
4838
4839/// Check if there's meaningful content (not just blank lines) between two list blocks
4840fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4841    // Check lines between current.end_line and next.start_line
4842    for line_num in (current.end_line + 1)..next.start_line {
4843        if let Some(line_info) = lines.get(line_num - 1) {
4844            // Convert to 0-indexed
4845            let trimmed = line_info.content(content).trim();
4846
4847            // Skip empty lines
4848            if trimmed.is_empty() {
4849                continue;
4850            }
4851
4852            // Check for structural separators that should separate lists (CommonMark compliant)
4853
4854            // Headings separate lists
4855            if line_info.heading.is_some() {
4856                return true; // Has meaningful content - headings separate lists
4857            }
4858
4859            // Horizontal rules separate lists (---, ***, ___)
4860            if is_horizontal_rule(trimmed) {
4861                return true; // Has meaningful content - horizontal rules separate lists
4862            }
4863
4864            // Tables separate lists
4865            if crate::utils::skip_context::is_table_line(trimmed) {
4866                return true; // Has meaningful content - tables separate lists
4867            }
4868
4869            // Blockquotes separate lists
4870            if trimmed.starts_with('>') {
4871                return true; // Has meaningful content - blockquotes separate lists
4872            }
4873
4874            // Code block fences separate lists (unless properly indented as list content)
4875            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4876                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4877
4878                // Check if this code block is properly indented as list continuation
4879                let min_continuation_indent = if current.is_ordered {
4880                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4881                } else {
4882                    current.nesting_level + 2
4883                };
4884
4885                if line_indent < min_continuation_indent {
4886                    // This is a standalone code block that separates lists
4887                    return true; // Has meaningful content - standalone code blocks separate lists
4888                }
4889            }
4890
4891            // Check if this line has proper indentation for list continuation
4892            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4893
4894            // Calculate minimum indentation needed to be list continuation
4895            let min_indent = if current.is_ordered {
4896                current.nesting_level + current.max_marker_width
4897            } else {
4898                current.nesting_level + 2
4899            };
4900
4901            // If the line is not indented enough to be list continuation, it's meaningful content
4902            if line_indent < min_indent {
4903                return true; // Has meaningful content - content not indented as list continuation
4904            }
4905
4906            // If we reach here, the line is properly indented as list continuation
4907            // Continue checking other lines
4908        }
4909    }
4910
4911    // Only blank lines or properly indented list continuation content between blocks
4912    false
4913}
4914
4915/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4916/// CommonMark rules for thematic breaks (horizontal rules):
4917/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4918/// - Must have 3+ of the same character (-, *, or _)
4919/// - May have spaces between characters
4920/// - No other characters allowed
4921pub fn is_horizontal_rule_line(line: &str) -> bool {
4922    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4923    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4924    if leading_spaces > 3 || line.starts_with('\t') {
4925        return false;
4926    }
4927
4928    is_horizontal_rule_content(line.trim())
4929}
4930
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// The content must start with `-`, `*`, or `_`, contain at least three of that
/// same character, and contain nothing else except spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes can never hold three marker characters.
    if trimmed.len() < 3 {
        return false;
    }

    // Walk the characters directly; no intermediate `Vec<char>` is needed.
    let mut chars = trimmed.chars();
    let marker = match chars.next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // The marker just consumed is the first occurrence.
    let mut count: usize = 1;
    for ch in chars {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    count >= 3
}
4955
4956/// Backwards-compatible alias for `is_horizontal_rule_content`
4957pub fn is_horizontal_rule(trimmed: &str) -> bool {
4958    is_horizontal_rule_content(trimmed)
4959}
4960
/// Unit tests for `LintContext` construction, offset mapping, and per-line metadata.
4962#[cfg(test)]
4963mod tests {
4964    use super::*;
4965
4966    #[test]
4967    fn test_empty_content() {
4968        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
4969        assert_eq!(ctx.content, "");
4970        assert_eq!(ctx.line_offsets, vec![0]);
4971        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4972        assert_eq!(ctx.lines.len(), 0);
4973    }
4974
4975    #[test]
4976    fn test_single_line() {
4977        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
4978        assert_eq!(ctx.content, "# Hello");
4979        assert_eq!(ctx.line_offsets, vec![0]);
4980        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
4981        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
4982    }
4983
4984    #[test]
4985    fn test_multi_line() {
4986        let content = "# Title\n\nSecond line\nThird line";
4987        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4988        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
4989        // Test offset to line/col
4990        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
4991        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
4992        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
4993        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
4994        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
4995    }
4996
4997    #[test]
4998    fn test_line_info() {
4999        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
5000        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5001
5002        // Test line info
5003        assert_eq!(ctx.lines.len(), 7);
5004
5005        // Line 1: "# Title"
5006        let line1 = &ctx.lines[0];
5007        assert_eq!(line1.content(ctx.content), "# Title");
5008        assert_eq!(line1.byte_offset, 0);
5009        assert_eq!(line1.indent, 0);
5010        assert!(!line1.is_blank);
5011        assert!(!line1.in_code_block);
5012        assert!(line1.list_item.is_none());
5013
5014        // Line 2: "    indented"
5015        let line2 = &ctx.lines[1];
5016        assert_eq!(line2.content(ctx.content), "    indented");
5017        assert_eq!(line2.byte_offset, 8);
5018        assert_eq!(line2.indent, 4);
5019        assert!(!line2.is_blank);
5020
5021        // Line 3: "" (blank)
5022        let line3 = &ctx.lines[2];
5023        assert_eq!(line3.content(ctx.content), "");
5024        assert!(line3.is_blank);
5025
5026        // Test helper methods
5027        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
5028        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
5029        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
5030        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
5031    }
5032
5033    #[test]
5034    fn test_list_item_detection() {
5035        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
5036        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5037
5038        // Line 1: "- Unordered item"
5039        let line1 = &ctx.lines[0];
5040        assert!(line1.list_item.is_some());
5041        let list1 = line1.list_item.as_ref().unwrap();
5042        assert_eq!(list1.marker, "-");
5043        assert!(!list1.is_ordered);
5044        assert_eq!(list1.marker_column, 0);
5045        assert_eq!(list1.content_column, 2);
5046
5047        // Line 2: "  * Nested item"
5048        let line2 = &ctx.lines[1];
5049        assert!(line2.list_item.is_some());
5050        let list2 = line2.list_item.as_ref().unwrap();
5051        assert_eq!(list2.marker, "*");
5052        assert_eq!(list2.marker_column, 2);
5053
5054        // Line 3: "1. Ordered item"
5055        let line3 = &ctx.lines[2];
5056        assert!(line3.list_item.is_some());
5057        let list3 = line3.list_item.as_ref().unwrap();
5058        assert_eq!(list3.marker, "1.");
5059        assert!(list3.is_ordered);
5060        assert_eq!(list3.number, Some(1));
5061
5062        // Line 6: "Not a list"
5063        let line6 = &ctx.lines[5];
5064        assert!(line6.list_item.is_none());
5065    }
5066
5067    #[test]
5068    fn test_offset_to_line_col_edge_cases() {
5069        let content = "a\nb\nc";
5070        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5071        // line_offsets: [0, 2, 4]
5072        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
5073        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
5074        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
5075        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
5076        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
5077        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
5078    }
5079
5080    #[test]
5081    fn test_mdx_esm_blocks() {
5082        let content = r##"import {Chart} from './snowfall.js'
5083export const year = 2023
5084
5085# Last year's snowfall
5086
5087In {year}, the snowfall was above average.
5088It was followed by a warm spring which caused
5089flood conditions in many of the nearby rivers.
5090
5091<Chart color="#fcb32c" year={year} />
5092"##;
5093
5094        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
5095
5096        // Check that lines 1 and 2 are marked as ESM blocks
5097        assert_eq!(ctx.lines.len(), 10);
5098        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
5099        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
5100        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
5101        assert!(
5102            !ctx.lines[3].in_esm_block,
5103            "Line 4 (heading) should NOT be in_esm_block"
5104        );
5105        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
5106        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
5107    }
5108
5109    #[test]
5110    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
5111        let content = r#"import {Chart} from './snowfall.js'
5112export const year = 2023
5113
5114# Last year's snowfall
5115"#;
5116
5117        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5118
5119        // ESM blocks should NOT be detected in Standard flavor
5120        assert!(
5121            !ctx.lines[0].in_esm_block,
5122            "Line 1 should NOT be in_esm_block in Standard flavor"
5123        );
5124        assert!(
5125            !ctx.lines[1].in_esm_block,
5126            "Line 2 should NOT be in_esm_block in Standard flavor"
5127        );
5128    }
5129
5130    #[test]
5131    fn test_blockquote_with_indented_content() {
5132        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
5133        // The content inside the blockquote may also be detected as a code block (which is correct),
5134        // but for MD046 purposes, we need to know the line is inside a blockquote.
5135        let content = r#"# Heading
5136
5137>      -S socket-path
5138>                    More text
5139"#;
5140        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5141
5142        // Line 3 (index 2) should be detected as blockquote
5143        assert!(
5144            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
5145            "Line 3 should be a blockquote"
5146        );
5147        // Line 4 (index 3) should also be blockquote
5148        assert!(
5149            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
5150            "Line 4 should be a blockquote"
5151        );
5152
5153        // Verify blockquote content is correctly parsed
5154        // Note: spaces_after includes the spaces between `>` and content
5155        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
5156        assert_eq!(bq3.content, "-S socket-path");
5157        assert_eq!(bq3.nesting_level, 1);
5158        // 6 spaces after the `>` marker
5159        assert!(bq3.has_multiple_spaces_after_marker);
5160
5161        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
5162        assert_eq!(bq4.content, "More text");
5163        assert_eq!(bq4.nesting_level, 1);
5164    }
5165
5166    #[test]
5167    fn test_footnote_definitions_not_parsed_as_reference_defs() {
5168        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
5169        let content = r#"# Title
5170
5171A footnote[^1].
5172
5173[^1]: This is the footnote content.
5174
5175[^note]: Another footnote with [link](https://example.com).
5176
5177[regular]: ./path.md "A real reference definition"
5178"#;
5179        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5180
5181        // Should only have one reference definition (the regular one)
5182        assert_eq!(
5183            ctx.reference_defs.len(),
5184            1,
5185            "Footnotes should not be parsed as reference definitions"
5186        );
5187
5188        // The only reference def should be the regular one
5189        assert_eq!(ctx.reference_defs[0].id, "regular");
5190        assert_eq!(ctx.reference_defs[0].url, "./path.md");
5191        assert_eq!(
5192            ctx.reference_defs[0].title,
5193            Some("A real reference definition".to_string())
5194        );
5195    }
5196
5197    #[test]
5198    fn test_footnote_with_inline_link_not_misidentified() {
5199        // Regression test for issue #286: footnote containing an inline link
5200        // was incorrectly parsed as a reference definition with URL "[link](url)"
5201        let content = r#"# Title
5202
5203A footnote[^1].
5204
5205[^1]: [link](https://www.google.com).
5206"#;
5207        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5208
5209        // Should have no reference definitions
5210        assert!(
5211            ctx.reference_defs.is_empty(),
5212            "Footnote with inline link should not create a reference definition"
5213        );
5214    }
5215
5216    #[test]
5217    fn test_various_footnote_formats_excluded() {
5218        // Test various footnote ID formats are all excluded
5219        let content = r#"[^1]: Numeric footnote
5220[^note]: Named footnote
5221[^a]: Single char footnote
5222[^long-footnote-name]: Long named footnote
5223[^123abc]: Mixed alphanumeric
5224
5225[ref1]: ./file1.md
5226[ref2]: ./file2.md
5227"#;
5228        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5229
5230        // Should only have the two regular reference definitions
5231        assert_eq!(
5232            ctx.reference_defs.len(),
5233            2,
5234            "Only regular reference definitions should be parsed"
5235        );
5236
5237        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5238        assert!(ids.contains(&"ref1"));
5239        assert!(ids.contains(&"ref2"));
5240        assert!(!ids.iter().any(|id| id.starts_with('^')));
5241    }
5242
5243    // =========================================================================
5244    // Tests for has_char and char_count methods
5245    // =========================================================================
5246
5247    #[test]
5248    fn test_has_char_tracked_characters() {
5249        // Test all 12 tracked characters
5250        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5251        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5252
5253        // All tracked characters should be detected
5254        assert!(ctx.has_char('#'), "Should detect hash");
5255        assert!(ctx.has_char('*'), "Should detect asterisk");
5256        assert!(ctx.has_char('_'), "Should detect underscore");
5257        assert!(ctx.has_char('-'), "Should detect hyphen");
5258        assert!(ctx.has_char('+'), "Should detect plus");
5259        assert!(ctx.has_char('>'), "Should detect gt");
5260        assert!(ctx.has_char('|'), "Should detect pipe");
5261        assert!(ctx.has_char('['), "Should detect bracket");
5262        assert!(ctx.has_char('`'), "Should detect backtick");
5263        assert!(ctx.has_char('<'), "Should detect lt");
5264        assert!(ctx.has_char('!'), "Should detect exclamation");
5265        assert!(ctx.has_char('\n'), "Should detect newline");
5266    }
5267
5268    #[test]
5269    fn test_has_char_absent_characters() {
5270        let content = "Simple text without special chars";
5271        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5272
5273        // None of the tracked characters should be present
5274        assert!(!ctx.has_char('#'), "Should not detect hash");
5275        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5276        assert!(!ctx.has_char('_'), "Should not detect underscore");
5277        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5278        assert!(!ctx.has_char('+'), "Should not detect plus");
5279        assert!(!ctx.has_char('>'), "Should not detect gt");
5280        assert!(!ctx.has_char('|'), "Should not detect pipe");
5281        assert!(!ctx.has_char('['), "Should not detect bracket");
5282        assert!(!ctx.has_char('`'), "Should not detect backtick");
5283        assert!(!ctx.has_char('<'), "Should not detect lt");
5284        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5285        // Note: single line content has no newlines
5286        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5287    }
5288
5289    #[test]
5290    fn test_has_char_fallback_for_untracked() {
5291        let content = "Text with @mention and $dollar and %percent";
5292        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5293
5294        // Untracked characters should fall back to content.contains()
5295        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5296        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5297        assert!(ctx.has_char('%'), "Should detect % via fallback");
5298        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5299    }
5300
5301    #[test]
5302    fn test_char_count_tracked_characters() {
5303        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5304        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5305
5306        // Count each tracked character
5307        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5308        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5309        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5310        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5311        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5312        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5313        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5314        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5315        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5316        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5317        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5318        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5319    }
5320
5321    #[test]
5322    fn test_char_count_zero_for_absent() {
5323        let content = "Plain text";
5324        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5325
5326        assert_eq!(ctx.char_count('#'), 0);
5327        assert_eq!(ctx.char_count('*'), 0);
5328        assert_eq!(ctx.char_count('_'), 0);
5329        assert_eq!(ctx.char_count('\n'), 0);
5330    }
5331
5332    #[test]
5333    fn test_char_count_fallback_for_untracked() {
5334        let content = "@@@ $$ %%%";
5335        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5336
5337        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5338        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5339        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5340        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5341    }
5342
5343    #[test]
5344    fn test_char_count_empty_content() {
5345        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5346
5347        assert_eq!(ctx.char_count('#'), 0);
5348        assert_eq!(ctx.char_count('*'), 0);
5349        assert_eq!(ctx.char_count('@'), 0);
5350        assert!(!ctx.has_char('#'));
5351        assert!(!ctx.has_char('@'));
5352    }
5353
5354    // =========================================================================
5355    // Tests for is_in_html_tag method
5356    // =========================================================================
5357
5358    #[test]
5359    fn test_is_in_html_tag_simple() {
5360        let content = "<div>content</div>";
5361        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5362
5363        // Inside opening tag
5364        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5365        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5366        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5367
5368        // Outside tag (in content)
5369        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5370        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5371
5372        // Inside closing tag
5373        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5374        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5375    }
5376
5377    #[test]
5378    fn test_is_in_html_tag_self_closing() {
5379        let content = "Text <br/> more text";
5380        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5381
5382        // Before tag
5383        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5384        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5385
5386        // Inside self-closing tag
5387        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5388        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5389        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5390
5391        // After tag
5392        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5393    }
5394
5395    #[test]
5396    fn test_is_in_html_tag_with_attributes() {
5397        let content = r#"<a href="url" class="link">text</a>"#;
5398        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5399
5400        // All positions inside opening tag with attributes
5401        assert!(ctx.is_in_html_tag(0), "Start of tag");
5402        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5403        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5404        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5405
5406        // Content between tags
5407        assert!(!ctx.is_in_html_tag(27), "Start of content");
5408        assert!(!ctx.is_in_html_tag(30), "End of content");
5409
5410        // Closing tag
5411        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5412    }
5413
5414    #[test]
5415    fn test_is_in_html_tag_multiline() {
5416        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5417        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5418
5419        // Opening tag spans multiple lines
5420        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5421        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5422        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5423
5424        // After closing > of opening tag
5425        let closing_bracket_pos = content.find(">\n").unwrap();
5426        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5427    }
5428
5429    #[test]
5430    fn test_is_in_html_tag_no_tags() {
5431        let content = "Plain text without any HTML";
5432        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5433
5434        // No position should be in an HTML tag
5435        for i in 0..content.len() {
5436            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5437        }
5438    }
5439
5440    // =========================================================================
5441    // Tests for is_in_jinja_range method
5442    // =========================================================================
5443
5444    #[test]
5445    fn test_is_in_jinja_range_expression() {
5446        let content = "Hello {{ name }}!";
5447        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5448
5449        // Before Jinja
5450        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5451        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5452
5453        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5454        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5455        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5456        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5457        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5458        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5459
5460        // After Jinja
5461        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5462    }
5463
5464    #[test]
5465    fn test_is_in_jinja_range_statement() {
5466        let content = "{% if condition %}content{% endif %}";
5467        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5468
5469        // Inside opening statement
5470        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5471        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5472        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5473
5474        // Content between
5475        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5476
5477        // Inside closing statement
5478        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5479        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5480    }
5481
5482    #[test]
5483    fn test_is_in_jinja_range_multiple() {
5484        let content = "{{ a }} and {{ b }}";
5485        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5486
5487        // First Jinja expression
5488        assert!(ctx.is_in_jinja_range(0));
5489        assert!(ctx.is_in_jinja_range(3));
5490        assert!(ctx.is_in_jinja_range(6));
5491
5492        // Between expressions
5493        assert!(!ctx.is_in_jinja_range(8));
5494        assert!(!ctx.is_in_jinja_range(11));
5495
5496        // Second Jinja expression
5497        assert!(ctx.is_in_jinja_range(12));
5498        assert!(ctx.is_in_jinja_range(15));
5499        assert!(ctx.is_in_jinja_range(18));
5500    }
5501
5502    #[test]
5503    fn test_is_in_jinja_range_no_jinja() {
5504        let content = "Plain text with single braces but not Jinja";
5505        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5506
5507        // No position should be in Jinja
5508        for i in 0..content.len() {
5509            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5510        }
5511    }
5512
    // =========================================================================
    // Tests for is_in_link_title method
    // =========================================================================
5516
5517    #[test]
5518    fn test_is_in_link_title_with_title() {
5519        let content = r#"[ref]: https://example.com "Title text"
5520
5521Some content."#;
5522        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5523
5524        // Verify we have a reference def with title
5525        assert_eq!(ctx.reference_defs.len(), 1);
5526        let def = &ctx.reference_defs[0];
5527        assert!(def.title_byte_start.is_some());
5528        assert!(def.title_byte_end.is_some());
5529
5530        let title_start = def.title_byte_start.unwrap();
5531        let title_end = def.title_byte_end.unwrap();
5532
5533        // Before title (in URL)
5534        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5535
5536        // Inside title
5537        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5538        assert!(
5539            ctx.is_in_link_title(title_start + 5),
5540            "Middle of title should be in title"
5541        );
5542        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5543
5544        // After title
5545        assert!(
5546            !ctx.is_in_link_title(title_end),
5547            "After title end should not be in title"
5548        );
5549    }
5550
5551    #[test]
5552    fn test_is_in_link_title_without_title() {
5553        let content = "[ref]: https://example.com\n\nSome content.";
5554        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5555
5556        // Reference def without title
5557        assert_eq!(ctx.reference_defs.len(), 1);
5558        let def = &ctx.reference_defs[0];
5559        assert!(def.title_byte_start.is_none());
5560        assert!(def.title_byte_end.is_none());
5561
5562        // No position should be in a title
5563        for i in 0..content.len() {
5564            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5565        }
5566    }
5567
5568    #[test]
5569    fn test_is_in_link_title_multiple_refs() {
5570        let content = r#"[ref1]: /url1 "Title One"
5571[ref2]: /url2
5572[ref3]: /url3 "Title Three"
5573"#;
5574        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5575
5576        // Should have 3 reference defs
5577        assert_eq!(ctx.reference_defs.len(), 3);
5578
5579        // ref1 has title
5580        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5581        assert!(ref1.title_byte_start.is_some());
5582
5583        // ref2 has no title
5584        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5585        assert!(ref2.title_byte_start.is_none());
5586
5587        // ref3 has title
5588        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5589        assert!(ref3.title_byte_start.is_some());
5590
5591        // Check positions in ref1's title
5592        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5593            assert!(ctx.is_in_link_title(start + 1));
5594            assert!(!ctx.is_in_link_title(end + 5));
5595        }
5596
5597        // Check positions in ref3's title
5598        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5599            assert!(ctx.is_in_link_title(start + 1));
5600        }
5601    }
5602
5603    #[test]
5604    fn test_is_in_link_title_single_quotes() {
5605        let content = "[ref]: /url 'Single quoted title'\n";
5606        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5607
5608        assert_eq!(ctx.reference_defs.len(), 1);
5609        let def = &ctx.reference_defs[0];
5610
5611        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5612            assert!(ctx.is_in_link_title(start));
5613            assert!(ctx.is_in_link_title(start + 5));
5614            assert!(!ctx.is_in_link_title(end));
5615        }
5616    }
5617
5618    #[test]
5619    fn test_is_in_link_title_parentheses() {
5620        // Note: The reference def parser may not support parenthesized titles
5621        // This test verifies the is_in_link_title method works when titles exist
5622        let content = "[ref]: /url (Parenthesized title)\n";
5623        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5624
5625        // Parser behavior: may or may not parse parenthesized titles
5626        // We test that is_in_link_title correctly reflects whatever was parsed
5627        if ctx.reference_defs.is_empty() {
5628            // Parser didn't recognize this as a reference def
5629            for i in 0..content.len() {
5630                assert!(!ctx.is_in_link_title(i));
5631            }
5632        } else {
5633            let def = &ctx.reference_defs[0];
5634            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5635                assert!(ctx.is_in_link_title(start));
5636                assert!(ctx.is_in_link_title(start + 5));
5637                assert!(!ctx.is_in_link_title(end));
5638            } else {
5639                // Title wasn't parsed, so no position should be in title
5640                for i in 0..content.len() {
5641                    assert!(!ctx.is_in_link_title(i));
5642                }
5643            }
5644        }
5645    }
5646
5647    #[test]
5648    fn test_is_in_link_title_no_refs() {
5649        let content = "Just plain text without any reference definitions.";
5650        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5651
5652        assert!(ctx.reference_defs.is_empty());
5653
5654        for i in 0..content.len() {
5655            assert!(!ctx.is_in_link_title(i));
5656        }
5657    }
5658
    // =========================================================================
    // Math span tests (Issue #289)
    // =========================================================================
5662
5663    #[test]
5664    fn test_math_spans_inline() {
5665        let content = "Text with inline math $[f](x)$ in it.";
5666        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5667
5668        let math_spans = ctx.math_spans();
5669        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5670
5671        let span = &math_spans[0];
5672        assert!(!span.is_display, "Should be inline math, not display");
5673        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5674    }
5675
5676    #[test]
5677    fn test_math_spans_display_single_line() {
5678        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5679        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5680
5681        let math_spans = ctx.math_spans();
5682        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5683
5684        let span = &math_spans[0];
5685        assert!(span.is_display, "Should be display math");
5686        assert!(
5687            span.content.contains("[x](\\zeta)"),
5688            "Content should contain the link-like pattern"
5689        );
5690    }
5691
5692    #[test]
5693    fn test_math_spans_display_multiline() {
5694        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5695        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5696
5697        let math_spans = ctx.math_spans();
5698        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5699
5700        let span = &math_spans[0];
5701        assert!(span.is_display, "Should be display math");
5702    }
5703
5704    #[test]
5705    fn test_is_in_math_span() {
5706        let content = "Text $[f](x)$ more text";
5707        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5708
5709        // Position inside the math span
5710        let math_start = content.find('$').unwrap();
5711        let math_end = content.rfind('$').unwrap() + 1;
5712
5713        assert!(
5714            ctx.is_in_math_span(math_start + 1),
5715            "Position inside math span should return true"
5716        );
5717        assert!(
5718            ctx.is_in_math_span(math_start + 3),
5719            "Position inside math span should return true"
5720        );
5721
5722        // Position outside the math span
5723        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5724        assert!(
5725            !ctx.is_in_math_span(math_end + 1),
5726            "Position after math span should return false"
5727        );
5728    }
5729
5730    #[test]
5731    fn test_math_spans_mixed_with_code() {
5732        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5733        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5734
5735        let math_spans = ctx.math_spans();
5736        let code_spans = ctx.code_spans();
5737
5738        assert_eq!(math_spans.len(), 1, "Should have one math span");
5739        assert_eq!(code_spans.len(), 1, "Should have one code span");
5740
5741        // Verify math span content
5742        assert_eq!(math_spans[0].content, "[f](x)");
5743        // Verify code span content
5744        assert_eq!(code_spans[0].content, "[g](y)");
5745    }
5746
5747    #[test]
5748    fn test_math_spans_no_math() {
5749        let content = "Regular text without any math at all.";
5750        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5751
5752        let math_spans = ctx.math_spans();
5753        assert!(math_spans.is_empty(), "Should have no math spans");
5754    }
5755
5756    #[test]
5757    fn test_math_spans_multiple() {
5758        let content = "First $a$ and second $b$ and display $$c$$";
5759        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5760
5761        let math_spans = ctx.math_spans();
5762        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5763
5764        // Two inline, one display
5765        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5766        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5767
5768        assert_eq!(inline_count, 2, "Should have two inline math spans");
5769        assert_eq!(display_count, 1, "Should have one display math span");
5770    }
5771
5772    #[test]
5773    fn test_is_in_math_span_boundary_positions() {
5774        // Test exact boundary positions: $[f](x)$
5775        // Byte positions:                0123456789
5776        let content = "$[f](x)$";
5777        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5778
5779        let math_spans = ctx.math_spans();
5780        assert_eq!(math_spans.len(), 1, "Should have one math span");
5781
5782        let span = &math_spans[0];
5783
5784        // Position at opening $ should be in span (byte 0)
5785        assert!(
5786            ctx.is_in_math_span(span.byte_offset),
5787            "Start position should be in span"
5788        );
5789
5790        // Position just inside should be in span
5791        assert!(
5792            ctx.is_in_math_span(span.byte_offset + 1),
5793            "Position after start should be in span"
5794        );
5795
5796        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5797        assert!(
5798            ctx.is_in_math_span(span.byte_end - 1),
5799            "Position at end-1 should be in span"
5800        );
5801
5802        // Position at byte_end should NOT be in span (exclusive end)
5803        assert!(
5804            !ctx.is_in_math_span(span.byte_end),
5805            "Position at byte_end should NOT be in span (exclusive)"
5806        );
5807    }
5808
5809    #[test]
5810    fn test_math_spans_at_document_start() {
5811        let content = "$x$ text";
5812        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5813
5814        let math_spans = ctx.math_spans();
5815        assert_eq!(math_spans.len(), 1);
5816        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5817    }
5818
5819    #[test]
5820    fn test_math_spans_at_document_end() {
5821        let content = "text $x$";
5822        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5823
5824        let math_spans = ctx.math_spans();
5825        assert_eq!(math_spans.len(), 1);
5826        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5827    }
5828
5829    #[test]
5830    fn test_math_spans_consecutive() {
5831        let content = "$a$$b$";
5832        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5833
5834        let math_spans = ctx.math_spans();
5835        // pulldown-cmark should parse these as separate spans
5836        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5837
5838        // All positions should be in some math span
5839        for i in 0..content.len() {
5840            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5841        }
5842    }
5843
5844    #[test]
5845    fn test_math_spans_currency_not_math() {
5846        // Unbalanced $ should not create math spans
5847        let content = "Price is $100";
5848        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5849
5850        let math_spans = ctx.math_spans();
5851        // pulldown-cmark requires balanced delimiters for math
5852        // $100 alone is not math
5853        assert!(
5854            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5855            "Unbalanced $ should not create math span containing 100"
5856        );
5857    }
5858
    // =========================================================================
    // Tests for O(1) reference definition lookups via HashMap
    // =========================================================================
5862
5863    #[test]
5864    fn test_reference_lookup_o1_basic() {
5865        let content = r#"[ref1]: /url1
5866[REF2]: /url2 "Title"
5867[Ref3]: /url3
5868
5869Use [link][ref1] and [link][REF2]."#;
5870        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5871
5872        // Verify we have 3 reference defs
5873        assert_eq!(ctx.reference_defs.len(), 3);
5874
5875        // Test get_reference_url with various cases
5876        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5877        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5878        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5879        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5880        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5881        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5882        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5883    }
5884
5885    #[test]
5886    fn test_reference_lookup_o1_get_reference_def() {
5887        let content = r#"[myref]: https://example.com "My Title"
5888"#;
5889        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5890
5891        // Test get_reference_def
5892        let def = ctx.get_reference_def("myref").expect("Should find myref");
5893        assert_eq!(def.url, "https://example.com");
5894        assert_eq!(def.title.as_deref(), Some("My Title"));
5895
5896        // Case insensitive
5897        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5898        assert_eq!(def2.url, "https://example.com");
5899
5900        // Non-existent
5901        assert!(ctx.get_reference_def("nonexistent").is_none());
5902    }
5903
5904    #[test]
5905    fn test_reference_lookup_o1_has_reference_def() {
5906        let content = r#"[foo]: /foo
5907[BAR]: /bar
5908"#;
5909        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5910
5911        // Test has_reference_def
5912        assert!(ctx.has_reference_def("foo"));
5913        assert!(ctx.has_reference_def("FOO")); // case insensitive
5914        assert!(ctx.has_reference_def("bar"));
5915        assert!(ctx.has_reference_def("Bar")); // case insensitive
5916        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5917    }
5918
5919    #[test]
5920    fn test_reference_lookup_o1_empty_content() {
5921        let content = "No references here.";
5922        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5923
5924        assert!(ctx.reference_defs.is_empty());
5925        assert_eq!(ctx.get_reference_url("anything"), None);
5926        assert!(ctx.get_reference_def("anything").is_none());
5927        assert!(!ctx.has_reference_def("anything"));
5928    }
5929
5930    #[test]
5931    fn test_reference_lookup_o1_special_characters_in_id() {
5932        let content = r#"[ref-with-dash]: /url1
5933[ref_with_underscore]: /url2
5934[ref.with.dots]: /url3
5935"#;
5936        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5937
5938        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
5939        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
5940        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
5941    }
5942
5943    #[test]
5944    fn test_reference_lookup_o1_unicode_id() {
5945        let content = r#"[日本語]: /japanese
5946[émoji]: /emoji
5947"#;
5948        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5949
5950        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
5951        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
5952        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
5953    }
5954}