rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::regex_cache::URL_SIMPLE_REGEX;
7use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
8use regex::Regex;
9use std::borrow::Cow;
10use std::collections::HashMap;
11use std::path::PathBuf;
12use std::sync::LazyLock;
13
/// Times one section of work and optionally reports it (non-WASM builds).
///
/// Expands to a block that evaluates `$code`, then, when `$profile` is true,
/// prints `[PROFILE] <name>: <elapsed>` to stderr. The block's value is the
/// value of `$code`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The timer is started unconditionally; only the report is gated on `$profile`.
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        value
    }};
}

/// WASM variant: expands to `$code` alone, with no timing and no output.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
31
// Link pattern covering both inline `[text](url "title")` and reference
// `[text][ref]` forms.
//
// Flags: `s` lets `.` match newlines (so `\\.` also accepts a backslash followed by a
// newline); `x` enables verbose mode, which allows the whitespace and `#` comments
// embedded in the pattern.
//
// Capture groups:
//   1 - link text (no unescaped `[` or `]`)
//   2 - URL written in angle brackets `<...>`
//   3 - bare URL (any run of characters other than `)`, `"` and `'`)
//   4 - double-quoted title
//   5 - single-quoted title
//   6 - reference ID
//
// The pattern itself does not reject a preceding `!`.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
45
// Image pattern: the same shape as the link pattern but anchored on a leading `!`,
// covering inline `![alt](url "title")` and reference `![alt][ref]` forms.
//
// Flags: `s` lets `.` match newlines; `x` enables verbose mode (whitespace and `#`
// comments inside the pattern are ignored).
//
// Capture groups:
//   1 - alt text (no unescaped `[` or `]`)
//   2 - URL written in angle brackets `<...>`
//   3 - bare URL (any run of characters other than `)`, `"` and `'`)
//   4 - double-quoted title
//   5 - single-quoted title
//   6 - reference ID
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
59
// Reference definition pattern: `[label]: destination "optional title"`.
//
// `(?m)` makes `^`/`$` match at line boundaries. Up to three leading spaces are
// allowed before the opening bracket.
//
// Capture groups:
//   1 - label between the brackets (at least one character, no `]`)
//   2 - destination (a run of non-whitespace characters)
//   3 - double-quoted title
//   4 - single-quoted title
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
63
// Bare URLs: no pattern is defined here; the centralized `URL_SIMPLE_REGEX` imported from `regex_cache` is used instead.
65
// Pattern for email addresses: a local part of letters, digits and `._%+-`, an `@`,
// a domain of letters, digits, `.` and `-`, then a final dot followed by at least
// two ASCII letters. The pattern is unanchored, so it finds addresses anywhere in
// the searched text.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
69
// Pattern for blockquote prefix in parse_list_blocks.
// Anchored at the start of the input; group 1 captures the whole prefix: optional
// leading whitespace, one or more `>` characters, and any whitespace that follows.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
72
/// Pre-computed information about a line
///
/// The line's text is not stored; `byte_offset` and `byte_len` locate it inside the
/// source document. The boolean `in_*` flags record which surrounding construct
/// (code block, front matter, HTML, flavor-specific containers, ...) the line was
/// found to be part of.
#[derive(Debug, Clone)]
pub struct LineInfo {
    /// Byte offset where this line starts in the document
    pub byte_offset: usize,
    /// Length of the line in bytes (without newline)
    pub byte_len: usize,
    /// Number of bytes of leading whitespace (for substring extraction)
    pub indent: usize,
    /// Visual column width of leading whitespace (with proper tab expansion)
    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
    pub visual_indent: usize,
    /// Whether the line is blank (empty or only whitespace)
    pub is_blank: bool,
    /// Whether this line is inside a code block
    pub in_code_block: bool,
    /// Whether this line is inside front matter
    pub in_front_matter: bool,
    /// Whether this line is inside an HTML block
    pub in_html_block: bool,
    /// Whether this line is inside an HTML comment
    pub in_html_comment: bool,
    /// List item information if this line starts a list item
    pub list_item: Option<ListItemInfo>,
    /// Heading information if this line is a heading
    pub heading: Option<HeadingInfo>,
    /// Blockquote information if this line is a blockquote
    pub blockquote: Option<BlockquoteInfo>,
    /// Whether this line is inside a mkdocstrings autodoc block
    pub in_mkdocstrings: bool,
    /// Whether this line is part of an ESM import/export block (MDX only)
    pub in_esm_block: bool,
    /// Whether this line is a continuation of a multi-line code span from a previous line
    /// (the line on which the span starts is not marked)
    pub in_code_span_continuation: bool,
    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
    /// Pre-computed for consistent detection across all rules
    pub is_horizontal_rule: bool,
    /// Whether this line is inside a math block ($$ ... $$)
    pub in_math_block: bool,
    /// Whether this line is inside a Quarto div block (::: ... :::)
    pub in_quarto_div: bool,
    /// Whether this line contains or is inside a JSX expression (MDX only)
    pub in_jsx_expression: bool,
    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
    pub in_mdx_comment: bool,
    /// Whether this line is inside a JSX component (MDX only)
    pub in_jsx_component: bool,
    /// Whether this line is inside a JSX fragment (MDX only)
    pub in_jsx_fragment: bool,
    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
    pub in_admonition: bool,
    /// Whether this line is inside an MkDocs content tab block (===)
    pub in_content_tab: bool,
    /// Whether this line is a definition list item (: definition)
    pub in_definition_list: bool,
    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
    pub in_obsidian_comment: bool,
}
132
133impl LineInfo {
134    /// Get the line content as a string slice from the source document
135    pub fn content<'a>(&self, source: &'a str) -> &'a str {
136        &source[self.byte_offset..self.byte_offset + self.byte_len]
137    }
138
139    /// Check if this line is inside MkDocs-specific indented content (admonitions or tabs).
140    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
141    /// but in MkDocs flavor it's actually container content that should be preserved.
142    #[inline]
143    pub fn in_mkdocs_container(&self) -> bool {
144        self.in_admonition || self.in_content_tab
145    }
146}
147
/// Information about a list item
///
/// Stored on the line that starts the item (see `LineInfo::list_item`).
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists (`None` otherwise)
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    // NOTE(review): presumably 0-based like `marker_column` — confirm against the producer.
    pub content_column: usize,
}
162
/// Heading style type
///
/// Distinguishes `#`-prefixed (ATX) headings from the two underlined (Setext) forms.
#[derive(Debug, Clone, PartialEq)]
pub enum HeadingStyle {
    /// ATX style heading (# Heading)
    ATX,
    /// Setext style heading with = underline (a level-1 heading in CommonMark)
    Setext1,
    /// Setext style heading with - underline (a level-2 heading in CommonMark)
    Setext2,
}
173
/// Parsed link information
///
/// Positions are given both as a line/column pair and as byte offsets into the
/// document. String fields are `Cow`s, so they may either borrow (for lifetime `'a`)
/// or own their data.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link [text][ref] vs inline [text](url)
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
198
/// Information about a broken link reported by pulldown-cmark
///
/// Describes a reference that could not be resolved, together with where it
/// appears in the source.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document
    pub span: std::ops::Range<usize>,
}
207
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`)
///
/// Records the footnote's ID and where the reference sits in the document.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix), e.g. `note` for `[^note]`
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
220
/// Parsed image information
///
/// Mirrors `ParsedLink`, with `alt_text` in place of the link text. String fields
/// are `Cow`s, so they may either borrow (for lifetime `'a`) or own their data.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
245
/// Reference definition [ref]: url "title"
///
/// All strings are owned. Byte offsets refer to positions in the document.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
266
/// Parsed code span information
///
/// A span may cover several lines: `line` is where it starts and `end_line` where
/// it ends (the two are equal for a single-line span).
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
287
/// Parsed math span information (inline $...$ or display $$...$$)
///
/// Laid out like `CodeSpan`, with `is_display` distinguishing the two delimiter
/// forms.
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// Line number where the math span starts (1-indexed)
    pub line: usize,
    /// Line number where the math span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Whether this is display math ($$...$$) vs inline ($...$)
    pub is_display: bool,
    /// Content inside the math delimiters
    pub content: String,
}
308
/// Information about a heading
///
/// Stored on the heading's line (see `LineInfo::heading`). Malformed ATX headings
/// are recorded too and marked with `is_valid == false`.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
336
/// A valid heading from a filtered iteration
///
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
///
/// This is the item type yielded by `ValidHeadingsIter`; both references borrow
/// from the iterated `LineInfo` slice.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
350
351/// Iterator over valid CommonMark headings in a document
352///
353/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
354/// but should not be processed by other heading rules.
355pub struct ValidHeadingsIter<'a> {
356    lines: &'a [LineInfo],
357    current_index: usize,
358}
359
360impl<'a> ValidHeadingsIter<'a> {
361    fn new(lines: &'a [LineInfo]) -> Self {
362        Self {
363            lines,
364            current_index: 0,
365        }
366    }
367}
368
369impl<'a> Iterator for ValidHeadingsIter<'a> {
370    type Item = ValidHeading<'a>;
371
372    fn next(&mut self) -> Option<Self::Item> {
373        while self.current_index < self.lines.len() {
374            let idx = self.current_index;
375            self.current_index += 1;
376
377            let line_info = &self.lines[idx];
378            if let Some(heading) = &line_info.heading
379                && heading.is_valid
380            {
381                return Some(ValidHeading {
382                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
383                    heading,
384                    line_info,
385                });
386            }
387        }
388        None
389    }
390}
391
/// Information about a blockquote line
///
/// Stored on the line itself (see `LineInfo::blockquote`). All strings are owned
/// copies of parts of the line.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
412
/// Information about a list block
///
/// Describes a run of list items treated as one list, identified by its first and
/// last line numbers.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed line numbers like `start_line` — confirm.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
433
434use std::sync::{Arc, OnceLock};
435
/// Map from line byte offset to list item data.
///
/// The tuple value holds, in order:
/// `(is_ordered, marker, marker_column, content_column, number)`
type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
438
/// Type alias for byte ranges used in JSX expression and MDX comment detection
// NOTE(review): each pair is presumably `(start, end)` byte offsets — confirm at the use sites.
type ByteRanges = Vec<(usize, usize)>;
441
/// Character frequency data for fast content analysis
///
/// One counter per character of interest; the parenthesised notes name the Markdown
/// constructs that character can introduce. `Default` yields all-zero counts.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
470
/// Pre-parsed HTML tag information
///
/// Describes a single tag occurrence: where it is, its name, and whether it is an
/// opening, closing or self-closing tag.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
493
/// Pre-parsed emphasis span information
///
/// `marker` and `marker_count` together describe the delimiter run, e.g. `*` with a
/// count of 2 for `**bold**`.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
514
/// Pre-parsed table row information
///
/// One entry per table line, covering both content rows and the separator row.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row.
    /// Each entry is one of "left", "center", "right", "none".
    pub column_alignments: Vec<String>,
}
527
/// Pre-parsed bare URL information (not in links)
///
/// Covers URLs and email addresses that appear as plain text; `url_type` tells the
/// kinds apart.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
546
/// Pre-computed view of a document, built once by `LintContext::new`.
///
/// Borrows the document text for `'a` and stores structures derived from it (line
/// table, code blocks, links, images, list blocks, ...). Fields ending in `_cache`
/// are `OnceLock`s; the constructor fills some of them immediately and leaves
/// others empty, as noted on each field.
pub struct LintContext<'a> {
    /// The document text this context was built from.
    pub content: &'a str,
    /// Byte offset at which each line starts: `0`, then the position just after
    /// every `\n`.
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans)
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions
    pub reference_defs: Vec<ReferenceDef>,
    /// O(1) lookup by lowercase ID -> index in reference_defs
    reference_defs_map: HashMap<String, usize>,
    /// Inline code spans (this cell is already populated by `new`)
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Lazy-loaded math spans ($...$ and $$...$$); left empty by `new`
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Pre-parsed list blocks
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags; left empty by `new`
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans (this cell is already populated by `new`)
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges ({{ }}, {% %})
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context)
    pub source_file: Option<PathBuf>,
    /// Pre-computed JSX expression ranges (MDX: {expression})
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Pre-computed MDX comment ranges ({/* ... */})
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}}),
    /// stored as (match start, match end) byte offsets
    shortcode_ranges: Vec<(usize, usize)>,
    /// Parsed inline configuration comments for rule disabling
    inline_config: InlineConfig,
    /// Pre-computed Obsidian comment ranges (%%...%%)
    obsidian_comment_ranges: Vec<(usize, usize)>,
}
580
/// The four consecutive pieces of a blockquote line, each borrowed from the line.
///
/// Concatenating `indent`, `markers`, `spaces_after` and `content` reproduces the
/// original line.
struct BlockquoteComponents<'a> {
    /// Leading spaces/tabs before the first `>`.
    indent: &'a str,
    /// The contiguous run of `>` characters.
    markers: &'a str,
    /// Spaces/tabs directly after the markers.
    spaces_after: &'a str,
    /// Everything that follows.
    content: &'a str,
}

/// Split a line into its blockquote components without using a regex.
///
/// Returns `None` when the first character after any leading spaces/tabs is not
/// `>`. Only a contiguous run of `>` counts as the marker: for `> > x` the markers
/// are `>` and the content is `> x`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Only ASCII space and tab count as whitespace here.
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let after_markers = after_indent.trim_start_matches('>');
    if after_markers.len() == after_indent.len() {
        // Nothing was stripped, so there is no `>` marker at this position.
        return None;
    }
    let content = after_markers.trim_start_matches(BLANKS);

    // Each piece's length is the amount removed by the corresponding trim.
    let indent_len = line.len() - after_indent.len();
    let markers_len = after_indent.len() - after_markers.len();
    let spaces_len = after_markers.len() - content.len();

    Some(BlockquoteComponents {
        indent: &line[..indent_len],
        markers: &after_indent[..markers_len],
        spaces_after: &after_markers[..spaces_len],
        content,
    })
}
625
626impl<'a> LintContext<'a> {
627    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
628        #[cfg(not(target_arch = "wasm32"))]
629        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
630        #[cfg(target_arch = "wasm32")]
631        let profile = false;
632
633        let line_offsets = profile_section!("Line offsets", profile, {
634            let mut offsets = vec![0];
635            for (i, c) in content.char_indices() {
636                if c == '\n' {
637                    offsets.push(i + 1);
638                }
639            }
640            offsets
641        });
642
643        // Detect code blocks and code spans once and cache them
644        let (code_blocks, code_span_ranges) = profile_section!(
645            "Code blocks",
646            profile,
647            CodeBlockUtils::detect_code_blocks_and_spans(content)
648        );
649
650        // Pre-compute HTML comment ranges ONCE for all operations
651        let html_comment_ranges = profile_section!(
652            "HTML comment ranges",
653            profile,
654            crate::utils::skip_context::compute_html_comment_ranges(content)
655        );
656
657        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
658        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
659            if flavor == MarkdownFlavor::MkDocs {
660                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
661            } else {
662                Vec::new()
663            }
664        });
665
666        // Pre-compute Quarto div block ranges for Quarto flavor
667        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
668            if flavor == MarkdownFlavor::Quarto {
669                crate::utils::quarto_divs::detect_div_block_ranges(content)
670            } else {
671                Vec::new()
672            }
673        });
674
675        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
676        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
677        let (mut lines, emphasis_spans) = profile_section!(
678            "Basic line info",
679            profile,
680            Self::compute_basic_line_info(
681                content,
682                &line_offsets,
683                &code_blocks,
684                flavor,
685                &html_comment_ranges,
686                &autodoc_ranges,
687                &quarto_div_ranges,
688            )
689        );
690
691        // Detect HTML blocks BEFORE heading detection
692        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
693
694        // Detect ESM import/export blocks in MDX files BEFORE heading detection
695        profile_section!(
696            "ESM blocks",
697            profile,
698            Self::detect_esm_blocks(content, &mut lines, flavor)
699        );
700
701        // Detect JSX expressions and MDX comments in MDX files
702        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
703            "JSX/MDX detection",
704            profile,
705            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
706        );
707
708        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
709        profile_section!(
710            "MkDocs constructs",
711            profile,
712            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
713        );
714
715        // Detect Obsidian comments (%%...%%) in Obsidian flavor
716        let obsidian_comment_ranges = profile_section!(
717            "Obsidian comments",
718            profile,
719            Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
720        );
721
722        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
723        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
724
725        // Now detect headings and blockquotes
726        profile_section!(
727            "Headings & blockquotes",
728            profile,
729            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
730        );
731
732        // Parse code spans early so we can exclude them from link/image parsing
733        let code_spans = profile_section!(
734            "Code spans",
735            profile,
736            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
737        );
738
739        // Mark lines that are continuations of multi-line code spans
740        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
741        for span in &code_spans {
742            if span.end_line > span.line {
743                // Mark lines after the first line as continuations
744                for line_num in (span.line + 1)..=span.end_line {
745                    if let Some(line_info) = lines.get_mut(line_num - 1) {
746                        line_info.in_code_span_continuation = true;
747                    }
748                }
749            }
750        }
751
752        // Parse links, images, references, and list blocks
753        let (links, broken_links, footnote_refs) = profile_section!(
754            "Links",
755            profile,
756            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
757        );
758
759        let images = profile_section!(
760            "Images",
761            profile,
762            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
763        );
764
765        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
766
767        // Build O(1) lookup map for reference definitions by lowercase ID
768        let reference_defs_map: HashMap<String, usize> = reference_defs
769            .iter()
770            .enumerate()
771            .map(|(idx, def)| (def.id.to_lowercase(), idx))
772            .collect();
773
774        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
775
776        // Compute character frequency for fast content analysis
777        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
778
779        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
780        let table_blocks = profile_section!(
781            "Table blocks",
782            profile,
783            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
784                content,
785                &code_blocks,
786                &code_spans,
787                &html_comment_ranges,
788            )
789        );
790
791        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
792        let line_index = profile_section!(
793            "Line index",
794            profile,
795            crate::utils::range_utils::LineIndex::new(content)
796        );
797
798        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
799        let jinja_ranges = profile_section!(
800            "Jinja ranges",
801            profile,
802            crate::utils::jinja_utils::find_jinja_ranges(content)
803        );
804
805        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
806        let citation_ranges = profile_section!("Citation ranges", profile, {
807            if flavor == MarkdownFlavor::Quarto {
808                crate::utils::quarto_divs::find_citation_ranges(content)
809            } else {
810                Vec::new()
811            }
812        });
813
814        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
815        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
816            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
817            let mut ranges = Vec::new();
818            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
819                ranges.push((mat.start(), mat.end()));
820            }
821            ranges
822        });
823
824        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
825
826        Self {
827            content,
828            line_offsets,
829            code_blocks,
830            lines,
831            links,
832            images,
833            broken_links,
834            footnote_refs,
835            reference_defs,
836            reference_defs_map,
837            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
838            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
839            list_blocks,
840            char_frequency,
841            html_tags_cache: OnceLock::new(),
842            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
843            table_rows_cache: OnceLock::new(),
844            bare_urls_cache: OnceLock::new(),
845            has_mixed_list_nesting_cache: OnceLock::new(),
846            html_comment_ranges,
847            table_blocks,
848            line_index,
849            jinja_ranges,
850            flavor,
851            source_file,
852            jsx_expression_ranges,
853            mdx_comment_ranges,
854            citation_ranges,
855            shortcode_ranges,
856            inline_config,
857            obsidian_comment_ranges,
858        }
859    }
860
861    /// Check if a rule is disabled at a specific line number (1-indexed)
862    ///
863    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
864    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
865    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
866        self.inline_config.is_rule_disabled(rule_name, line_number)
867    }
868
869    /// Get code spans - computed lazily on first access
870    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
871        Arc::clone(
872            self.code_spans_cache
873                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
874        )
875    }
876
877    /// Get math spans - computed lazily on first access
878    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
879        Arc::clone(
880            self.math_spans_cache
881                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
882        )
883    }
884
885    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
886    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
887        let math_spans = self.math_spans();
888        math_spans
889            .iter()
890            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
891    }
892
893    /// Get HTML comment ranges - pre-computed during LintContext construction
894    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
895        &self.html_comment_ranges
896    }
897
898    /// Get Obsidian comment ranges - pre-computed during LintContext construction
899    /// Returns empty slice for non-Obsidian flavors
900    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
901        &self.obsidian_comment_ranges
902    }
903
904    /// Check if a byte position is inside an Obsidian comment
905    ///
906    /// Returns false for non-Obsidian flavors.
907    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
908        self.obsidian_comment_ranges
909            .iter()
910            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
911    }
912
913    /// Check if a line/column position is inside an Obsidian comment
914    ///
915    /// Line number is 1-indexed, column is 1-indexed.
916    /// Returns false for non-Obsidian flavors.
917    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
918        if self.obsidian_comment_ranges.is_empty() {
919            return false;
920        }
921
922        // Convert line/column (1-indexed, char-based) to byte position
923        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
924        self.is_in_obsidian_comment(byte_pos)
925    }
926
927    /// Get HTML tags - computed lazily on first access
928    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
929        Arc::clone(self.html_tags_cache.get_or_init(|| {
930            Arc::new(Self::parse_html_tags(
931                self.content,
932                &self.lines,
933                &self.code_blocks,
934                self.flavor,
935            ))
936        }))
937    }
938
939    /// Get emphasis spans - pre-computed during construction
940    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
941        Arc::clone(
942            self.emphasis_spans_cache
943                .get()
944                .expect("emphasis_spans_cache initialized during construction"),
945        )
946    }
947
948    /// Get table rows - computed lazily on first access
949    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
950        Arc::clone(
951            self.table_rows_cache
952                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
953        )
954    }
955
956    /// Get bare URLs - computed lazily on first access
957    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
958        Arc::clone(
959            self.bare_urls_cache
960                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
961        )
962    }
963
964    /// Check if document has mixed ordered/unordered list nesting.
965    /// Result is cached after first computation (document-level invariant).
966    /// This is used by MD007 for smart style auto-detection.
967    pub fn has_mixed_list_nesting(&self) -> bool {
968        *self
969            .has_mixed_list_nesting_cache
970            .get_or_init(|| self.compute_mixed_list_nesting())
971    }
972
973    /// Internal computation for mixed list nesting (only called once per LintContext).
974    fn compute_mixed_list_nesting(&self) -> bool {
975        // Track parent list items by their marker position and type
976        // Using marker_column instead of indent because it works correctly
977        // for blockquoted content where indent doesn't account for the prefix
978        // Stack stores: (marker_column, is_ordered)
979        let mut stack: Vec<(usize, bool)> = Vec::new();
980        let mut last_was_blank = false;
981
982        for line_info in &self.lines {
983            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
984            if line_info.in_code_block
985                || line_info.in_front_matter
986                || line_info.in_mkdocstrings
987                || line_info.in_html_comment
988                || line_info.in_esm_block
989            {
990                continue;
991            }
992
993            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
994            if line_info.is_blank {
995                last_was_blank = true;
996                continue;
997            }
998
999            if let Some(list_item) = &line_info.list_item {
1000                // Normalize column 1 to column 0 (consistent with MD007 check function)
1001                let current_pos = if list_item.marker_column == 1 {
1002                    0
1003                } else {
1004                    list_item.marker_column
1005                };
1006
1007                // If there was a blank line and this item is at root level, reset stack
1008                if last_was_blank && current_pos == 0 {
1009                    stack.clear();
1010                }
1011                last_was_blank = false;
1012
1013                // Pop items at same or greater position (they're siblings or deeper, not parents)
1014                while let Some(&(pos, _)) = stack.last() {
1015                    if pos >= current_pos {
1016                        stack.pop();
1017                    } else {
1018                        break;
1019                    }
1020                }
1021
1022                // Check if immediate parent has different type - this is mixed nesting
1023                if let Some(&(_, parent_is_ordered)) = stack.last()
1024                    && parent_is_ordered != list_item.is_ordered
1025                {
1026                    return true; // Found mixed nesting - early exit
1027                }
1028
1029                stack.push((current_pos, list_item.is_ordered));
1030            } else {
1031                // Non-list line (but not blank) - could be paragraph or other content
1032                last_was_blank = false;
1033            }
1034        }
1035
1036        false
1037    }
1038
1039    /// Map a byte offset to (line, column)
1040    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1041        match self.line_offsets.binary_search(&offset) {
1042            Ok(line) => (line + 1, 1),
1043            Err(line) => {
1044                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1045                (line, offset - line_start + 1)
1046            }
1047        }
1048    }
1049
1050    /// Check if a position is within a code block or code span
1051    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1052        // Check code blocks first
1053        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1054            return true;
1055        }
1056
1057        // Check inline code spans (lazy load if needed)
1058        self.code_spans()
1059            .iter()
1060            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1061    }
1062
1063    /// Get line information by line number (1-indexed)
1064    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1065        if line_num > 0 {
1066            self.lines.get(line_num - 1)
1067        } else {
1068            None
1069        }
1070    }
1071
1072    /// Get byte offset for a line number (1-indexed)
1073    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1074        self.line_info(line_num).map(|info| info.byte_offset)
1075    }
1076
1077    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1078    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1079        let normalized_id = ref_id.to_lowercase();
1080        self.reference_defs_map
1081            .get(&normalized_id)
1082            .map(|&idx| self.reference_defs[idx].url.as_str())
1083    }
1084
1085    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1086    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1087        let normalized_id = ref_id.to_lowercase();
1088        self.reference_defs_map
1089            .get(&normalized_id)
1090            .map(|&idx| &self.reference_defs[idx])
1091    }
1092
1093    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1094    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1095        let normalized_id = ref_id.to_lowercase();
1096        self.reference_defs_map.contains_key(&normalized_id)
1097    }
1098
1099    /// Check if a line is part of a list block
1100    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1101        self.list_blocks
1102            .iter()
1103            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1104    }
1105
1106    /// Get the list block containing a specific line
1107    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1108        self.list_blocks
1109            .iter()
1110            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1111    }
1112
1113    // Compatibility methods for DocumentStructure migration
1114
1115    /// Check if a line is within a code block
1116    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1117        if line_num == 0 || line_num > self.lines.len() {
1118            return false;
1119        }
1120        self.lines[line_num - 1].in_code_block
1121    }
1122
1123    /// Check if a line is within front matter
1124    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1125        if line_num == 0 || line_num > self.lines.len() {
1126            return false;
1127        }
1128        self.lines[line_num - 1].in_front_matter
1129    }
1130
1131    /// Check if a line is within an HTML block
1132    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1133        if line_num == 0 || line_num > self.lines.len() {
1134            return false;
1135        }
1136        self.lines[line_num - 1].in_html_block
1137    }
1138
1139    /// Check if a line and column is within a code span
1140    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1141        if line_num == 0 || line_num > self.lines.len() {
1142            return false;
1143        }
1144
1145        // Use the code spans cache to check
1146        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1147        // Convert col to 0-indexed for comparison
1148        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1149        let code_spans = self.code_spans();
1150        code_spans.iter().any(|span| {
1151            // Check if line is within the span's line range
1152            if line_num < span.line || line_num > span.end_line {
1153                return false;
1154            }
1155
1156            if span.line == span.end_line {
1157                // Single-line span: check column bounds
1158                col_0indexed >= span.start_col && col_0indexed < span.end_col
1159            } else if line_num == span.line {
1160                // First line of multi-line span: anything after start_col is in span
1161                col_0indexed >= span.start_col
1162            } else if line_num == span.end_line {
1163                // Last line of multi-line span: anything before end_col is in span
1164                col_0indexed < span.end_col
1165            } else {
1166                // Middle line of multi-line span: entire line is in span
1167                true
1168            }
1169        })
1170    }
1171
1172    /// Check if a byte offset is within a code span
1173    #[inline]
1174    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1175        let code_spans = self.code_spans();
1176        code_spans
1177            .iter()
1178            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1179    }
1180
1181    /// Check if a byte position is within a reference definition
1182    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1183    #[inline]
1184    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1185        self.reference_defs
1186            .iter()
1187            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1188    }
1189
1190    /// Check if a byte position is within an HTML comment
1191    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1192    /// where k is the number of HTML comments (typically very small)
1193    #[inline]
1194    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1195        self.html_comment_ranges
1196            .iter()
1197            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1198    }
1199
1200    /// Check if a byte position is within an HTML tag (including multiline tags)
1201    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1202    #[inline]
1203    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1204        self.html_tags()
1205            .iter()
1206            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1207    }
1208
1209    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1210    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1211        self.jinja_ranges
1212            .iter()
1213            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1214    }
1215
1216    /// Check if a byte position is within a JSX expression (MDX: {expression})
1217    #[inline]
1218    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1219        self.jsx_expression_ranges
1220            .iter()
1221            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1222    }
1223
1224    /// Check if a byte position is within an MDX comment ({/* ... */})
1225    #[inline]
1226    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1227        self.mdx_comment_ranges
1228            .iter()
1229            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1230    }
1231
1232    /// Get all JSX expression byte ranges
1233    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1234        &self.jsx_expression_ranges
1235    }
1236
1237    /// Get all MDX comment byte ranges
1238    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1239        &self.mdx_comment_ranges
1240    }
1241
1242    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1243    /// Only active in Quarto flavor
1244    #[inline]
1245    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1246        self.citation_ranges
1247            .iter()
1248            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1249    }
1250
1251    /// Get all citation byte ranges (Quarto flavor only)
1252    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1253        &self.citation_ranges
1254    }
1255
1256    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1257    #[inline]
1258    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1259        self.shortcode_ranges
1260            .iter()
1261            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1262    }
1263
1264    /// Get all shortcode byte ranges
1265    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1266        &self.shortcode_ranges
1267    }
1268
1269    /// Check if a byte position is within a link reference definition title
1270    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1271        self.reference_defs.iter().any(|def| {
1272            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1273                byte_pos >= start && byte_pos < end
1274            } else {
1275                false
1276            }
1277        })
1278    }
1279
1280    /// Check if content has any instances of a specific character (fast)
1281    pub fn has_char(&self, ch: char) -> bool {
1282        match ch {
1283            '#' => self.char_frequency.hash_count > 0,
1284            '*' => self.char_frequency.asterisk_count > 0,
1285            '_' => self.char_frequency.underscore_count > 0,
1286            '-' => self.char_frequency.hyphen_count > 0,
1287            '+' => self.char_frequency.plus_count > 0,
1288            '>' => self.char_frequency.gt_count > 0,
1289            '|' => self.char_frequency.pipe_count > 0,
1290            '[' => self.char_frequency.bracket_count > 0,
1291            '`' => self.char_frequency.backtick_count > 0,
1292            '<' => self.char_frequency.lt_count > 0,
1293            '!' => self.char_frequency.exclamation_count > 0,
1294            '\n' => self.char_frequency.newline_count > 0,
1295            _ => self.content.contains(ch), // Fallback for other characters
1296        }
1297    }
1298
1299    /// Get count of a specific character (fast)
1300    pub fn char_count(&self, ch: char) -> usize {
1301        match ch {
1302            '#' => self.char_frequency.hash_count,
1303            '*' => self.char_frequency.asterisk_count,
1304            '_' => self.char_frequency.underscore_count,
1305            '-' => self.char_frequency.hyphen_count,
1306            '+' => self.char_frequency.plus_count,
1307            '>' => self.char_frequency.gt_count,
1308            '|' => self.char_frequency.pipe_count,
1309            '[' => self.char_frequency.bracket_count,
1310            '`' => self.char_frequency.backtick_count,
1311            '<' => self.char_frequency.lt_count,
1312            '!' => self.char_frequency.exclamation_count,
1313            '\n' => self.char_frequency.newline_count,
1314            _ => self.content.matches(ch).count(), // Fallback for other characters
1315        }
1316    }
1317
1318    /// Check if content likely contains headings (fast)
1319    pub fn likely_has_headings(&self) -> bool {
1320        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1321    }
1322
1323    /// Check if content likely contains lists (fast)
1324    pub fn likely_has_lists(&self) -> bool {
1325        self.char_frequency.asterisk_count > 0
1326            || self.char_frequency.hyphen_count > 0
1327            || self.char_frequency.plus_count > 0
1328    }
1329
1330    /// Check if content likely contains emphasis (fast)
1331    pub fn likely_has_emphasis(&self) -> bool {
1332        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1333    }
1334
1335    /// Check if content likely contains tables (fast)
1336    pub fn likely_has_tables(&self) -> bool {
1337        self.char_frequency.pipe_count > 2
1338    }
1339
1340    /// Check if content likely contains blockquotes (fast)
1341    pub fn likely_has_blockquotes(&self) -> bool {
1342        self.char_frequency.gt_count > 0
1343    }
1344
1345    /// Check if content likely contains code (fast)
1346    pub fn likely_has_code(&self) -> bool {
1347        self.char_frequency.backtick_count > 0
1348    }
1349
1350    /// Check if content likely contains links or images (fast)
1351    pub fn likely_has_links_or_images(&self) -> bool {
1352        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1353    }
1354
1355    /// Check if content likely contains HTML (fast)
1356    pub fn likely_has_html(&self) -> bool {
1357        self.char_frequency.lt_count > 0
1358    }
1359
1360    /// Get the blockquote prefix for inserting a blank line at the given line index.
1361    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1362    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1363    /// Returns an empty string if the line is not inside a blockquote.
1364    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1365        if let Some(line_info) = self.lines.get(line_idx)
1366            && let Some(ref bq) = line_info.blockquote
1367        {
1368            bq.prefix.trim_end().to_string()
1369        } else {
1370            String::new()
1371        }
1372    }
1373
1374    /// Get HTML tags on a specific line
1375    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1376        self.html_tags()
1377            .iter()
1378            .filter(|tag| tag.line == line_num)
1379            .cloned()
1380            .collect()
1381    }
1382
1383    /// Get emphasis spans on a specific line
1384    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1385        self.emphasis_spans()
1386            .iter()
1387            .filter(|span| span.line == line_num)
1388            .cloned()
1389            .collect()
1390    }
1391
1392    /// Get table rows on a specific line
1393    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1394        self.table_rows()
1395            .iter()
1396            .filter(|row| row.line == line_num)
1397            .cloned()
1398            .collect()
1399    }
1400
1401    /// Get bare URLs on a specific line
1402    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1403        self.bare_urls()
1404            .iter()
1405            .filter(|url| url.line == line_num)
1406            .cloned()
1407            .collect()
1408    }
1409
1410    /// Find the line index for a given byte offset using binary search.
1411    /// Returns (line_index, line_number, column) where:
1412    /// - line_index is the 0-based index in the lines array
1413    /// - line_number is the 1-based line number
1414    /// - column is the byte offset within that line
1415    #[inline]
1416    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1417        // Binary search to find the line containing this byte offset
1418        let idx = match lines.binary_search_by(|line| {
1419            if byte_offset < line.byte_offset {
1420                std::cmp::Ordering::Greater
1421            } else if byte_offset > line.byte_offset + line.byte_len {
1422                std::cmp::Ordering::Less
1423            } else {
1424                std::cmp::Ordering::Equal
1425            }
1426        }) {
1427            Ok(idx) => idx,
1428            Err(idx) => idx.saturating_sub(1),
1429        };
1430
1431        let line = &lines[idx];
1432        let line_num = idx + 1;
1433        let col = byte_offset.saturating_sub(line.byte_offset);
1434
1435        (idx, line_num, col)
1436    }
1437
1438    /// Check if a byte offset is within a code span using binary search
1439    #[inline]
1440    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1441        // Since spans are sorted by byte_offset, use partition_point for binary search
1442        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1443
1444        // Check the span that starts at or before our offset
1445        if idx > 0 {
1446            let span = &code_spans[idx - 1];
1447            if offset >= span.byte_offset && offset < span.byte_end {
1448                return true;
1449            }
1450        }
1451
1452        false
1453    }
1454
1455    /// Collect byte ranges of all links using pulldown-cmark
1456    /// This is used to skip heading detection for lines that fall within link syntax
1457    /// (e.g., multiline links like `[text](url\n#fragment)`)
1458    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1459        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1460
1461        let mut link_ranges = Vec::new();
1462        let mut options = Options::empty();
1463        options.insert(Options::ENABLE_WIKILINKS);
1464        options.insert(Options::ENABLE_FOOTNOTES);
1465
1466        let parser = Parser::new_ext(content, options).into_offset_iter();
1467        let mut link_stack: Vec<usize> = Vec::new();
1468
1469        for (event, range) in parser {
1470            match event {
1471                Event::Start(Tag::Link { .. }) => {
1472                    link_stack.push(range.start);
1473                }
1474                Event::End(TagEnd::Link) => {
1475                    if let Some(start_pos) = link_stack.pop() {
1476                        link_ranges.push((start_pos, range.end));
1477                    }
1478                }
1479                _ => {}
1480            }
1481        }
1482
1483        link_ranges
1484    }
1485
1486    /// Parse all links in the content
1487    fn parse_links(
1488        content: &'a str,
1489        lines: &[LineInfo],
1490        code_blocks: &[(usize, usize)],
1491        code_spans: &[CodeSpan],
1492        flavor: MarkdownFlavor,
1493        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1494    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1495        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1496        use std::collections::HashSet;
1497
1498        let mut links = Vec::with_capacity(content.len() / 500);
1499        let mut broken_links = Vec::new();
1500        let mut footnote_refs = Vec::new();
1501
1502        // Track byte positions of links found by pulldown-cmark
1503        let mut found_positions = HashSet::new();
1504
1505        // Use pulldown-cmark's streaming parser with BrokenLink callback
1506        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1507        // This automatically handles:
1508        // - Escaped links (won't generate events)
1509        // - Links in code blocks/spans (won't generate Link events)
1510        // - Images (generates Tag::Image instead)
1511        // - Reference resolution (dest_url is already resolved!)
1512        // - Broken references (callback is invoked)
1513        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1514        let mut options = Options::empty();
1515        options.insert(Options::ENABLE_WIKILINKS);
1516        options.insert(Options::ENABLE_FOOTNOTES);
1517
1518        let parser = Parser::new_with_broken_link_callback(
1519            content,
1520            options,
1521            Some(|link: BrokenLink<'_>| {
1522                broken_links.push(BrokenLinkInfo {
1523                    reference: link.reference.to_string(),
1524                    span: link.span.clone(),
1525                });
1526                None
1527            }),
1528        )
1529        .into_offset_iter();
1530
1531        let mut link_stack: Vec<(
1532            usize,
1533            usize,
1534            pulldown_cmark::CowStr<'a>,
1535            LinkType,
1536            pulldown_cmark::CowStr<'a>,
1537        )> = Vec::new();
1538        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1539
1540        for (event, range) in parser {
1541            match event {
1542                Event::Start(Tag::Link {
1543                    link_type,
1544                    dest_url,
1545                    id,
1546                    ..
1547                }) => {
1548                    // Link start - record position, URL, and reference ID
1549                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1550                    text_chunks.clear();
1551                }
1552                Event::Text(text) if !link_stack.is_empty() => {
1553                    // Track text content with its byte range
1554                    text_chunks.push((text.to_string(), range.start, range.end));
1555                }
1556                Event::Code(code) if !link_stack.is_empty() => {
1557                    // Include inline code in link text (with backticks)
1558                    let code_text = format!("`{code}`");
1559                    text_chunks.push((code_text, range.start, range.end));
1560                }
1561                Event::End(TagEnd::Link) => {
1562                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1563                        // Skip if in HTML comment
1564                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1565                            text_chunks.clear();
1566                            continue;
1567                        }
1568
1569                        // Find line and column information
1570                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1571
1572                        // Skip if this link is on a MkDocs snippet line
1573                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1574                            text_chunks.clear();
1575                            continue;
1576                        }
1577
1578                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1579
1580                        let is_reference = matches!(
1581                            link_type,
1582                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1583                        );
1584
1585                        // Extract link text directly from source bytes to preserve escaping
1586                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1587                        let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
1588                            // WikiLinks: [[destination]] or [[destination|display text]]
1589                            // pulldown-cmark's range excludes the final ]], so standard extraction fails
1590                            // Use accumulated text chunks (from Text events) for accurate text
1591                            if !text_chunks.is_empty() {
1592                                let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
1593                                Cow::Owned(text)
1594                            } else {
1595                                // Fallback: use the URL as text (for simple [[destination]] links)
1596                                Cow::Owned(url.to_string())
1597                            }
1598                        } else if start_pos < content.len() {
1599                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1600
1601                            // Find MATCHING ] by tracking bracket depth for nested brackets
1602                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1603                            // Brackets inside code spans (between backticks) should be ignored
1604                            let mut close_pos = None;
1605                            let mut depth = 0;
1606                            let mut in_code_span = false;
1607
1608                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1609                                // Count preceding backslashes
1610                                let mut backslash_count = 0;
1611                                let mut j = i;
1612                                while j > 0 && link_bytes[j - 1] == b'\\' {
1613                                    backslash_count += 1;
1614                                    j -= 1;
1615                                }
1616                                let is_escaped = backslash_count % 2 != 0;
1617
1618                                // Track code spans - backticks toggle in/out of code
1619                                if byte == b'`' && !is_escaped {
1620                                    in_code_span = !in_code_span;
1621                                }
1622
1623                                // Only count brackets when NOT in a code span
1624                                if !is_escaped && !in_code_span {
1625                                    if byte == b'[' {
1626                                        depth += 1;
1627                                    } else if byte == b']' {
1628                                        if depth == 0 {
1629                                            // Found the matching closing bracket
1630                                            close_pos = Some(i);
1631                                            break;
1632                                        } else {
1633                                            depth -= 1;
1634                                        }
1635                                    }
1636                                }
1637                            }
1638
1639                            if let Some(pos) = close_pos {
1640                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1641                            } else {
1642                                Cow::Borrowed("")
1643                            }
1644                        } else {
1645                            Cow::Borrowed("")
1646                        };
1647
1648                        // For reference links, use the actual reference ID from pulldown-cmark
1649                        let reference_id = if is_reference && !ref_id.is_empty() {
1650                            Some(Cow::Owned(ref_id.to_lowercase()))
1651                        } else if is_reference {
1652                            // For collapsed/shortcut references without explicit ID, use the link text
1653                            Some(Cow::Owned(link_text.to_lowercase()))
1654                        } else {
1655                            None
1656                        };
1657
1658                        // Track this position as found
1659                        found_positions.insert(start_pos);
1660
1661                        links.push(ParsedLink {
1662                            line: line_num,
1663                            start_col: col_start,
1664                            end_col: col_end,
1665                            byte_offset: start_pos,
1666                            byte_end: range.end,
1667                            text: link_text,
1668                            url: Cow::Owned(url.to_string()),
1669                            is_reference,
1670                            reference_id,
1671                            link_type,
1672                        });
1673
1674                        text_chunks.clear();
1675                    }
1676                }
1677                Event::FootnoteReference(footnote_id) => {
1678                    // Capture footnote references like [^1], [^note]
1679                    // Skip if in HTML comment
1680                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1681                        continue;
1682                    }
1683
1684                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1685                    footnote_refs.push(FootnoteRef {
1686                        id: footnote_id.to_string(),
1687                        line: line_num,
1688                        byte_offset: range.start,
1689                        byte_end: range.end,
1690                    });
1691                }
1692                _ => {}
1693            }
1694        }
1695
1696        // Also find undefined references using regex
1697        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1698        // because the reference is undefined
1699        for cap in LINK_PATTERN.captures_iter(content) {
1700            let full_match = cap.get(0).unwrap();
1701            let match_start = full_match.start();
1702            let match_end = full_match.end();
1703
1704            // Skip if this was already found by pulldown-cmark (it's a valid link)
1705            if found_positions.contains(&match_start) {
1706                continue;
1707            }
1708
1709            // Skip if escaped
1710            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1711                continue;
1712            }
1713
1714            // Skip if it's an image
1715            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1716                continue;
1717            }
1718
1719            // Skip if in code block
1720            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1721                continue;
1722            }
1723
1724            // Skip if in code span
1725            if Self::is_offset_in_code_span(code_spans, match_start) {
1726                continue;
1727            }
1728
1729            // Skip if in HTML comment
1730            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1731                continue;
1732            }
1733
1734            // Find line and column information
1735            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1736
1737            // Skip if this link is on a MkDocs snippet line
1738            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1739                continue;
1740            }
1741
1742            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1743
1744            let text = cap.get(1).map_or("", |m| m.as_str());
1745
1746            // Only process reference links (group 6)
1747            if let Some(ref_id) = cap.get(6) {
1748                let ref_id_str = ref_id.as_str();
1749                let normalized_ref = if ref_id_str.is_empty() {
1750                    Cow::Owned(text.to_lowercase()) // Implicit reference
1751                } else {
1752                    Cow::Owned(ref_id_str.to_lowercase())
1753                };
1754
1755                // This is an undefined reference (pulldown-cmark didn't parse it)
1756                links.push(ParsedLink {
1757                    line: line_num,
1758                    start_col: col_start,
1759                    end_col: col_end,
1760                    byte_offset: match_start,
1761                    byte_end: match_end,
1762                    text: Cow::Borrowed(text),
1763                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1764                    is_reference: true,
1765                    reference_id: Some(normalized_ref),
1766                    link_type: LinkType::Reference, // Undefined references are reference-style
1767                });
1768            }
1769        }
1770
1771        (links, broken_links, footnote_refs)
1772    }
1773
1774    /// Parse all images in the content
1775    fn parse_images(
1776        content: &'a str,
1777        lines: &[LineInfo],
1778        code_blocks: &[(usize, usize)],
1779        code_spans: &[CodeSpan],
1780        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1781    ) -> Vec<ParsedImage<'a>> {
1782        use crate::utils::skip_context::is_in_html_comment_ranges;
1783        use std::collections::HashSet;
1784
1785        // Pre-size based on a heuristic: images are less common than links
1786        let mut images = Vec::with_capacity(content.len() / 1000);
1787        let mut found_positions = HashSet::new();
1788
1789        // Use pulldown-cmark for parsing - more accurate and faster
1790        let parser = Parser::new(content).into_offset_iter();
1791        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1792            Vec::new();
1793        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1794
1795        for (event, range) in parser {
1796            match event {
1797                Event::Start(Tag::Image {
1798                    link_type,
1799                    dest_url,
1800                    id,
1801                    ..
1802                }) => {
1803                    image_stack.push((range.start, dest_url, link_type, id));
1804                    text_chunks.clear();
1805                }
1806                Event::Text(text) if !image_stack.is_empty() => {
1807                    text_chunks.push((text.to_string(), range.start, range.end));
1808                }
1809                Event::Code(code) if !image_stack.is_empty() => {
1810                    let code_text = format!("`{code}`");
1811                    text_chunks.push((code_text, range.start, range.end));
1812                }
1813                Event::End(TagEnd::Image) => {
1814                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1815                        // Skip if in code block
1816                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1817                            continue;
1818                        }
1819
1820                        // Skip if in code span
1821                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1822                            continue;
1823                        }
1824
1825                        // Skip if in HTML comment
1826                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1827                            continue;
1828                        }
1829
1830                        // Find line and column using binary search
1831                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1832                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1833
1834                        let is_reference = matches!(
1835                            link_type,
1836                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1837                        );
1838
1839                        // Extract alt text directly from source bytes to preserve escaping
1840                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1841                        let alt_text = if start_pos < content.len() {
1842                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1843
1844                            // Find MATCHING ] by tracking bracket depth for nested brackets
1845                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1846                            let mut close_pos = None;
1847                            let mut depth = 0;
1848
1849                            if image_bytes.len() > 2 {
1850                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1851                                    // Count preceding backslashes
1852                                    let mut backslash_count = 0;
1853                                    let mut j = i;
1854                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1855                                        backslash_count += 1;
1856                                        j -= 1;
1857                                    }
1858                                    let is_escaped = backslash_count % 2 != 0;
1859
1860                                    if !is_escaped {
1861                                        if byte == b'[' {
1862                                            depth += 1;
1863                                        } else if byte == b']' {
1864                                            if depth == 0 {
1865                                                // Found the matching closing bracket
1866                                                close_pos = Some(i);
1867                                                break;
1868                                            } else {
1869                                                depth -= 1;
1870                                            }
1871                                        }
1872                                    }
1873                                }
1874                            }
1875
1876                            if let Some(pos) = close_pos {
1877                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1878                            } else {
1879                                Cow::Borrowed("")
1880                            }
1881                        } else {
1882                            Cow::Borrowed("")
1883                        };
1884
1885                        let reference_id = if is_reference && !ref_id.is_empty() {
1886                            Some(Cow::Owned(ref_id.to_lowercase()))
1887                        } else if is_reference {
1888                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1889                        } else {
1890                            None
1891                        };
1892
1893                        found_positions.insert(start_pos);
1894                        images.push(ParsedImage {
1895                            line: line_num,
1896                            start_col: col_start,
1897                            end_col: col_end,
1898                            byte_offset: start_pos,
1899                            byte_end: range.end,
1900                            alt_text,
1901                            url: Cow::Owned(url.to_string()),
1902                            is_reference,
1903                            reference_id,
1904                            link_type,
1905                        });
1906                    }
1907                }
1908                _ => {}
1909            }
1910        }
1911
1912        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1913        for cap in IMAGE_PATTERN.captures_iter(content) {
1914            let full_match = cap.get(0).unwrap();
1915            let match_start = full_match.start();
1916            let match_end = full_match.end();
1917
1918            // Skip if already found by pulldown-cmark
1919            if found_positions.contains(&match_start) {
1920                continue;
1921            }
1922
1923            // Skip if the ! is escaped
1924            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1925                continue;
1926            }
1927
1928            // Skip if in code block, code span, or HTML comment
1929            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1930                || Self::is_offset_in_code_span(code_spans, match_start)
1931                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1932            {
1933                continue;
1934            }
1935
1936            // Only process reference images (undefined references not found by pulldown-cmark)
1937            if let Some(ref_id) = cap.get(6) {
1938                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1939                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1940                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1941                let ref_id_str = ref_id.as_str();
1942                let normalized_ref = if ref_id_str.is_empty() {
1943                    Cow::Owned(alt_text.to_lowercase())
1944                } else {
1945                    Cow::Owned(ref_id_str.to_lowercase())
1946                };
1947
1948                images.push(ParsedImage {
1949                    line: line_num,
1950                    start_col: col_start,
1951                    end_col: col_end,
1952                    byte_offset: match_start,
1953                    byte_end: match_end,
1954                    alt_text: Cow::Borrowed(alt_text),
1955                    url: Cow::Borrowed(""),
1956                    is_reference: true,
1957                    reference_id: Some(normalized_ref),
1958                    link_type: LinkType::Reference, // Undefined references are reference-style
1959                });
1960            }
1961        }
1962
1963        images
1964    }
1965
1966    /// Parse reference definitions
1967    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1968        // Pre-size based on lines count as reference definitions are line-based
1969        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1970
1971        for (line_idx, line_info) in lines.iter().enumerate() {
1972            // Skip lines in code blocks
1973            if line_info.in_code_block {
1974                continue;
1975            }
1976
1977            let line = line_info.content(content);
1978            let line_num = line_idx + 1;
1979
1980            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1981                let id_raw = cap.get(1).unwrap().as_str();
1982
1983                // Skip footnote definitions - they use [^id]: syntax and are semantically
1984                // different from reference link definitions
1985                if id_raw.starts_with('^') {
1986                    continue;
1987                }
1988
1989                let id = id_raw.to_lowercase();
1990                let url = cap.get(2).unwrap().as_str().to_string();
1991                let title_match = cap.get(3).or_else(|| cap.get(4));
1992                let title = title_match.map(|m| m.as_str().to_string());
1993
1994                // Calculate byte positions
1995                // The match starts at the beginning of the line (0) and extends to the end
1996                let match_obj = cap.get(0).unwrap();
1997                let byte_offset = line_info.byte_offset + match_obj.start();
1998                let byte_end = line_info.byte_offset + match_obj.end();
1999
2000                // Calculate title byte positions (includes the quote character before content)
2001                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2002                    // The match is the content inside quotes, so we include the quote before
2003                    let start = line_info.byte_offset + m.start().saturating_sub(1);
2004                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
2005                    (Some(start), Some(end))
2006                } else {
2007                    (None, None)
2008                };
2009
2010                refs.push(ReferenceDef {
2011                    line: line_num,
2012                    id,
2013                    url,
2014                    title,
2015                    byte_offset,
2016                    byte_end,
2017                    title_byte_start,
2018                    title_byte_end,
2019                });
2020            }
2021        }
2022
2023        refs
2024    }
2025
2026    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
2027    /// Handles nested blockquotes like `> > > content`
2028    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
2029    #[inline]
2030    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2031        let trimmed_start = line.trim_start();
2032        if !trimmed_start.starts_with('>') {
2033            return None;
2034        }
2035
2036        // Track total prefix length to handle nested blockquotes
2037        let mut remaining = line;
2038        let mut total_prefix_len = 0;
2039
2040        loop {
2041            let trimmed = remaining.trim_start();
2042            if !trimmed.starts_with('>') {
2043                break;
2044            }
2045
2046            // Add leading whitespace + '>' to prefix
2047            let leading_ws_len = remaining.len() - trimmed.len();
2048            total_prefix_len += leading_ws_len + 1;
2049
2050            let after_gt = &trimmed[1..];
2051
2052            // Handle optional whitespace after '>' (space or tab)
2053            if let Some(stripped) = after_gt.strip_prefix(' ') {
2054                total_prefix_len += 1;
2055                remaining = stripped;
2056            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2057                total_prefix_len += 1;
2058                remaining = stripped;
2059            } else {
2060                remaining = after_gt;
2061            }
2062        }
2063
2064        Some((&line[..total_prefix_len], remaining))
2065    }
2066
2067    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2068    ///
2069    /// Returns a HashMap keyed by line byte offset, containing:
2070    /// `(is_ordered, marker, marker_column, content_column, number)`
2071    ///
2072    /// ## Why pulldown-cmark?
2073    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2074    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2075    /// This fixes issue #253 where continuation lines were falsely detected.
2076    ///
2077    /// ## Tab indentation quirk
2078    /// Pulldown-cmark reports nested list items at the newline character position
2079    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2080    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2081    /// We detect this and advance to the correct line.
2082    ///
2083    /// ## HashMap key strategy
2084    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2085    /// that resolve to the same line (after newline adjustment). The first event
2086    /// for each line is authoritative.
2087    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2088    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2089    /// This avoids a separate parse for emphasis detection.
2090    fn detect_list_items_and_emphasis_with_pulldown(
2091        content: &str,
2092        line_offsets: &[usize],
2093        flavor: MarkdownFlavor,
2094        front_matter_end: usize,
2095        code_blocks: &[(usize, usize)],
2096    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2097        use std::collections::HashMap;
2098
2099        let mut list_items = HashMap::new();
2100        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2101
2102        let mut options = Options::empty();
2103        options.insert(Options::ENABLE_TABLES);
2104        options.insert(Options::ENABLE_FOOTNOTES);
2105        options.insert(Options::ENABLE_STRIKETHROUGH);
2106        options.insert(Options::ENABLE_TASKLISTS);
2107        // Always enable GFM features for consistency with existing behavior
2108        options.insert(Options::ENABLE_GFM);
2109
2110        // Suppress unused variable warning
2111        let _ = flavor;
2112
2113        let parser = Parser::new_ext(content, options).into_offset_iter();
2114        let mut list_depth: usize = 0;
2115        let mut list_stack: Vec<bool> = Vec::new();
2116
2117        for (event, range) in parser {
2118            match event {
2119                // Capture emphasis spans (for MD030's emphasis detection)
2120                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2121                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2122                        2
2123                    } else {
2124                        1
2125                    };
2126                    let match_start = range.start;
2127                    let match_end = range.end;
2128
2129                    // Skip if in code block
2130                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2131                        // Determine marker character by looking at the content at the start
2132                        let marker = content[match_start..].chars().next().unwrap_or('*');
2133                        if marker == '*' || marker == '_' {
2134                            // Extract content between markers
2135                            let content_start = match_start + marker_count;
2136                            let content_end = if match_end >= marker_count {
2137                                match_end - marker_count
2138                            } else {
2139                                match_end
2140                            };
2141                            let content_part = if content_start < content_end && content_end <= content.len() {
2142                                &content[content_start..content_end]
2143                            } else {
2144                                ""
2145                            };
2146
2147                            // Find which line this emphasis is on using line_offsets
2148                            let line_idx = match line_offsets.binary_search(&match_start) {
2149                                Ok(idx) => idx,
2150                                Err(idx) => idx.saturating_sub(1),
2151                            };
2152                            let line_num = line_idx + 1;
2153                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2154                            let col_start = match_start - line_start;
2155                            let col_end = match_end - line_start;
2156
2157                            emphasis_spans.push(EmphasisSpan {
2158                                line: line_num,
2159                                start_col: col_start,
2160                                end_col: col_end,
2161                                byte_offset: match_start,
2162                                byte_end: match_end,
2163                                marker,
2164                                marker_count,
2165                                content: content_part.to_string(),
2166                            });
2167                        }
2168                    }
2169                }
2170                Event::Start(Tag::List(start_number)) => {
2171                    list_depth += 1;
2172                    list_stack.push(start_number.is_some());
2173                }
2174                Event::End(TagEnd::List(_)) => {
2175                    list_depth = list_depth.saturating_sub(1);
2176                    list_stack.pop();
2177                }
2178                Event::Start(Tag::Item) if list_depth > 0 => {
2179                    // Get the ordered state for the CURRENT (innermost) list
2180                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2181                    // Find which line this byte offset corresponds to
2182                    let item_start = range.start;
2183
2184                    // Binary search to find the line number
2185                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2186                        Ok(idx) => idx,
2187                        Err(idx) => idx.saturating_sub(1),
2188                    };
2189
2190                    // Pulldown-cmark reports nested list items at the newline before the item
2191                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2192                    // Advance to the actual content line in this case.
2193                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2194                        line_idx += 1;
2195                    }
2196
2197                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2198                    if front_matter_end > 0 && line_idx < front_matter_end {
2199                        continue;
2200                    }
2201
2202                    if line_idx < line_offsets.len() {
2203                        let line_start_byte = line_offsets[line_idx];
2204                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2205                        let line = &content[line_start_byte..line_end.min(content.len())];
2206
2207                        // Strip trailing newline
2208                        let line = line
2209                            .strip_suffix('\n')
2210                            .or_else(|| line.strip_suffix("\r\n"))
2211                            .unwrap_or(line);
2212
2213                        // Strip blockquote prefix if present
2214                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2215                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2216                            (prefix.len(), content)
2217                        } else {
2218                            (0, line)
2219                        };
2220
2221                        // Parse the list marker from the actual line
2222                        if current_list_is_ordered {
2223                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2224                                Self::parse_ordered_list(line_to_parse)
2225                            {
2226                                let marker = format!("{number_str}{delimiter}");
2227                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2228                                let content_column = marker_column + marker.len() + spacing.len();
2229                                let number = number_str.parse().ok();
2230
2231                                list_items.entry(line_start_byte).or_insert((
2232                                    true,
2233                                    marker,
2234                                    marker_column,
2235                                    content_column,
2236                                    number,
2237                                ));
2238                            }
2239                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2240                            Self::parse_unordered_list(line_to_parse)
2241                        {
2242                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2243                            let content_column = marker_column + 1 + spacing.len();
2244
2245                            list_items.entry(line_start_byte).or_insert((
2246                                false,
2247                                marker.to_string(),
2248                                marker_column,
2249                                content_column,
2250                                None,
2251                            ));
2252                        }
2253                    }
2254                }
2255                _ => {}
2256            }
2257        }
2258
2259        (list_items, emphasis_spans)
2260    }
2261
2262    /// Fast unordered list parser - replaces regex for 5-10x speedup
2263    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2264    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2265    #[inline]
2266    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2267        let bytes = line.as_bytes();
2268        let mut i = 0;
2269
2270        // Skip leading whitespace
2271        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2272            i += 1;
2273        }
2274
2275        // Check for marker
2276        if i >= bytes.len() {
2277            return None;
2278        }
2279        let marker = bytes[i] as char;
2280        if marker != '-' && marker != '*' && marker != '+' {
2281            return None;
2282        }
2283        let marker_pos = i;
2284        i += 1;
2285
2286        // Collect spacing after marker (space or tab only)
2287        let spacing_start = i;
2288        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2289            i += 1;
2290        }
2291
2292        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2293    }
2294
2295    /// Fast ordered list parser - replaces regex for 5-10x speedup
2296    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2297    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2298    #[inline]
2299    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2300        let bytes = line.as_bytes();
2301        let mut i = 0;
2302
2303        // Skip leading whitespace
2304        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2305            i += 1;
2306        }
2307
2308        // Collect digits
2309        let number_start = i;
2310        while i < bytes.len() && bytes[i].is_ascii_digit() {
2311            i += 1;
2312        }
2313        if i == number_start {
2314            return None; // No digits found
2315        }
2316
2317        // Check for delimiter
2318        if i >= bytes.len() {
2319            return None;
2320        }
2321        let delimiter = bytes[i] as char;
2322        if delimiter != '.' && delimiter != ')' {
2323            return None;
2324        }
2325        let delimiter_pos = i;
2326        i += 1;
2327
2328        // Collect spacing after delimiter (space or tab only)
2329        let spacing_start = i;
2330        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2331            i += 1;
2332        }
2333
2334        Some((
2335            &line[..number_start],
2336            &line[number_start..delimiter_pos],
2337            delimiter,
2338            &line[spacing_start..i],
2339            &line[i..],
2340        ))
2341    }
2342
2343    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2344    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2345    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2346        let num_lines = line_offsets.len();
2347        let mut in_code_block = vec![false; num_lines];
2348
2349        // For each code block, mark all lines within it
2350        for &(start, end) in code_blocks {
2351            // Ensure we're at valid UTF-8 boundaries
2352            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2353                let mut boundary = start;
2354                while boundary > 0 && !content.is_char_boundary(boundary) {
2355                    boundary -= 1;
2356                }
2357                boundary
2358            } else {
2359                start
2360            };
2361
2362            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2363                let mut boundary = end;
2364                while boundary < content.len() && !content.is_char_boundary(boundary) {
2365                    boundary += 1;
2366                }
2367                boundary
2368            } else {
2369                end.min(content.len())
2370            };
2371
2372            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2373            // That function now has proper list context awareness (see code_block_utils.rs)
2374            // and correctly distinguishes between:
2375            // - Fenced code blocks (``` or ~~~)
2376            // - Indented code blocks at document level (4 spaces + blank line before)
2377            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2378            //
2379            // We no longer need to re-validate here. The original validation logic
2380            // was causing false positives by marking list continuation paragraphs as
2381            // code blocks when they have 4 spaces of indentation.
2382
2383            // Use binary search to find the first and last line indices
2384            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2385            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2386            //
2387            // Find the line that CONTAINS safe_start: the line with the largest
2388            // start offset that is <= safe_start. partition_point gives us the
2389            // first line that starts AFTER safe_start, so we subtract 1.
2390            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2391            let first_line = first_line_after.saturating_sub(1);
2392            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2393
2394            // Mark all lines in the range at once
2395            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2396                *flag = true;
2397            }
2398        }
2399
2400        in_code_block
2401    }
2402
2403    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2404    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2405    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2406        let content_lines: Vec<&str> = content.lines().collect();
2407        let num_lines = content_lines.len();
2408        let mut in_math_block = vec![false; num_lines];
2409
2410        let mut inside_math = false;
2411
2412        for (i, line) in content_lines.iter().enumerate() {
2413            // Skip lines that are in code blocks - math delimiters inside code are literal
2414            if code_block_map.get(i).copied().unwrap_or(false) {
2415                continue;
2416            }
2417
2418            let trimmed = line.trim();
2419
2420            // Check for math block delimiter ($$)
2421            // A line with just $$ toggles the math block state
2422            if trimmed == "$$" {
2423                if inside_math {
2424                    // Closing delimiter - this line is still part of the math block
2425                    in_math_block[i] = true;
2426                    inside_math = false;
2427                } else {
2428                    // Opening delimiter - this line starts the math block
2429                    in_math_block[i] = true;
2430                    inside_math = true;
2431                }
2432            } else if inside_math {
2433                // Content inside math block
2434                in_math_block[i] = true;
2435            }
2436        }
2437
2438        in_math_block
2439    }
2440
2441    /// Pre-compute basic line information (without headings/blockquotes)
2442    /// Also returns emphasis spans detected during the pulldown-cmark parse
2443    fn compute_basic_line_info(
2444        content: &str,
2445        line_offsets: &[usize],
2446        code_blocks: &[(usize, usize)],
2447        flavor: MarkdownFlavor,
2448        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2449        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2450        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2451    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2452        let content_lines: Vec<&str> = content.lines().collect();
2453        let mut lines = Vec::with_capacity(content_lines.len());
2454
2455        // Pre-compute which lines are in code blocks
2456        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2457
2458        // Pre-compute which lines are in math blocks ($$ ... $$)
2459        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2460
2461        // Detect front matter boundaries FIRST, before any other parsing
2462        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2463        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2464
2465        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2466        // (context-aware, eliminates false positives)
2467        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2468            content,
2469            line_offsets,
2470            flavor,
2471            front_matter_end,
2472            code_blocks,
2473        );
2474
2475        for (i, line) in content_lines.iter().enumerate() {
2476            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2477            let indent = line.len() - line.trim_start().len();
2478            // Compute visual indent with proper CommonMark tab expansion
2479            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2480
2481            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2482            let blockquote_parse = Self::parse_blockquote_prefix(line);
2483
2484            // For blank detection, consider blockquote context
2485            let is_blank = if let Some((_, content)) = blockquote_parse {
2486                // In blockquote context, check if content after prefix is blank
2487                content.trim().is_empty()
2488            } else {
2489                line.trim().is_empty()
2490            };
2491
2492            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2493            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2494
2495            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2496            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2497                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2498            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2499            // This ensures content after `-->` on the same line is not incorrectly skipped
2500            let line_end_offset = byte_offset + line.len();
2501            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2502                html_comment_ranges,
2503                byte_offset,
2504                line_end_offset,
2505            );
2506            // Use pulldown-cmark's list detection for context-aware parsing
2507            // This eliminates false positives on continuation lines (issue #253)
2508            let list_item =
2509                list_item_map
2510                    .get(&byte_offset)
2511                    .map(
2512                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2513                            marker: marker.clone(),
2514                            is_ordered: *is_ordered,
2515                            number: *number,
2516                            marker_column: *marker_column,
2517                            content_column: *content_column,
2518                        },
2519                    );
2520
2521            // Detect horizontal rules (only outside code blocks and frontmatter)
2522            // Uses CommonMark-compliant check including leading indentation validation
2523            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2524            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2525
2526            // Get math block status for this line
2527            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2528
2529            // Check if line is inside a Quarto div block
2530            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2531                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2532
2533            lines.push(LineInfo {
2534                byte_offset,
2535                byte_len: line.len(),
2536                indent,
2537                visual_indent,
2538                is_blank,
2539                in_code_block,
2540                in_front_matter,
2541                in_html_block: false, // Will be populated after line creation
2542                in_html_comment,
2543                list_item,
2544                heading: None,    // Will be populated in second pass for Setext headings
2545                blockquote: None, // Will be populated after line creation
2546                in_mkdocstrings,
2547                in_esm_block: false, // Will be populated after line creation for MDX files
2548                in_code_span_continuation: false, // Will be populated after code spans are parsed
2549                is_horizontal_rule: is_hr,
2550                in_math_block,
2551                in_quarto_div,
2552                in_jsx_expression: false,   // Will be populated for MDX files
2553                in_mdx_comment: false,      // Will be populated for MDX files
2554                in_jsx_component: false,    // Will be populated for MDX files
2555                in_jsx_fragment: false,     // Will be populated for MDX files
2556                in_admonition: false,       // Will be populated for MkDocs files
2557                in_content_tab: false,      // Will be populated for MkDocs files
2558                in_definition_list: false,  // Will be populated for MkDocs files
2559                in_obsidian_comment: false, // Will be populated for Obsidian files
2560            });
2561        }
2562
2563        (lines, emphasis_spans)
2564    }
2565
2566    /// Detect headings and blockquotes (called after HTML block detection)
2567    fn detect_headings_and_blockquotes(
2568        content: &str,
2569        lines: &mut [LineInfo],
2570        flavor: MarkdownFlavor,
2571        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2572        link_byte_ranges: &[(usize, usize)],
2573    ) {
2574        // Regex for heading detection
2575        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2576            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2577        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2578            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2579
2580        let content_lines: Vec<&str> = content.lines().collect();
2581
2582        // Detect front matter boundaries to skip those lines
2583        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2584
2585        // Detect headings (including Setext which needs look-ahead) and blockquotes
2586        for i in 0..lines.len() {
2587            let line = content_lines[i];
2588
2589            // Detect blockquotes FIRST, before any skip conditions.
2590            // A line can be both a blockquote AND contain a code block inside it.
2591            // We need to know about the blockquote marker regardless of code block status.
2592            // Skip only frontmatter lines - those are never blockquotes.
2593            if !(front_matter_end > 0 && i < front_matter_end)
2594                && let Some(bq) = parse_blockquote_detailed(line)
2595            {
2596                let nesting_level = bq.markers.len();
2597                let marker_column = bq.indent.len();
2598                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2599                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2600                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2601                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2602
2603                lines[i].blockquote = Some(BlockquoteInfo {
2604                    nesting_level,
2605                    indent: bq.indent.to_string(),
2606                    marker_column,
2607                    prefix,
2608                    content: bq.content.to_string(),
2609                    has_no_space_after_marker: has_no_space,
2610                    has_multiple_spaces_after_marker: has_multiple_spaces,
2611                    needs_md028_fix,
2612                });
2613
2614                // Update is_horizontal_rule for blockquote content
2615                // The original detection doesn't strip blockquote prefix, so we need to check here
2616                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2617                    lines[i].is_horizontal_rule = true;
2618                }
2619            }
2620
2621            // Now apply skip conditions for heading detection
2622            if lines[i].in_code_block {
2623                continue;
2624            }
2625
2626            // Skip lines in front matter
2627            if front_matter_end > 0 && i < front_matter_end {
2628                continue;
2629            }
2630
2631            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2632            if lines[i].in_html_block {
2633                continue;
2634            }
2635
2636            // Skip heading detection for blank lines
2637            if lines[i].is_blank {
2638                continue;
2639            }
2640
2641            // Check for ATX headings (but skip MkDocs snippet lines)
2642            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2643            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2644                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2645                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2646            } else {
2647                false
2648            };
2649
2650            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2651                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2652                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2653                    continue;
2654                }
2655                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2656                // This prevents false positives where `#fragment` is detected as a heading
2657                let line_offset = lines[i].byte_offset;
2658                if link_byte_ranges
2659                    .iter()
2660                    .any(|&(start, end)| line_offset > start && line_offset < end)
2661                {
2662                    continue;
2663                }
2664                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2665                let hashes = caps.get(2).map_or("", |m| m.as_str());
2666                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2667                let rest = caps.get(4).map_or("", |m| m.as_str());
2668
2669                let level = hashes.len() as u8;
2670                let marker_column = leading_spaces.len();
2671
2672                // Check for closing sequence, but handle custom IDs that might come after
2673                let (text, has_closing, closing_seq) = {
2674                    // First check if there's a custom ID at the end
2675                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2676                        // Check if this looks like a valid custom ID (ends with })
2677                        if rest[id_start..].trim_end().ends_with('}') {
2678                            // Split off the custom ID
2679                            (&rest[..id_start], &rest[id_start..])
2680                        } else {
2681                            (rest, "")
2682                        }
2683                    } else {
2684                        (rest, "")
2685                    };
2686
2687                    // Now look for closing hashes in the part before the custom ID
2688                    let trimmed_rest = rest_without_id.trim_end();
2689                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2690                        // Find the start of the hash sequence by walking backwards
2691                        // Use char_indices to get byte positions at char boundaries
2692                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2693
2694                        // Find which char index corresponds to last_hash_byte_pos
2695                        let last_hash_char_idx = char_positions
2696                            .iter()
2697                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2698
2699                        if let Some(mut char_idx) = last_hash_char_idx {
2700                            // Walk backwards to find start of hash sequence
2701                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2702                                char_idx -= 1;
2703                            }
2704
2705                            // Get the byte position of the start of hashes
2706                            let start_of_hashes = char_positions[char_idx].0;
2707
2708                            // Check if there's at least one space before the closing hashes
2709                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2710
2711                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2712                            let potential_closing = &trimmed_rest[start_of_hashes..];
2713                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2714
2715                            if is_all_hashes && has_space_before {
2716                                // This is a closing sequence
2717                                let closing_hashes = potential_closing.to_string();
2718                                // The text is everything before the closing hashes
2719                                // Don't include the custom ID here - it will be extracted later
2720                                let text_part = if !custom_id_part.is_empty() {
2721                                    // If we have a custom ID, append it back to get the full rest
2722                                    // This allows the extract_header_id function to handle it properly
2723                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2724                                } else {
2725                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2726                                };
2727                                (text_part, true, closing_hashes)
2728                            } else {
2729                                // Not a valid closing sequence, return the full content
2730                                (rest.to_string(), false, String::new())
2731                            }
2732                        } else {
2733                            // Couldn't find char boundary, return the full content
2734                            (rest.to_string(), false, String::new())
2735                        }
2736                    } else {
2737                        // No hashes found, return the full content
2738                        (rest.to_string(), false, String::new())
2739                    }
2740                };
2741
2742                let content_column = marker_column + hashes.len() + spaces_after.len();
2743
2744                // Extract custom header ID if present
2745                let raw_text = text.trim().to_string();
2746                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2747
2748                // If no custom ID was found on the header line, check the next line for standalone attr-list
2749                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2750                    let next_line = content_lines[i + 1];
2751                    if !lines[i + 1].in_code_block
2752                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2753                        && let Some(next_line_id) =
2754                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2755                    {
2756                        custom_id = Some(next_line_id);
2757                    }
2758                }
2759
2760                // ATX heading is "valid" for processing by heading rules if:
2761                // 1. Has space after # (CommonMark compliant): `# Heading`
2762                // 2. Is empty (just hashes): `#`
2763                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2764                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2765                //
2766                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2767                // - `#tag` - single # with lowercase (social hashtag)
2768                // - `#123` - single # with number (GitHub issue ref)
2769                let is_valid = !spaces_after.is_empty()
2770                    || rest.is_empty()
2771                    || level > 1
2772                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2773
2774                lines[i].heading = Some(HeadingInfo {
2775                    level,
2776                    style: HeadingStyle::ATX,
2777                    marker: hashes.to_string(),
2778                    marker_column,
2779                    content_column,
2780                    text: clean_text,
2781                    custom_id,
2782                    raw_text,
2783                    has_closing_sequence: has_closing,
2784                    closing_sequence: closing_seq,
2785                    is_valid,
2786                });
2787            }
2788            // Check for Setext headings (need to look at next line)
2789            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2790                let next_line = content_lines[i + 1];
2791                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2792                    // Skip if next line is front matter delimiter
2793                    if front_matter_end > 0 && i < front_matter_end {
2794                        continue;
2795                    }
2796
2797                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2798                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2799                    {
2800                        continue;
2801                    }
2802
2803                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2804                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2805                    let content_line = line.trim();
2806
2807                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2808                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2809                        continue;
2810                    }
2811
2812                    // Skip underscore thematic breaks (___)
2813                    if content_line.starts_with('_') {
2814                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2815                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2816                            continue;
2817                        }
2818                    }
2819
2820                    // Skip numbered lists (1. Item, 2. Item, etc.)
2821                    if let Some(first_char) = content_line.chars().next()
2822                        && first_char.is_ascii_digit()
2823                    {
2824                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2825                        if num_end < content_line.len() {
2826                            let next = content_line.chars().nth(num_end);
2827                            if next == Some('.') || next == Some(')') {
2828                                continue;
2829                            }
2830                        }
2831                    }
2832
2833                    // Skip ATX headings
2834                    if ATX_HEADING_REGEX.is_match(line) {
2835                        continue;
2836                    }
2837
2838                    // Skip blockquotes
2839                    if content_line.starts_with('>') {
2840                        continue;
2841                    }
2842
2843                    // Skip code fences
2844                    let trimmed_start = line.trim_start();
2845                    if trimmed_start.len() >= 3 {
2846                        let first_three: String = trimmed_start.chars().take(3).collect();
2847                        if first_three == "```" || first_three == "~~~" {
2848                            continue;
2849                        }
2850                    }
2851
2852                    // Skip HTML blocks
2853                    if content_line.starts_with('<') {
2854                        continue;
2855                    }
2856
2857                    let underline = next_line.trim();
2858
2859                    let level = if underline.starts_with('=') { 1 } else { 2 };
2860                    let style = if level == 1 {
2861                        HeadingStyle::Setext1
2862                    } else {
2863                        HeadingStyle::Setext2
2864                    };
2865
2866                    // Extract custom header ID if present
2867                    let raw_text = line.trim().to_string();
2868                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2869
2870                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2871                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2872                        let attr_line = content_lines[i + 2];
2873                        if !lines[i + 2].in_code_block
2874                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2875                            && let Some(attr_line_id) =
2876                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2877                        {
2878                            custom_id = Some(attr_line_id);
2879                        }
2880                    }
2881
2882                    lines[i].heading = Some(HeadingInfo {
2883                        level,
2884                        style,
2885                        marker: underline.to_string(),
2886                        marker_column: next_line.len() - next_line.trim_start().len(),
2887                        content_column: lines[i].indent,
2888                        text: clean_text,
2889                        custom_id,
2890                        raw_text,
2891                        has_closing_sequence: false,
2892                        closing_sequence: String::new(),
2893                        is_valid: true, // Setext headings are always valid
2894                    });
2895                }
2896            }
2897        }
2898    }
2899
2900    /// Detect HTML blocks in the content
2901    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2902        // HTML block elements that trigger block context
2903        // Includes HTML5 media, embedded content, and interactive elements
2904        const BLOCK_ELEMENTS: &[&str] = &[
2905            "address",
2906            "article",
2907            "aside",
2908            "audio",
2909            "blockquote",
2910            "canvas",
2911            "details",
2912            "dialog",
2913            "dd",
2914            "div",
2915            "dl",
2916            "dt",
2917            "embed",
2918            "fieldset",
2919            "figcaption",
2920            "figure",
2921            "footer",
2922            "form",
2923            "h1",
2924            "h2",
2925            "h3",
2926            "h4",
2927            "h5",
2928            "h6",
2929            "header",
2930            "hr",
2931            "iframe",
2932            "li",
2933            "main",
2934            "menu",
2935            "nav",
2936            "noscript",
2937            "object",
2938            "ol",
2939            "p",
2940            "picture",
2941            "pre",
2942            "script",
2943            "search",
2944            "section",
2945            "source",
2946            "style",
2947            "summary",
2948            "svg",
2949            "table",
2950            "tbody",
2951            "td",
2952            "template",
2953            "textarea",
2954            "tfoot",
2955            "th",
2956            "thead",
2957            "tr",
2958            "track",
2959            "ul",
2960            "video",
2961        ];
2962
2963        let mut i = 0;
2964        while i < lines.len() {
2965            // Skip if already in code block or front matter
2966            if lines[i].in_code_block || lines[i].in_front_matter {
2967                i += 1;
2968                continue;
2969            }
2970
2971            let trimmed = lines[i].content(content).trim_start();
2972
2973            // Check if line starts with an HTML tag
2974            if trimmed.starts_with('<') && trimmed.len() > 1 {
2975                // Extract tag name safely
2976                let after_bracket = &trimmed[1..];
2977                let is_closing = after_bracket.starts_with('/');
2978                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2979
2980                // Extract tag name (stop at space, >, /, or end of string)
2981                let tag_name = tag_start
2982                    .chars()
2983                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2984                    .collect::<String>()
2985                    .to_lowercase();
2986
2987                // Check if it's a block element
2988                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2989                    // Mark this line as in HTML block
2990                    lines[i].in_html_block = true;
2991
2992                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2993                    // This avoids complex nesting logic that might cause infinite loops
2994                    // Only search for closing tag on subsequent lines if the opening tag
2995                    // does NOT have its closing tag on the same line
2996                    if !is_closing {
2997                        let closing_tag = format!("</{tag_name}>");
2998
2999                        // Check if closing tag is on the same line as opening tag
3000                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
3001                        let same_line_close = lines[i].content(content).contains(&closing_tag);
3002
3003                        // Only search subsequent lines if the tag isn't self-closed on this line
3004                        if !same_line_close {
3005                            // style and script tags can contain blank lines (CSS/JS formatting)
3006                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
3007                            let mut j = i + 1;
3008                            let mut found_closing_tag = false;
3009                            while j < lines.len() && j < i + 100 {
3010                                // Limit search to 100 lines
3011                                // Stop at blank lines (except for style/script tags)
3012                                if !allow_blank_lines && lines[j].is_blank {
3013                                    break;
3014                                }
3015
3016                                lines[j].in_html_block = true;
3017
3018                                // Check if this line contains the closing tag
3019                                if lines[j].content(content).contains(&closing_tag) {
3020                                    found_closing_tag = true;
3021                                }
3022
3023                                // After finding closing tag, continue marking lines as
3024                                // in_html_block until blank line (per CommonMark spec)
3025                                if found_closing_tag {
3026                                    j += 1;
3027                                    // Continue marking subsequent lines until blank
3028                                    while j < lines.len() && j < i + 100 {
3029                                        if lines[j].is_blank {
3030                                            break;
3031                                        }
3032                                        lines[j].in_html_block = true;
3033                                        j += 1;
3034                                    }
3035                                    break;
3036                                }
3037                                j += 1;
3038                            }
3039                        }
3040                    }
3041                }
3042            }
3043
3044            i += 1;
3045        }
3046    }
3047
3048    /// Detect ESM import/export blocks anywhere in MDX files
3049    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
3050    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3051        // Only process MDX files
3052        if !flavor.supports_esm_blocks() {
3053            return;
3054        }
3055
3056        let mut in_multiline_import = false;
3057
3058        for line in lines.iter_mut() {
3059            // Skip code blocks, front matter, and HTML comments
3060            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3061                in_multiline_import = false;
3062                continue;
3063            }
3064
3065            let line_content = line.content(content);
3066            let trimmed = line_content.trim();
3067
3068            // Handle continuation of multi-line import/export
3069            if in_multiline_import {
3070                line.in_esm_block = true;
3071                // Check if this line completes the statement
3072                // Multi-line import ends when we see the closing quote + optional semicolon
3073                if trimmed.ends_with('\'')
3074                    || trimmed.ends_with('"')
3075                    || trimmed.ends_with("';")
3076                    || trimmed.ends_with("\";")
3077                    || line_content.contains(';')
3078                {
3079                    in_multiline_import = false;
3080                }
3081                continue;
3082            }
3083
3084            // Skip blank lines
3085            if line.is_blank {
3086                continue;
3087            }
3088
3089            // Check if line starts with import or export
3090            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3091                line.in_esm_block = true;
3092
3093                // Determine if this is a complete single-line statement or starts a multi-line one
3094                // Multi-line imports look like:
3095                //   import {
3096                //     Foo,
3097                //     Bar
3098                //   } from 'module'
3099                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3100                let is_import = trimmed.starts_with("import ");
3101
3102                // Check for simple complete statements
3103                let is_complete =
3104                    // Ends with semicolon
3105                    trimmed.ends_with(';')
3106                    // import/export with from clause that ends with quote
3107                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3108                    // Simple export (export const/let/var/function/class without from)
3109                    || (!is_import && !trimmed.contains(" from ") && (
3110                        trimmed.starts_with("export const ")
3111                        || trimmed.starts_with("export let ")
3112                        || trimmed.starts_with("export var ")
3113                        || trimmed.starts_with("export function ")
3114                        || trimmed.starts_with("export class ")
3115                        || trimmed.starts_with("export default ")
3116                    ));
3117
3118                if !is_complete && is_import {
3119                    // Only imports can span multiple lines in the typical case
3120                    // Check if it looks like the start of a multi-line import
3121                    // e.g., "import {" or "import type {"
3122                    if trimmed.contains('{') && !trimmed.contains('}') {
3123                        in_multiline_import = true;
3124                    }
3125                }
3126            }
3127        }
3128    }
3129
3130    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3131    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3132    fn detect_jsx_and_mdx_comments(
3133        content: &str,
3134        lines: &mut [LineInfo],
3135        flavor: MarkdownFlavor,
3136        code_blocks: &[(usize, usize)],
3137    ) -> (ByteRanges, ByteRanges) {
3138        // Only process MDX files
3139        if !flavor.supports_jsx() {
3140            return (Vec::new(), Vec::new());
3141        }
3142
3143        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3144        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3145
3146        // Quick check - if no braces, no JSX expressions or MDX comments
3147        if !content.contains('{') {
3148            return (jsx_expression_ranges, mdx_comment_ranges);
3149        }
3150
3151        let bytes = content.as_bytes();
3152        let mut i = 0;
3153
3154        while i < bytes.len() {
3155            if bytes[i] == b'{' {
3156                // Check if we're in a code block
3157                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3158                    i += 1;
3159                    continue;
3160                }
3161
3162                let start = i;
3163
3164                // Check if it's an MDX comment: {/* ... */}
3165                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3166                    // Find the closing */}
3167                    let mut j = i + 3;
3168                    while j + 2 < bytes.len() {
3169                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3170                            let end = j + 3;
3171                            mdx_comment_ranges.push((start, end));
3172
3173                            // Mark lines as in MDX comment
3174                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3175                                line.in_mdx_comment = true;
3176                            });
3177
3178                            i = end;
3179                            break;
3180                        }
3181                        j += 1;
3182                    }
3183                    if j + 2 >= bytes.len() {
3184                        // Unclosed MDX comment - mark rest as comment
3185                        mdx_comment_ranges.push((start, bytes.len()));
3186                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3187                            line.in_mdx_comment = true;
3188                        });
3189                        break;
3190                    }
3191                } else {
3192                    // Regular JSX expression: { ... }
3193                    // Need to handle nested braces
3194                    let mut brace_depth = 1;
3195                    let mut j = i + 1;
3196                    let mut in_string = false;
3197                    let mut string_char = b'"';
3198
3199                    while j < bytes.len() && brace_depth > 0 {
3200                        let c = bytes[j];
3201
3202                        // Handle strings to avoid counting braces inside them
3203                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3204                            in_string = true;
3205                            string_char = c;
3206                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3207                            in_string = false;
3208                        } else if !in_string {
3209                            if c == b'{' {
3210                                brace_depth += 1;
3211                            } else if c == b'}' {
3212                                brace_depth -= 1;
3213                            }
3214                        }
3215                        j += 1;
3216                    }
3217
3218                    if brace_depth == 0 {
3219                        let end = j;
3220                        jsx_expression_ranges.push((start, end));
3221
3222                        // Mark lines as in JSX expression
3223                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3224                            line.in_jsx_expression = true;
3225                        });
3226
3227                        i = end;
3228                    } else {
3229                        i += 1;
3230                    }
3231                }
3232            } else {
3233                i += 1;
3234            }
3235        }
3236
3237        (jsx_expression_ranges, mdx_comment_ranges)
3238    }
3239
3240    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3241    /// and populate the corresponding fields in LineInfo
3242    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3243        if flavor != MarkdownFlavor::MkDocs {
3244            return;
3245        }
3246
3247        use crate::utils::mkdocs_admonitions;
3248        use crate::utils::mkdocs_definition_lists;
3249        use crate::utils::mkdocs_tabs;
3250
3251        let content_lines: Vec<&str> = content.lines().collect();
3252
3253        // Track admonition context
3254        let mut in_admonition = false;
3255        let mut admonition_indent = 0;
3256
3257        // Track tab context
3258        let mut in_tab = false;
3259        let mut tab_indent = 0;
3260
3261        // Track definition list context
3262        let mut in_definition = false;
3263
3264        for (i, line) in content_lines.iter().enumerate() {
3265            if i >= lines.len() {
3266                break;
3267            }
3268
3269            // Check for admonition markers first - even on lines marked as code blocks
3270            // Pulldown-cmark marks 4-space indented content as indented code blocks,
3271            // but in MkDocs this is admonition/tab content, not code.
3272            if mkdocs_admonitions::is_admonition_start(line) {
3273                in_admonition = true;
3274                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3275                lines[i].in_admonition = true;
3276            } else if in_admonition {
3277                // Check if still in admonition content
3278                if line.trim().is_empty() {
3279                    // Blank lines are part of admonitions
3280                    lines[i].in_admonition = true;
3281                    // Override code block detection for blank lines inside admonitions
3282                    lines[i].in_code_block = false;
3283                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3284                    lines[i].in_admonition = true;
3285                    // Override code block detection - this is admonition content, not code
3286                    lines[i].in_code_block = false;
3287                } else {
3288                    // End of admonition
3289                    in_admonition = false;
3290                    // Check if this line starts a new admonition
3291                    if mkdocs_admonitions::is_admonition_start(line) {
3292                        in_admonition = true;
3293                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3294                        lines[i].in_admonition = true;
3295                    }
3296                }
3297            }
3298
3299            // Check for tab markers - also before the code block skip
3300            // Tab content also uses 4-space indentation which pulldown-cmark treats as code
3301            if mkdocs_tabs::is_tab_marker(line) {
3302                in_tab = true;
3303                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3304                lines[i].in_content_tab = true;
3305            } else if in_tab {
3306                // Check if still in tab content
3307                if line.trim().is_empty() {
3308                    // Blank lines are part of tabs
3309                    lines[i].in_content_tab = true;
3310                    lines[i].in_code_block = false;
3311                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3312                    lines[i].in_content_tab = true;
3313                    // Override code block detection - this is tab content, not code
3314                    lines[i].in_code_block = false;
3315                } else {
3316                    // End of tab content
3317                    in_tab = false;
3318                    // Check if this line starts a new tab
3319                    if mkdocs_tabs::is_tab_marker(line) {
3320                        in_tab = true;
3321                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3322                        lines[i].in_content_tab = true;
3323                    }
3324                }
3325            }
3326
3327            // Skip remaining detection for lines in actual code blocks
3328            if lines[i].in_code_block {
3329                continue;
3330            }
3331
3332            // Check for definition list items
3333            if mkdocs_definition_lists::is_definition_line(line) {
3334                in_definition = true;
3335                lines[i].in_definition_list = true;
3336            } else if in_definition {
3337                // Check if continuation
3338                if mkdocs_definition_lists::is_definition_continuation(line) {
3339                    lines[i].in_definition_list = true;
3340                } else if line.trim().is_empty() {
3341                    // Blank line might continue definition
3342                    lines[i].in_definition_list = true;
3343                } else if mkdocs_definition_lists::could_be_term_line(line) {
3344                    // This could be a new term - check if followed by definition
3345                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3346                    {
3347                        lines[i].in_definition_list = true;
3348                    } else {
3349                        in_definition = false;
3350                    }
3351                } else {
3352                    in_definition = false;
3353                }
3354            } else if mkdocs_definition_lists::could_be_term_line(line) {
3355                // Check if this is a term followed by a definition
3356                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3357                    lines[i].in_definition_list = true;
3358                    in_definition = true;
3359                }
3360            }
3361        }
3362    }
3363
3364    /// Detect Obsidian comment blocks (%%...%%) in Obsidian flavor
3365    ///
3366    /// Obsidian comments use `%%` as delimiters:
3367    /// - Inline: `text %%hidden%% text`
3368    /// - Block: `%%\nmulti-line\n%%`
3369    ///
3370    /// Comments do NOT nest - the first `%%` after an opening `%%` closes the comment.
3371    /// Comments are NOT detected inside code blocks or HTML comments.
3372    ///
3373    /// Returns the computed comment ranges for use by rules that need position-level checking.
3374    fn detect_obsidian_comments(
3375        content: &str,
3376        lines: &mut [LineInfo],
3377        flavor: MarkdownFlavor,
3378        code_span_ranges: &[(usize, usize)],
3379    ) -> Vec<(usize, usize)> {
3380        // Only process Obsidian files
3381        if flavor != MarkdownFlavor::Obsidian {
3382            return Vec::new();
3383        }
3384
3385        // Compute Obsidian comment ranges (byte ranges)
3386        let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3387
3388        // Mark lines that fall within comment ranges
3389        for range in &comment_ranges {
3390            for line in lines.iter_mut() {
3391                // Skip lines in code blocks or HTML comments - they take precedence
3392                if line.in_code_block || line.in_html_comment {
3393                    continue;
3394                }
3395
3396                let line_start = line.byte_offset;
3397                let line_end = line.byte_offset + line.byte_len;
3398
3399                // Check if this line is entirely within a comment
3400                // A line is "in" a comment if it starts within or after the comment start
3401                // AND ends within or before the comment end
3402                if line_start >= range.0 && line_end <= range.1 {
3403                    line.in_obsidian_comment = true;
3404                } else if line_start < range.1 && line_end > range.0 {
3405                    // Line partially overlaps with comment - check if the overlap is significant
3406                    // For inline comments on a line, we still mark the line if any part is in comment
3407                    // However, for the filtered_lines API, we only skip lines entirely within comments
3408                    // This matches the behavior of HTML comments
3409
3410                    // Check if the ENTIRE line content (excluding leading/trailing whitespace)
3411                    // is within the comment range
3412                    let line_content_start = line_start;
3413                    let line_content_end = line_end;
3414
3415                    if line_content_start >= range.0 && line_content_end <= range.1 {
3416                        line.in_obsidian_comment = true;
3417                    }
3418                }
3419            }
3420        }
3421
3422        comment_ranges
3423    }
3424
3425    /// Compute byte ranges for all Obsidian comments in the content
3426    ///
3427    /// Returns a vector of (start, end) byte offset pairs for each comment.
3428    /// Comments do not nest - first `%%` after an opening `%%` closes it.
3429    fn compute_obsidian_comment_ranges(
3430        content: &str,
3431        lines: &[LineInfo],
3432        code_span_ranges: &[(usize, usize)],
3433    ) -> Vec<(usize, usize)> {
3434        let mut ranges = Vec::new();
3435
3436        // Quick check - if no %% at all, no comments
3437        if !content.contains("%%") {
3438            return ranges;
3439        }
3440
3441        // Build skip ranges for code blocks, HTML comments, and inline code spans
3442        // to avoid detecting %% inside those regions.
3443        let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3444        for line in lines {
3445            if line.in_code_block || line.in_html_comment {
3446                skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3447            }
3448        }
3449        skip_ranges.extend(code_span_ranges.iter().copied());
3450
3451        if !skip_ranges.is_empty() {
3452            // Sort and merge overlapping ranges for efficient scanning
3453            skip_ranges.sort_by_key(|(start, _)| *start);
3454            let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3455            for (start, end) in skip_ranges {
3456                if let Some((_, last_end)) = merged.last_mut()
3457                    && start <= *last_end
3458                {
3459                    *last_end = (*last_end).max(end);
3460                    continue;
3461                }
3462                merged.push((start, end));
3463            }
3464            skip_ranges = merged;
3465        }
3466
3467        let content_bytes = content.as_bytes();
3468        let len = content.len();
3469        let mut i = 0;
3470        let mut in_comment = false;
3471        let mut comment_start = 0;
3472        let mut skip_idx = 0;
3473
3474        while i < len.saturating_sub(1) {
3475            // Fast-skip any ranges we should ignore (code blocks, HTML comments, code spans)
3476            if skip_idx < skip_ranges.len() {
3477                let (skip_start, skip_end) = skip_ranges[skip_idx];
3478                if i >= skip_end {
3479                    skip_idx += 1;
3480                    continue;
3481                }
3482                if i >= skip_start {
3483                    i = skip_end;
3484                    continue;
3485                }
3486            }
3487
3488            // Check for %%
3489            if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3490                if !in_comment {
3491                    // Opening %%
3492                    in_comment = true;
3493                    comment_start = i;
3494                    i += 2;
3495                } else {
3496                    // Closing %%
3497                    let comment_end = i + 2;
3498                    ranges.push((comment_start, comment_end));
3499                    in_comment = false;
3500                    i += 2;
3501                }
3502            } else {
3503                i += 1;
3504            }
3505        }
3506
3507        // Handle unclosed comment - extends to end of document
3508        if in_comment {
3509            ranges.push((comment_start, len));
3510        }
3511
3512        ranges
3513    }
3514
3515    /// Helper to mark lines within a byte range
3516    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3517    where
3518        F: FnMut(&mut LineInfo),
3519    {
3520        // Find lines that overlap with the range
3521        for line in lines.iter_mut() {
3522            let line_start = line.byte_offset;
3523            let line_end = line.byte_offset + line.byte_len;
3524
3525            // Check if this line overlaps with the range
3526            if line_start < end && line_end > start {
3527                f(line);
3528            }
3529        }
3530
3531        // Silence unused warning for content (needed for signature consistency)
3532        let _ = content;
3533    }
3534
3535    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3536    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3537        // Quick check - if no backticks, no code spans
3538        if !content.contains('`') {
3539            return Vec::new();
3540        }
3541
3542        // Use pulldown-cmark's streaming parser with byte offsets
3543        let parser = Parser::new(content).into_offset_iter();
3544        let mut ranges = Vec::new();
3545
3546        for (event, range) in parser {
3547            if let Event::Code(_) = event {
3548                ranges.push((range.start, range.end));
3549            }
3550        }
3551
3552        Self::build_code_spans_from_ranges(content, lines, &ranges)
3553    }
3554
3555    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3556        let mut code_spans = Vec::new();
3557        if ranges.is_empty() {
3558            return code_spans;
3559        }
3560
3561        for &(start_pos, end_pos) in ranges {
3562            // The range includes the backticks, extract the actual content
3563            let full_span = &content[start_pos..end_pos];
3564            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3565
3566            // Extract content between backticks, preserving spaces
3567            let content_start = start_pos + backtick_count;
3568            let content_end = end_pos - backtick_count;
3569            let span_content = if content_start < content_end {
3570                content[content_start..content_end].to_string()
3571            } else {
3572                String::new()
3573            };
3574
3575            // Use binary search to find line number - O(log n) instead of O(n)
3576            // Find the rightmost line whose byte_offset <= start_pos
3577            let line_idx = lines
3578                .partition_point(|line| line.byte_offset <= start_pos)
3579                .saturating_sub(1);
3580            let line_num = line_idx + 1;
3581            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3582
3583            // Find end column using binary search
3584            let end_line_idx = lines
3585                .partition_point(|line| line.byte_offset <= end_pos)
3586                .saturating_sub(1);
3587            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3588
3589            // Convert byte offsets to character positions for correct Unicode handling
3590            // This ensures consistency with warning.column which uses character positions
3591            let line_content = lines[line_idx].content(content);
3592            let col_start = if byte_col_start <= line_content.len() {
3593                line_content[..byte_col_start].chars().count()
3594            } else {
3595                line_content.chars().count()
3596            };
3597
3598            let end_line_content = lines[end_line_idx].content(content);
3599            let col_end = if byte_col_end <= end_line_content.len() {
3600                end_line_content[..byte_col_end].chars().count()
3601            } else {
3602                end_line_content.chars().count()
3603            };
3604
3605            code_spans.push(CodeSpan {
3606                line: line_num,
3607                end_line: end_line_idx + 1,
3608                start_col: col_start,
3609                end_col: col_end,
3610                byte_offset: start_pos,
3611                byte_end: end_pos,
3612                backtick_count,
3613                content: span_content,
3614            });
3615        }
3616
3617        // Sort by position to ensure consistent ordering
3618        code_spans.sort_by_key(|span| span.byte_offset);
3619
3620        code_spans
3621    }
3622
3623    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3624    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3625        let mut math_spans = Vec::new();
3626
3627        // Quick check - if no $ signs, no math spans
3628        if !content.contains('$') {
3629            return math_spans;
3630        }
3631
3632        // Use pulldown-cmark with ENABLE_MATH option
3633        let mut options = Options::empty();
3634        options.insert(Options::ENABLE_MATH);
3635        let parser = Parser::new_ext(content, options).into_offset_iter();
3636
3637        for (event, range) in parser {
3638            let (is_display, math_content) = match &event {
3639                Event::InlineMath(text) => (false, text.as_ref()),
3640                Event::DisplayMath(text) => (true, text.as_ref()),
3641                _ => continue,
3642            };
3643
3644            let start_pos = range.start;
3645            let end_pos = range.end;
3646
3647            // Use binary search to find line number - O(log n) instead of O(n)
3648            let line_idx = lines
3649                .partition_point(|line| line.byte_offset <= start_pos)
3650                .saturating_sub(1);
3651            let line_num = line_idx + 1;
3652            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3653
3654            // Find end column using binary search
3655            let end_line_idx = lines
3656                .partition_point(|line| line.byte_offset <= end_pos)
3657                .saturating_sub(1);
3658            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3659
3660            // Convert byte offsets to character positions for correct Unicode handling
3661            let line_content = lines[line_idx].content(content);
3662            let col_start = if byte_col_start <= line_content.len() {
3663                line_content[..byte_col_start].chars().count()
3664            } else {
3665                line_content.chars().count()
3666            };
3667
3668            let end_line_content = lines[end_line_idx].content(content);
3669            let col_end = if byte_col_end <= end_line_content.len() {
3670                end_line_content[..byte_col_end].chars().count()
3671            } else {
3672                end_line_content.chars().count()
3673            };
3674
3675            math_spans.push(MathSpan {
3676                line: line_num,
3677                end_line: end_line_idx + 1,
3678                start_col: col_start,
3679                end_col: col_end,
3680                byte_offset: start_pos,
3681                byte_end: end_pos,
3682                is_display,
3683                content: math_content.to_string(),
3684            });
3685        }
3686
3687        // Sort by position to ensure consistent ordering
3688        math_spans.sort_by_key(|span| span.byte_offset);
3689
3690        math_spans
3691    }
3692
3693    /// Parse all list blocks in the content (legacy line-by-line approach)
3694    ///
3695    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3696    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3697    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3698    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3699    ///   treated as list continuation (based on the list marker width)
3700    ///
3701    /// When a new list item is encountered, we check if list-breaking content was seen
3702    /// since the last item. If so, we start a new list block.
3703    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3704        // Minimum indentation for unordered list continuation per CommonMark spec
3705        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3706
3707        /// Initialize or reset the forward-scanning tracking state.
3708        /// This helper eliminates code duplication across three initialization sites.
3709        #[inline]
3710        fn reset_tracking_state(
3711            list_item: &ListItemInfo,
3712            has_list_breaking_content: &mut bool,
3713            min_continuation: &mut usize,
3714        ) {
3715            *has_list_breaking_content = false;
3716            let marker_width = if list_item.is_ordered {
3717                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3718            } else {
3719                list_item.marker.len()
3720            };
3721            *min_continuation = if list_item.is_ordered {
3722                marker_width
3723            } else {
3724                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3725            };
3726        }
3727
3728        // Pre-size based on lines that could be list items
3729        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3730        let mut current_block: Option<ListBlock> = None;
3731        let mut last_list_item_line = 0;
3732        let mut current_indent_level = 0;
3733        let mut last_marker_width = 0;
3734
3735        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3736        let mut has_list_breaking_content_since_last_item = false;
3737        let mut min_continuation_for_tracking = 0;
3738
3739        for (line_idx, line_info) in lines.iter().enumerate() {
3740            let line_num = line_idx + 1;
3741
3742            // Enhanced code block handling using Design #3's context analysis
3743            if line_info.in_code_block {
3744                if let Some(ref mut block) = current_block {
3745                    // Calculate minimum indentation for list continuation
3746                    let min_continuation_indent =
3747                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3748
3749                    // Analyze code block context using the three-tier classification
3750                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3751
3752                    match context {
3753                        CodeBlockContext::Indented => {
3754                            // Code block is properly indented - continues the list
3755                            block.end_line = line_num;
3756                            continue;
3757                        }
3758                        CodeBlockContext::Standalone => {
3759                            // Code block separates lists - end current block
3760                            let completed_block = current_block.take().unwrap();
3761                            list_blocks.push(completed_block);
3762                            continue;
3763                        }
3764                        CodeBlockContext::Adjacent => {
3765                            // Edge case - use conservative behavior (continue list)
3766                            block.end_line = line_num;
3767                            continue;
3768                        }
3769                    }
3770                } else {
3771                    // No current list block - skip code block lines
3772                    continue;
3773                }
3774            }
3775
3776            // Extract blockquote prefix if any
3777            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3778                caps.get(0).unwrap().as_str().to_string()
3779            } else {
3780                String::new()
3781            };
3782
3783            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3784            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3785            if let Some(ref block) = current_block
3786                && line_info.list_item.is_none()
3787                && !line_info.is_blank
3788                && !line_info.in_code_span_continuation
3789            {
3790                let line_content = line_info.content(content).trim();
3791
3792                // Check for structural separators that break lists
3793                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3794                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3795                // as they indicate improper indentation rather than lazy continuation.
3796                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3797
3798                // Check if blockquote context changes (different prefix than current block)
3799                // Lines within the SAME blockquote context don't break lists
3800                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3801
3802                let breaks_list = line_info.heading.is_some()
3803                    || line_content.starts_with("---")
3804                    || line_content.starts_with("***")
3805                    || line_content.starts_with("___")
3806                    || crate::utils::skip_context::is_table_line(line_content)
3807                    || blockquote_prefix_changes
3808                    || (line_info.indent > 0
3809                        && line_info.indent < min_continuation_for_tracking
3810                        && !is_lazy_continuation);
3811
3812                if breaks_list {
3813                    has_list_breaking_content_since_last_item = true;
3814                }
3815            }
3816
3817            // If this line is a code span continuation within an active list block,
3818            // extend the block's end_line to include this line (maintains list continuity)
3819            if line_info.in_code_span_continuation
3820                && line_info.list_item.is_none()
3821                && let Some(ref mut block) = current_block
3822            {
3823                block.end_line = line_num;
3824            }
3825
3826            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3827            // properly indented lines within the list). This ensures the workaround at line 2448
3828            // works correctly when there are multiple continuation lines before a nested list item.
3829            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3830            // For blockquote lines, compute effective indent after stripping the prefix
3831            let effective_continuation_indent = if let Some(ref block) = current_block {
3832                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3833                let line_content = line_info.content(content);
3834                let line_bq_level = line_content
3835                    .chars()
3836                    .take_while(|c| *c == '>' || c.is_whitespace())
3837                    .filter(|&c| c == '>')
3838                    .count();
3839                if line_bq_level > 0 && line_bq_level == block_bq_level {
3840                    // Compute indent after blockquote markers
3841                    let mut pos = 0;
3842                    let mut found_markers = 0;
3843                    for c in line_content.chars() {
3844                        pos += c.len_utf8();
3845                        if c == '>' {
3846                            found_markers += 1;
3847                            if found_markers == line_bq_level {
3848                                if line_content.get(pos..pos + 1) == Some(" ") {
3849                                    pos += 1;
3850                                }
3851                                break;
3852                            }
3853                        }
3854                    }
3855                    let after_bq = &line_content[pos..];
3856                    after_bq.len() - after_bq.trim_start().len()
3857                } else {
3858                    line_info.indent
3859                }
3860            } else {
3861                line_info.indent
3862            };
3863            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3864                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3865                if block_bq_level > 0 {
3866                    if block.is_ordered { last_marker_width } else { 2 }
3867                } else {
3868                    min_continuation_for_tracking
3869                }
3870            } else {
3871                min_continuation_for_tracking
3872            };
3873            // Lazy continuation allows unindented text to continue a list item,
3874            // but NOT structural elements like headings, code fences, or horizontal rules
3875            let is_structural_element = line_info.heading.is_some()
3876                || line_info.content(content).trim().starts_with("```")
3877                || line_info.content(content).trim().starts_with("~~~");
3878            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3879                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3880
3881            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3882                eprintln!(
3883                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3884                    line_num,
3885                    effective_continuation_indent,
3886                    adjusted_min_continuation_for_tracking,
3887                    is_valid_continuation,
3888                    line_info.in_code_span_continuation,
3889                    line_info.in_code_block,
3890                    current_block.is_some()
3891                );
3892            }
3893
3894            if !line_info.in_code_span_continuation
3895                && line_info.list_item.is_none()
3896                && !line_info.is_blank
3897                && !line_info.in_code_block
3898                && is_valid_continuation
3899                && let Some(ref mut block) = current_block
3900            {
3901                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3902                    eprintln!(
3903                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3904                        line_num, block.end_line, line_num
3905                    );
3906                }
3907                block.end_line = line_num;
3908            }
3909
3910            // Check if this line is a list item
3911            if let Some(list_item) = &line_info.list_item {
3912                // Calculate nesting level based on indentation
3913                let item_indent = list_item.marker_column;
3914                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3915
3916                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3917                    eprintln!(
3918                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3919                        line_num, list_item.marker, item_indent
3920                    );
3921                }
3922
3923                if let Some(ref mut block) = current_block {
3924                    // Check if this continues the current block
3925                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3926                    // or a continuation at the same or lower level
3927                    let is_nested = nesting > block.nesting_level;
3928                    let same_type =
3929                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3930                    let same_context = block.blockquote_prefix == blockquote_prefix;
3931                    // Allow one blank line after last item, or lines immediately after block content
3932                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3933
3934                    // For unordered lists, also check marker consistency
3935                    let marker_compatible =
3936                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3937
3938                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3939                    // This eliminates the quadratic bottleneck from issue #148
3940                    let has_non_list_content = has_list_breaking_content_since_last_item;
3941
3942                    // A list continues if:
3943                    // 1. It's a nested item (indented more than the parent), OR
3944                    // 2. It's the same type at the same level with reasonable distance
3945                    let mut continues_list = if is_nested {
3946                        // Nested items always continue the list if they're in the same context
3947                        same_context && reasonable_distance && !has_non_list_content
3948                    } else {
3949                        // Same-level items need to match type and markers
3950                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
3951                    };
3952
3953                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3954                        eprintln!(
3955                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
3956                            line_num,
3957                            continues_list,
3958                            is_nested,
3959                            same_type,
3960                            same_context,
3961                            reasonable_distance,
3962                            marker_compatible,
3963                            has_non_list_content,
3964                            last_list_item_line,
3965                            block.end_line
3966                        );
3967                    }
3968
3969                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
3970                    // This handles edge cases where content patterns might otherwise split lists incorrectly
3971                    // Apply for: nested items (different types OK), OR same-level same-type items
3972                    if !continues_list
3973                        && (is_nested || same_type)
3974                        && reasonable_distance
3975                        && line_num > 0
3976                        && block.end_line == line_num - 1
3977                    {
3978                        // Check if the previous line was a list item or a continuation of a list item
3979                        // (including lazy continuation lines)
3980                        if block.item_lines.contains(&(line_num - 1)) {
3981                            // They're consecutive list items - force them to be in the same list
3982                            continues_list = true;
3983                        } else {
3984                            // Previous line is a continuation line within this block
3985                            // (e.g., lazy continuation with indent=0)
3986                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
3987                            continues_list = true;
3988                        }
3989                    }
3990
3991                    if continues_list {
3992                        // Extend current block
3993                        block.end_line = line_num;
3994                        block.item_lines.push(line_num);
3995
3996                        // Update max marker width
3997                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
3998                            list_item.marker.len() + 1
3999                        } else {
4000                            list_item.marker.len()
4001                        });
4002
4003                        // Update marker consistency for unordered lists
4004                        if !block.is_ordered
4005                            && block.marker.is_some()
4006                            && block.marker.as_ref() != Some(&list_item.marker)
4007                        {
4008                            // Mixed markers, clear the marker field
4009                            block.marker = None;
4010                        }
4011
4012                        // Reset tracked state for issue #148 optimization
4013                        reset_tracking_state(
4014                            list_item,
4015                            &mut has_list_breaking_content_since_last_item,
4016                            &mut min_continuation_for_tracking,
4017                        );
4018                    } else {
4019                        // End current block and start a new one
4020                        // When a different list type starts AT THE SAME LEVEL (not nested),
4021                        // trim back lazy continuation lines (they become part of the gap, not the list)
4022                        // For nested items, different types are fine - they're sub-lists
4023                        if !same_type
4024                            && !is_nested
4025                            && let Some(&last_item) = block.item_lines.last()
4026                        {
4027                            block.end_line = last_item;
4028                        }
4029
4030                        list_blocks.push(block.clone());
4031
4032                        *block = ListBlock {
4033                            start_line: line_num,
4034                            end_line: line_num,
4035                            is_ordered: list_item.is_ordered,
4036                            marker: if list_item.is_ordered {
4037                                None
4038                            } else {
4039                                Some(list_item.marker.clone())
4040                            },
4041                            blockquote_prefix: blockquote_prefix.clone(),
4042                            item_lines: vec![line_num],
4043                            nesting_level: nesting,
4044                            max_marker_width: if list_item.is_ordered {
4045                                list_item.marker.len() + 1
4046                            } else {
4047                                list_item.marker.len()
4048                            },
4049                        };
4050
4051                        // Initialize tracked state for new block (issue #148 optimization)
4052                        reset_tracking_state(
4053                            list_item,
4054                            &mut has_list_breaking_content_since_last_item,
4055                            &mut min_continuation_for_tracking,
4056                        );
4057                    }
4058                } else {
4059                    // Start a new block
4060                    current_block = Some(ListBlock {
4061                        start_line: line_num,
4062                        end_line: line_num,
4063                        is_ordered: list_item.is_ordered,
4064                        marker: if list_item.is_ordered {
4065                            None
4066                        } else {
4067                            Some(list_item.marker.clone())
4068                        },
4069                        blockquote_prefix,
4070                        item_lines: vec![line_num],
4071                        nesting_level: nesting,
4072                        max_marker_width: list_item.marker.len(),
4073                    });
4074
4075                    // Initialize tracked state for new block (issue #148 optimization)
4076                    reset_tracking_state(
4077                        list_item,
4078                        &mut has_list_breaking_content_since_last_item,
4079                        &mut min_continuation_for_tracking,
4080                    );
4081                }
4082
4083                last_list_item_line = line_num;
4084                current_indent_level = item_indent;
4085                last_marker_width = if list_item.is_ordered {
4086                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
4087                } else {
4088                    list_item.marker.len()
4089                };
4090            } else if let Some(ref mut block) = current_block {
4091                // Not a list item - check if it continues the current block
4092                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4093                    eprintln!(
4094                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4095                        line_num, line_info.is_blank
4096                    );
4097                }
4098
4099                // For MD032 compatibility, we use a simple approach:
4100                // - Indented lines continue the list
4101                // - Blank lines followed by indented content continue the list
4102                // - Everything else ends the list
4103
4104                // Check if the last line in the list block ended with a backslash (hard line break)
4105                // This handles cases where list items use backslash for hard line breaks
4106                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4107                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4108                } else {
4109                    false
4110                };
4111
4112                // Calculate minimum indentation for list continuation
4113                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
4114                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
4115                let min_continuation_indent = if block.is_ordered {
4116                    current_indent_level + last_marker_width
4117                } else {
4118                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
4119                };
4120
4121                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4122                    // Indented line or backslash continuation continues the list
4123                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4124                        eprintln!(
4125                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4126                            line_num, line_info.indent, min_continuation_indent
4127                        );
4128                    }
4129                    block.end_line = line_num;
4130                } else if line_info.is_blank {
4131                    // Blank line - check if it's internal to the list or ending it
4132                    // We only include blank lines that are followed by more list content
4133                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4134                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4135                    }
4136                    let mut check_idx = line_idx + 1;
4137                    let mut found_continuation = false;
4138
4139                    // Skip additional blank lines
4140                    while check_idx < lines.len() && lines[check_idx].is_blank {
4141                        check_idx += 1;
4142                    }
4143
4144                    if check_idx < lines.len() {
4145                        let next_line = &lines[check_idx];
4146                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
4147                        let next_content = next_line.content(content);
4148                        // Use blockquote level (count of >) to compare, not the full prefix
4149                        // This avoids issues where the regex captures extra whitespace
4150                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4151                        let next_bq_level_for_indent = next_content
4152                            .chars()
4153                            .take_while(|c| *c == '>' || c.is_whitespace())
4154                            .filter(|&c| c == '>')
4155                            .count();
4156                        let effective_indent =
4157                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4158                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
4159                                // Find position after ">" and one space
4160                                let mut pos = 0;
4161                                let mut found_markers = 0;
4162                                for c in next_content.chars() {
4163                                    pos += c.len_utf8();
4164                                    if c == '>' {
4165                                        found_markers += 1;
4166                                        if found_markers == next_bq_level_for_indent {
4167                                            // Skip optional space after last >
4168                                            if next_content.get(pos..pos + 1) == Some(" ") {
4169                                                pos += 1;
4170                                            }
4171                                            break;
4172                                        }
4173                                    }
4174                                }
4175                                let after_blockquote_marker = &next_content[pos..];
4176                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4177                            } else {
4178                                next_line.indent
4179                            };
4180                        // Also adjust min_continuation_indent for blockquote lists
4181                        // The marker_column includes blockquote prefix, so subtract it
4182                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4183                            // For blockquote lists, the continuation is relative to blockquote content
4184                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
4185                            if block.is_ordered { last_marker_width } else { 2 }
4186                        } else {
4187                            min_continuation_indent
4188                        };
4189                        // Check if followed by indented content (list continuation)
4190                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4191                            eprintln!(
4192                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4193                                line_num,
4194                                check_idx + 1,
4195                                effective_indent,
4196                                adjusted_min_continuation,
4197                                next_line.list_item.is_some(),
4198                                next_line.in_code_block
4199                            );
4200                        }
4201                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4202                            found_continuation = true;
4203                        }
4204                        // Check if followed by another list item at the same level
4205                        else if !next_line.in_code_block
4206                            && next_line.list_item.is_some()
4207                            && let Some(item) = &next_line.list_item
4208                        {
4209                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4210                                .find(next_line.content(content))
4211                                .map_or(String::new(), |m| m.as_str().to_string());
4212                            if item.marker_column == current_indent_level
4213                                && item.is_ordered == block.is_ordered
4214                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4215                            {
4216                                // Check if there was meaningful content between the list items (unused now)
4217                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
4218                                // Pre-compute block's blockquote level for use in closures
4219                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4220                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4221                                    if let Some(between_line) = lines.get(idx) {
4222                                        let between_content = between_line.content(content);
4223                                        let trimmed = between_content.trim();
4224                                        // Skip empty lines
4225                                        if trimmed.is_empty() {
4226                                            return false;
4227                                        }
4228                                        // Check for meaningful content
4229                                        let line_indent = between_content.len() - between_content.trim_start().len();
4230
4231                                        // Check if blockquote level changed (not just if line starts with ">")
4232                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4233                                            .find(between_content)
4234                                            .map_or(String::new(), |m| m.as_str().to_string());
4235                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4236                                        let blockquote_level_changed =
4237                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4238
4239                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4240                                        if trimmed.starts_with("```")
4241                                            || trimmed.starts_with("~~~")
4242                                            || trimmed.starts_with("---")
4243                                            || trimmed.starts_with("***")
4244                                            || trimmed.starts_with("___")
4245                                            || blockquote_level_changed
4246                                            || crate::utils::skip_context::is_table_line(trimmed)
4247                                            || between_line.heading.is_some()
4248                                        {
4249                                            return true; // These are structural separators - meaningful content that breaks lists
4250                                        }
4251
4252                                        // Only properly indented content continues the list
4253                                        line_indent >= min_continuation_indent
4254                                    } else {
4255                                        false
4256                                    }
4257                                });
4258
4259                                if block.is_ordered {
4260                                    // For ordered lists: don't continue if there are structural separators
4261                                    // Check if there are structural separators between the list items
4262                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4263                                        if let Some(between_line) = lines.get(idx) {
4264                                            let between_content = between_line.content(content);
4265                                            let trimmed = between_content.trim();
4266                                            if trimmed.is_empty() {
4267                                                return false;
4268                                            }
4269                                            // Check if blockquote level changed (not just if line starts with ">")
4270                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4271                                                .find(between_content)
4272                                                .map_or(String::new(), |m| m.as_str().to_string());
4273                                            let between_bq_level =
4274                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4275                                            let blockquote_level_changed =
4276                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4277                                            // Check for structural separators that break lists
4278                                            trimmed.starts_with("```")
4279                                                || trimmed.starts_with("~~~")
4280                                                || trimmed.starts_with("---")
4281                                                || trimmed.starts_with("***")
4282                                                || trimmed.starts_with("___")
4283                                                || blockquote_level_changed
4284                                                || crate::utils::skip_context::is_table_line(trimmed)
4285                                                || between_line.heading.is_some()
4286                                        } else {
4287                                            false
4288                                        }
4289                                    });
4290                                    found_continuation = !has_structural_separators;
4291                                } else {
4292                                    // For unordered lists: also check for structural separators
4293                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4294                                        if let Some(between_line) = lines.get(idx) {
4295                                            let between_content = between_line.content(content);
4296                                            let trimmed = between_content.trim();
4297                                            if trimmed.is_empty() {
4298                                                return false;
4299                                            }
4300                                            // Check if blockquote level changed (not just if line starts with ">")
4301                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4302                                                .find(between_content)
4303                                                .map_or(String::new(), |m| m.as_str().to_string());
4304                                            let between_bq_level =
4305                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4306                                            let blockquote_level_changed =
4307                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4308                                            // Check for structural separators that break lists
4309                                            trimmed.starts_with("```")
4310                                                || trimmed.starts_with("~~~")
4311                                                || trimmed.starts_with("---")
4312                                                || trimmed.starts_with("***")
4313                                                || trimmed.starts_with("___")
4314                                                || blockquote_level_changed
4315                                                || crate::utils::skip_context::is_table_line(trimmed)
4316                                                || between_line.heading.is_some()
4317                                        } else {
4318                                            false
4319                                        }
4320                                    });
4321                                    found_continuation = !has_structural_separators;
4322                                }
4323                            }
4324                        }
4325                    }
4326
4327                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4328                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4329                    }
4330                    if found_continuation {
4331                        // Include the blank line in the block
4332                        block.end_line = line_num;
4333                    } else {
4334                        // Blank line ends the list - don't include it
4335                        list_blocks.push(block.clone());
4336                        current_block = None;
4337                    }
4338                } else {
4339                    // Check for lazy continuation - non-indented line immediately after a list item
4340                    // But only if the line has sufficient indentation for the list type
4341                    let min_required_indent = if block.is_ordered {
4342                        current_indent_level + last_marker_width
4343                    } else {
4344                        current_indent_level + 2
4345                    };
4346
4347                    // For lazy continuation to apply, the line must either:
4348                    // 1. Have no indentation (true lazy continuation)
4349                    // 2. Have sufficient indentation for the list type
4350                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4351                    let line_content = line_info.content(content).trim();
4352
4353                    // Check for table-like patterns
4354                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4355
4356                    // Check if blockquote level changed (not just if line starts with ">")
4357                    // Lines within the same blockquote level are NOT structural separators
4358                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4359                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4360                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4361
4362                    let is_structural_separator = line_info.heading.is_some()
4363                        || line_content.starts_with("```")
4364                        || line_content.starts_with("~~~")
4365                        || line_content.starts_with("---")
4366                        || line_content.starts_with("***")
4367                        || line_content.starts_with("___")
4368                        || blockquote_level_changed
4369                        || looks_like_table;
4370
4371                    // Allow lazy continuation if we're still within the same list block
4372                    // (not just immediately after a list item)
4373                    // Also treat code span continuations as valid continuations regardless of indent
4374                    let is_lazy_continuation = !is_structural_separator
4375                        && !line_info.is_blank
4376                        && (line_info.indent == 0
4377                            || line_info.indent >= min_required_indent
4378                            || line_info.in_code_span_continuation);
4379
4380                    if is_lazy_continuation {
4381                        // Per CommonMark, lazy continuation continues until a blank line
4382                        // or structural element, regardless of uppercase at line start
4383                        block.end_line = line_num;
4384                    } else {
4385                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4386                        list_blocks.push(block.clone());
4387                        current_block = None;
4388                    }
4389                }
4390            }
4391        }
4392
4393        // Don't forget the last block
4394        if let Some(block) = current_block {
4395            list_blocks.push(block);
4396        }
4397
4398        // Merge adjacent blocks that should be one
4399        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4400
4401        list_blocks
4402    }
4403
4404    /// Compute character frequency for fast content analysis
4405    fn compute_char_frequency(content: &str) -> CharFrequency {
4406        let mut frequency = CharFrequency::default();
4407
4408        for ch in content.chars() {
4409            match ch {
4410                '#' => frequency.hash_count += 1,
4411                '*' => frequency.asterisk_count += 1,
4412                '_' => frequency.underscore_count += 1,
4413                '-' => frequency.hyphen_count += 1,
4414                '+' => frequency.plus_count += 1,
4415                '>' => frequency.gt_count += 1,
4416                '|' => frequency.pipe_count += 1,
4417                '[' => frequency.bracket_count += 1,
4418                '`' => frequency.backtick_count += 1,
4419                '<' => frequency.lt_count += 1,
4420                '!' => frequency.exclamation_count += 1,
4421                '\n' => frequency.newline_count += 1,
4422                _ => {}
4423            }
4424        }
4425
4426        frequency
4427    }
4428
4429    /// Parse HTML tags in the content
4430    fn parse_html_tags(
4431        content: &str,
4432        lines: &[LineInfo],
4433        code_blocks: &[(usize, usize)],
4434        flavor: MarkdownFlavor,
4435    ) -> Vec<HtmlTag> {
4436        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4437            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4438
4439        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4440
4441        for cap in HTML_TAG_REGEX.captures_iter(content) {
4442            let full_match = cap.get(0).unwrap();
4443            let match_start = full_match.start();
4444            let match_end = full_match.end();
4445
4446            // Skip if in code block
4447            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4448                continue;
4449            }
4450
4451            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4452            let tag_name_original = cap.get(2).unwrap().as_str();
4453            let tag_name = tag_name_original.to_lowercase();
4454            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4455
4456            // Skip JSX components in MDX files (tags starting with uppercase letter)
4457            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4458            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4459                continue;
4460            }
4461
4462            // Find which line this tag is on
4463            let mut line_num = 1;
4464            let mut col_start = match_start;
4465            let mut col_end = match_end;
4466            for (idx, line_info) in lines.iter().enumerate() {
4467                if match_start >= line_info.byte_offset {
4468                    line_num = idx + 1;
4469                    col_start = match_start - line_info.byte_offset;
4470                    col_end = match_end - line_info.byte_offset;
4471                } else {
4472                    break;
4473                }
4474            }
4475
4476            html_tags.push(HtmlTag {
4477                line: line_num,
4478                start_col: col_start,
4479                end_col: col_end,
4480                byte_offset: match_start,
4481                byte_end: match_end,
4482                tag_name,
4483                is_closing,
4484                is_self_closing,
4485                raw_content: full_match.as_str().to_string(),
4486            });
4487        }
4488
4489        html_tags
4490    }
4491
4492    /// Parse table rows in the content
4493    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4494        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4495
4496        for (line_idx, line_info) in lines.iter().enumerate() {
4497            // Skip lines in code blocks or blank lines
4498            if line_info.in_code_block || line_info.is_blank {
4499                continue;
4500            }
4501
4502            let line = line_info.content(content);
4503            let line_num = line_idx + 1;
4504
4505            // Check if this line contains pipes (potential table row)
4506            if !line.contains('|') {
4507                continue;
4508            }
4509
4510            // Count columns by splitting on pipes
4511            let parts: Vec<&str> = line.split('|').collect();
4512            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4513
4514            // Check if this is a separator row
4515            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4516            let mut column_alignments = Vec::new();
4517
4518            if is_separator {
4519                for part in &parts[1..parts.len() - 1] {
4520                    // Skip first and last empty parts
4521                    let trimmed = part.trim();
4522                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4523                        "center".to_string()
4524                    } else if trimmed.ends_with(':') {
4525                        "right".to_string()
4526                    } else if trimmed.starts_with(':') {
4527                        "left".to_string()
4528                    } else {
4529                        "none".to_string()
4530                    };
4531                    column_alignments.push(alignment);
4532                }
4533            }
4534
4535            table_rows.push(TableRow {
4536                line: line_num,
4537                is_separator,
4538                column_count,
4539                column_alignments,
4540            });
4541        }
4542
4543        table_rows
4544    }
4545
4546    /// Parse bare URLs and emails in the content
4547    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4548        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4549
4550        // Check for bare URLs (not in angle brackets or markdown links)
4551        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4552            let full_match = cap.get(0).unwrap();
4553            let match_start = full_match.start();
4554            let match_end = full_match.end();
4555
4556            // Skip if in code block
4557            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4558                continue;
4559            }
4560
4561            // Skip if already in angle brackets or markdown links
4562            let preceding_char = if match_start > 0 {
4563                content.chars().nth(match_start - 1)
4564            } else {
4565                None
4566            };
4567            let following_char = content.chars().nth(match_end);
4568
4569            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4570                continue;
4571            }
4572            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4573                continue;
4574            }
4575
4576            let url = full_match.as_str();
4577            let url_type = if url.starts_with("https://") {
4578                "https"
4579            } else if url.starts_with("http://") {
4580                "http"
4581            } else if url.starts_with("ftp://") {
4582                "ftp"
4583            } else {
4584                "other"
4585            };
4586
4587            // Find which line this URL is on
4588            let mut line_num = 1;
4589            let mut col_start = match_start;
4590            let mut col_end = match_end;
4591            for (idx, line_info) in lines.iter().enumerate() {
4592                if match_start >= line_info.byte_offset {
4593                    line_num = idx + 1;
4594                    col_start = match_start - line_info.byte_offset;
4595                    col_end = match_end - line_info.byte_offset;
4596                } else {
4597                    break;
4598                }
4599            }
4600
4601            bare_urls.push(BareUrl {
4602                line: line_num,
4603                start_col: col_start,
4604                end_col: col_end,
4605                byte_offset: match_start,
4606                byte_end: match_end,
4607                url: url.to_string(),
4608                url_type: url_type.to_string(),
4609            });
4610        }
4611
4612        // Check for bare email addresses
4613        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4614            let full_match = cap.get(0).unwrap();
4615            let match_start = full_match.start();
4616            let match_end = full_match.end();
4617
4618            // Skip if in code block
4619            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4620                continue;
4621            }
4622
4623            // Skip if already in angle brackets or markdown links
4624            let preceding_char = if match_start > 0 {
4625                content.chars().nth(match_start - 1)
4626            } else {
4627                None
4628            };
4629            let following_char = content.chars().nth(match_end);
4630
4631            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4632                continue;
4633            }
4634            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4635                continue;
4636            }
4637
4638            let email = full_match.as_str();
4639
4640            // Find which line this email is on
4641            let mut line_num = 1;
4642            let mut col_start = match_start;
4643            let mut col_end = match_end;
4644            for (idx, line_info) in lines.iter().enumerate() {
4645                if match_start >= line_info.byte_offset {
4646                    line_num = idx + 1;
4647                    col_start = match_start - line_info.byte_offset;
4648                    col_end = match_end - line_info.byte_offset;
4649                } else {
4650                    break;
4651                }
4652            }
4653
4654            bare_urls.push(BareUrl {
4655                line: line_num,
4656                start_col: col_start,
4657                end_col: col_end,
4658                byte_offset: match_start,
4659                byte_end: match_end,
4660                url: email.to_string(),
4661                url_type: "email".to_string(),
4662            });
4663        }
4664
4665        bare_urls
4666    }
4667
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The iterator is constructed from a borrow of this context's `lines`, so it cannot
    /// outlive the context (hence the `'_` lifetime on the return type).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
4691
4692    /// Check if the document contains any valid CommonMark headings
4693    ///
4694    /// Returns `true` if there is at least one heading with proper space after `#`.
4695    #[must_use]
4696    pub fn has_valid_headings(&self) -> bool {
4697        self.lines
4698            .iter()
4699            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4700    }
4701}
4702
4703/// Merge adjacent list blocks that should be treated as one
4704fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4705    if list_blocks.len() < 2 {
4706        return;
4707    }
4708
4709    let mut merger = ListBlockMerger::new(content, lines);
4710    *list_blocks = merger.merge(list_blocks);
4711}
4712
/// Helper struct to manage the complex logic of merging list blocks
///
/// Holds borrowed views of the document so the merge predicates can inspect what lies
/// between two candidate blocks; it owns no data of its own.
struct ListBlockMerger<'a> {
    /// The `content` string handed to `merge_adjacent_list_blocks`; forwarded to
    /// `has_meaningful_content_between` when checking the gap between two blocks.
    content: &'a str,
    /// Per-line metadata for the same document; forwarded together with `content`.
    lines: &'a [LineInfo],
}
4718
4719impl<'a> ListBlockMerger<'a> {
    /// Create a merger that borrows `content` and `lines` for `'a`; nothing is copied.
    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
        Self { content, lines }
    }
4723
4724    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4725        let mut merged = Vec::with_capacity(list_blocks.len());
4726        let mut current = list_blocks[0].clone();
4727
4728        for next in list_blocks.iter().skip(1) {
4729            if self.should_merge_blocks(&current, next) {
4730                current = self.merge_two_blocks(current, next);
4731            } else {
4732                merged.push(current);
4733                current = next.clone();
4734            }
4735        }
4736
4737        merged.push(current);
4738        merged
4739    }
4740
4741    /// Determine if two adjacent list blocks should be merged
4742    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4743        // Basic compatibility checks
4744        if !self.blocks_are_compatible(current, next) {
4745            return false;
4746        }
4747
4748        // Check spacing and content between blocks
4749        let spacing = self.analyze_spacing_between(current, next);
4750        match spacing {
4751            BlockSpacing::Consecutive => true,
4752            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4753            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4754                self.can_merge_with_content_between(current, next)
4755            }
4756        }
4757    }
4758
4759    /// Check if blocks have compatible structure for merging
4760    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4761        current.is_ordered == next.is_ordered
4762            && current.blockquote_prefix == next.blockquote_prefix
4763            && current.nesting_level == next.nesting_level
4764    }
4765
4766    /// Analyze the spacing between two list blocks
4767    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4768        let gap = next.start_line - current.end_line;
4769
4770        match gap {
4771            1 => BlockSpacing::Consecutive,
4772            2 => BlockSpacing::SingleBlank,
4773            _ if gap > 2 => {
4774                if self.has_only_blank_lines_between(current, next) {
4775                    BlockSpacing::MultipleBlanks
4776                } else {
4777                    BlockSpacing::ContentBetween
4778                }
4779            }
4780            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4781        }
4782    }
4783
4784    /// Check if unordered lists can be merged with a single blank line between
4785    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4786        // Check if there are structural separators between the blocks
4787        // If has_meaningful_content_between returns true, it means there are structural separators
4788        if has_meaningful_content_between(self.content, current, next, self.lines) {
4789            return false; // Structural separators prevent merging
4790        }
4791
4792        // Only merge unordered lists with same marker across single blank
4793        !current.is_ordered && current.marker == next.marker
4794    }
4795
4796    /// Check if ordered lists can be merged when there's content between them
4797    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4798        // Do not merge lists if there are structural separators between them
4799        if has_meaningful_content_between(self.content, current, next, self.lines) {
4800            return false; // Structural separators prevent merging
4801        }
4802
4803        // Only consider merging ordered lists if there's no structural content between
4804        current.is_ordered && next.is_ordered
4805    }
4806
4807    /// Check if there are only blank lines between blocks
4808    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4809        for line_num in (current.end_line + 1)..next.start_line {
4810            if let Some(line_info) = self.lines.get(line_num - 1)
4811                && !line_info.content(self.content).trim().is_empty()
4812            {
4813                return false;
4814            }
4815        }
4816        true
4817    }
4818
4819    /// Merge two compatible list blocks into one
4820    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4821        current.end_line = next.end_line;
4822        current.item_lines.extend_from_slice(&next.item_lines);
4823
4824        // Update max marker width
4825        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4826
4827        // Handle marker consistency for unordered lists
4828        if !current.is_ordered && self.markers_differ(&current, next) {
4829            current.marker = None; // Mixed markers
4830        }
4831
4832        current
4833    }
4834
4835    /// Check if two blocks have different markers
4836    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4837        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4838    }
4839}
4840
/// Types of spacing between list blocks, as classified from the line-number gap
/// between the end of one block and the start of the next.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    /// No gap between blocks: the next block starts on the line right after the current one ends
    Consecutive,
    /// One blank line between blocks (line-number gap of exactly two)
    SingleBlank,
    /// Multiple blank lines but no content
    MultipleBlanks,
    /// Content exists between blocks (at least one non-blank line in the gap)
    ContentBetween,
}
4849
4850/// Check if there's meaningful content (not just blank lines) between two list blocks
4851fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4852    // Check lines between current.end_line and next.start_line
4853    for line_num in (current.end_line + 1)..next.start_line {
4854        if let Some(line_info) = lines.get(line_num - 1) {
4855            // Convert to 0-indexed
4856            let trimmed = line_info.content(content).trim();
4857
4858            // Skip empty lines
4859            if trimmed.is_empty() {
4860                continue;
4861            }
4862
4863            // Check for structural separators that should separate lists (CommonMark compliant)
4864
4865            // Headings separate lists
4866            if line_info.heading.is_some() {
4867                return true; // Has meaningful content - headings separate lists
4868            }
4869
4870            // Horizontal rules separate lists (---, ***, ___)
4871            if is_horizontal_rule(trimmed) {
4872                return true; // Has meaningful content - horizontal rules separate lists
4873            }
4874
4875            // Tables separate lists
4876            if crate::utils::skip_context::is_table_line(trimmed) {
4877                return true; // Has meaningful content - tables separate lists
4878            }
4879
4880            // Blockquotes separate lists
4881            if trimmed.starts_with('>') {
4882                return true; // Has meaningful content - blockquotes separate lists
4883            }
4884
4885            // Code block fences separate lists (unless properly indented as list content)
4886            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4887                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4888
4889                // Check if this code block is properly indented as list continuation
4890                let min_continuation_indent = if current.is_ordered {
4891                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4892                } else {
4893                    current.nesting_level + 2
4894                };
4895
4896                if line_indent < min_continuation_indent {
4897                    // This is a standalone code block that separates lists
4898                    return true; // Has meaningful content - standalone code blocks separate lists
4899                }
4900            }
4901
4902            // Check if this line has proper indentation for list continuation
4903            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4904
4905            // Calculate minimum indentation needed to be list continuation
4906            let min_indent = if current.is_ordered {
4907                current.nesting_level + current.max_marker_width
4908            } else {
4909                current.nesting_level + 2
4910            };
4911
4912            // If the line is not indented enough to be list continuation, it's meaningful content
4913            if line_indent < min_indent {
4914                return true; // Has meaningful content - content not indented as list continuation
4915            }
4916
4917            // If we reach here, the line is properly indented as list continuation
4918            // Continue checking other lines
4919        }
4920    }
4921
4922    // Only blank lines or properly indented list continuation content between blocks
4923    false
4924}
4925
4926/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
4927/// CommonMark rules for thematic breaks (horizontal rules):
4928/// - May have 0-3 spaces of leading indentation (but NOT tabs)
4929/// - Must have 3+ of the same character (-, *, or _)
4930/// - May have spaces between characters
4931/// - No other characters allowed
4932pub fn is_horizontal_rule_line(line: &str) -> bool {
4933    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
4934    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
4935    if leading_spaces > 3 || line.starts_with('\t') {
4936        return false;
4937    }
4938
4939    is_horizontal_rule_content(line.trim())
4940}
4941
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// `trimmed` must start with `-`, `*` or `_`; that character has to occur at least three
/// times, and the only other characters allowed are spaces and tabs. Anything else,
/// including leading whitespace, yields `false`.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes cannot hold three marker characters
    if trimmed.len() < 3 {
        return false;
    }

    // Walk the characters directly instead of collecting them into a Vec first
    let mut chars = trimmed.chars();
    let marker = match chars.next() {
        Some(ch) if matches!(ch, '-' | '*' | '_') => ch,
        _ => return false,
    };

    // The first character already counts as one marker
    let mut marker_count = 1usize;
    for ch in chars {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}
4966
/// Backwards-compatible alias for `is_horizontal_rule_content`.
///
/// Forwards `trimmed` unchanged and returns the callee's result as-is.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
4971
/// Unit tests for this module.
4973#[cfg(test)]
4974mod tests {
4975    use super::*;
4976
    /// An empty document has a single line offset (0), no `lines` entries, and offset 0
    /// still resolves to line 1, column 1.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }
4985
    /// A single line without a trailing newline has one line offset, and byte offsets map
    /// to 1-based (line, column) pairs on that line.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }
4994
    /// Line offsets point at the first byte of each line (including the blank one), and
    /// offsets at line starts or inside a line map to the expected (line, column).
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }
5007
    /// Per-line metadata (content, byte offset, indent, blank flag) is populated for each
    /// of the seven lines, and the 1-based lookup helpers agree with it.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Test helper methods (both take 1-based line numbers)
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }
5043
    /// Unordered and ordered list markers are detected with their marker text, columns and
    /// number, while a plain paragraph line carries no list item.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Lines 4 ("   2) Nested ordered") and 5 (blank) are not asserted on

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }
5077
    /// Every byte offset of a three-line document, including newline positions and the
    /// offset one past the final character, maps to the expected (line, column).
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }
5090
    /// With the MDX flavor, the leading `import`/`export` lines are flagged as ESM blocks
    /// while the blank, heading and paragraph lines that follow are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }
5119
    /// The same `import`/`export` lines are not flagged as ESM blocks when the document is
    /// parsed with the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
5140
    /// Blockquote lines whose content is heavily indented are still recorded as blockquotes,
    /// with the content stripped of the extra spaces and a nesting level of 1.
    #[test]
    fn test_blockquote_with_indented_content() {
        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
        // The content inside the blockquote may also be detected as a code block (which is correct),
        // but for MD046 purposes, we need to know the line is inside a blockquote.
        let content = r#"# Heading

>      -S socket-path
>                    More text
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 3 (index 2) should be detected as blockquote
        assert!(
            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
            "Line 3 should be a blockquote"
        );
        // Line 4 (index 3) should also be blockquote
        assert!(
            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
            "Line 4 should be a blockquote"
        );

        // Verify blockquote content is correctly parsed
        // Note: spaces_after includes the spaces between `>` and content
        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq3.content, "-S socket-path");
        assert_eq!(bq3.nesting_level, 1);
        // 6 spaces after the `>` marker
        assert!(bq3.has_multiple_spaces_after_marker);

        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
        assert_eq!(bq4.content, "More text");
        assert_eq!(bq4.nesting_level, 1);
    }
5176
    /// Of two footnote definitions and one regular reference definition, only the regular
    /// one is recorded, with its id, URL and title.
    #[test]
    fn test_footnote_definitions_not_parsed_as_reference_defs() {
        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
        let content = r#"# Title

A footnote[^1].

[^1]: This is the footnote content.

[^note]: Another footnote with [link](https://example.com).

[regular]: ./path.md "A real reference definition"
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should only have one reference definition (the regular one)
        assert_eq!(
            ctx.reference_defs.len(),
            1,
            "Footnotes should not be parsed as reference definitions"
        );

        // The only reference def should be the regular one
        assert_eq!(ctx.reference_defs[0].id, "regular");
        assert_eq!(ctx.reference_defs[0].url, "./path.md");
        assert_eq!(
            ctx.reference_defs[0].title,
            Some("A real reference definition".to_string())
        );
    }
5207
    /// A footnote definition whose body is an inline link produces no reference definitions.
    #[test]
    fn test_footnote_with_inline_link_not_misidentified() {
        // Regression test for issue #286: footnote containing an inline link
        // was incorrectly parsed as a reference definition with URL "[link](url)"
        let content = r#"# Title

A footnote[^1].

[^1]: [link](https://www.google.com).
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should have no reference definitions
        assert!(
            ctx.reference_defs.is_empty(),
            "Footnote with inline link should not create a reference definition"
        );
    }
5226
    /// Numeric, named, single-character, hyphenated and mixed footnote ids are all skipped;
    /// only the two regular reference definitions are collected.
    #[test]
    fn test_various_footnote_formats_excluded() {
        // Test various footnote ID formats are all excluded
        let content = r#"[^1]: Numeric footnote
[^note]: Named footnote
[^a]: Single char footnote
[^long-footnote-name]: Long named footnote
[^123abc]: Mixed alphanumeric

[ref1]: ./file1.md
[ref2]: ./file2.md
"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Should only have the two regular reference definitions
        assert_eq!(
            ctx.reference_defs.len(),
            2,
            "Only regular reference definitions should be parsed"
        );

        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
        assert!(ids.contains(&"ref1"));
        assert!(ids.contains(&"ref2"));
        // No collected id may carry the footnote caret prefix
        assert!(!ids.iter().any(|id| id.starts_with('^')));
    }
5253
5254    // =========================================================================
5255    // Tests for has_char and char_count methods
5256    // =========================================================================
5257
    /// `has_char` reports `true` for each of the twelve tracked characters when the
    /// document contains at least one occurrence of every one of them.
    #[test]
    fn test_has_char_tracked_characters() {
        // Test all 12 tracked characters
        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // All tracked characters should be detected
        assert!(ctx.has_char('#'), "Should detect hash");
        assert!(ctx.has_char('*'), "Should detect asterisk");
        assert!(ctx.has_char('_'), "Should detect underscore");
        assert!(ctx.has_char('-'), "Should detect hyphen");
        assert!(ctx.has_char('+'), "Should detect plus");
        assert!(ctx.has_char('>'), "Should detect gt");
        assert!(ctx.has_char('|'), "Should detect pipe");
        assert!(ctx.has_char('['), "Should detect bracket");
        assert!(ctx.has_char('`'), "Should detect backtick");
        assert!(ctx.has_char('<'), "Should detect lt");
        assert!(ctx.has_char('!'), "Should detect exclamation");
        assert!(ctx.has_char('\n'), "Should detect newline");
    }
5278
    /// `has_char` reports `false` for every tracked character when the document is a single
    /// line of plain words.
    #[test]
    fn test_has_char_absent_characters() {
        let content = "Simple text without special chars";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // None of the tracked characters should be present
        assert!(!ctx.has_char('#'), "Should not detect hash");
        assert!(!ctx.has_char('*'), "Should not detect asterisk");
        assert!(!ctx.has_char('_'), "Should not detect underscore");
        assert!(!ctx.has_char('-'), "Should not detect hyphen");
        assert!(!ctx.has_char('+'), "Should not detect plus");
        assert!(!ctx.has_char('>'), "Should not detect gt");
        assert!(!ctx.has_char('|'), "Should not detect pipe");
        assert!(!ctx.has_char('['), "Should not detect bracket");
        assert!(!ctx.has_char('`'), "Should not detect backtick");
        assert!(!ctx.has_char('<'), "Should not detect lt");
        assert!(!ctx.has_char('!'), "Should not detect exclamation");
        // Note: single line content has no newlines
        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
    }
5299
    /// `has_char` also answers correctly for characters outside the tracked set, both
    /// present (`@`, `$`, `%`) and absent (`^`).
    #[test]
    fn test_has_char_fallback_for_untracked() {
        let content = "Text with @mention and $dollar and %percent";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Untracked characters should fall back to content.contains()
        assert!(ctx.has_char('@'), "Should detect @ via fallback");
        assert!(ctx.has_char('$'), "Should detect $ via fallback");
        assert!(ctx.has_char('%'), "Should detect % via fallback");
        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
    }
5311
    /// `char_count` returns the exact number of occurrences for each tracked character in a
    /// document built to contain a known count of every one of them.
    #[test]
    fn test_char_count_tracked_characters() {
        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Count each tracked character
        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
        // Eleven lines without a trailing newline give ten separators
        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
    }
5331
    /// `char_count` is zero for tracked characters that do not occur in the document.
    #[test]
    fn test_char_count_zero_for_absent() {
        let content = "Plain text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('_'), 0);
        assert_eq!(ctx.char_count('\n'), 0);
    }
5342
    /// `char_count` returns exact counts for characters outside the tracked set, and zero
    /// for one that is absent.
    #[test]
    fn test_char_count_fallback_for_untracked() {
        let content = "@@@ $$ %%%";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
    }
5353
    /// On an empty document `char_count` is zero and `has_char` is `false`, for a tracked
    /// (`#`, `*`) as well as an untracked (`@`) character.
    #[test]
    fn test_char_count_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);

        assert_eq!(ctx.char_count('#'), 0);
        assert_eq!(ctx.char_count('*'), 0);
        assert_eq!(ctx.char_count('@'), 0);
        assert!(!ctx.has_char('#'));
        assert!(!ctx.has_char('@'));
    }
5364
5365    // =========================================================================
5366    // Tests for is_in_html_tag method
5367    // =========================================================================
5368
5369    #[test]
5370    fn test_is_in_html_tag_simple() {
5371        let content = "<div>content</div>";
5372        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5373
5374        // Inside opening tag
5375        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5376        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5377        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5378
5379        // Outside tag (in content)
5380        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5381        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5382
5383        // Inside closing tag
5384        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5385        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5386    }
5387
    /// Only the bytes of the self-closing `<br/>` tag count as being in an HTML tag; the
    /// surrounding text and spaces do not.
    #[test]
    fn test_is_in_html_tag_self_closing() {
        // Byte layout: `Text ` = 0..=4, `<br/>` = 5..=9, ` more text` from 10
        let content = "Text <br/> more text";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before tag
        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");

        // Inside self-closing tag
        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");

        // After tag
        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
    }
5405
    /// An opening tag with attributes is covered from `<` through `>`, quoted attribute
    /// values included; the link text is outside and the closing tag is inside again.
    #[test]
    fn test_is_in_html_tag_with_attributes() {
        // Byte layout: opening tag = 0..=26, `text` = 27..=30, `</a>` starts at 31
        let content = r#"<a href="url" class="link">text</a>"#;
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // All positions inside opening tag with attributes
        assert!(ctx.is_in_html_tag(0), "Start of tag");
        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
        assert!(ctx.is_in_html_tag(26), "End of opening tag");

        // Content between tags
        assert!(!ctx.is_in_html_tag(27), "Start of content");
        assert!(!ctx.is_in_html_tag(30), "End of content");

        // Closing tag
        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
    }
5424
    /// An opening tag that spans several lines is treated as one tag: offsets on its later
    /// lines are inside it, and the first byte after its closing `>` line is not.
    #[test]
    fn test_is_in_html_tag_multiline() {
        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Opening tag spans multiple lines
        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
        assert!(ctx.is_in_html_tag(15), "Inside attribute");

        // After closing > of opening tag
        // (`+ 2` skips the `>` and the newline, landing on the `c` of "content")
        let closing_bracket_pos = content.find(">\n").unwrap();
        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
    }
5439
    /// In a document without any angle brackets, no byte offset is reported as being in an
    /// HTML tag.
    #[test]
    fn test_is_in_html_tag_no_tags() {
        let content = "Plain text without any HTML";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // No position should be in an HTML tag
        for i in 0..content.len() {
            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
        }
    }
5450
5451    // =========================================================================
5452    // Tests for is_in_jinja_range method
5453    // =========================================================================
5454
    /// A `{{ ... }}` expression is covered from its first opening brace through its last
    /// closing brace; the text before and after it is not.
    #[test]
    fn test_is_in_jinja_range_expression() {
        let content = "Hello {{ name }}!";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Before Jinja
        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");

        // Inside Jinja expression (positions 6-15 for "{{ name }}")
        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");

        // After Jinja
        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
    }
5474
    /// `{% ... %}` statements are covered delimiter to delimiter; the text between the
    /// opening and closing statements is not.
    #[test]
    fn test_is_in_jinja_range_statement() {
        // Byte layout: `{% if condition %}` = 0..=17, `content` = 18..=24, `{% endif %}` from 25
        let content = "{% if condition %}content{% endif %}";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Inside opening statement
        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
        // NOTE(review): offset 5 is the space before "condition" (the word starts at 6);
        // it is still inside the statement, so the assertion holds.
        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
        assert!(ctx.is_in_jinja_range(17), "End of opening statement");

        // Content between
        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");

        // Inside closing statement
        assert!(ctx.is_in_jinja_range(25), "Start of endif");
        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
    }
5492
/// Two expressions on one line: each is a Jinja range, the text between them is not.
#[test]
fn test_is_in_jinja_range_multiple() {
    let text = "{{ a }} and {{ b }}";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Offsets sampled from "{{ a }}" (bytes 0..=6).
    for pos in [0, 3, 6] {
        assert!(lint_ctx.is_in_jinja_range(pos));
    }

    // Offsets sampled from the " and " separator.
    for pos in [8, 11] {
        assert!(!lint_ctx.is_in_jinja_range(pos));
    }

    // Offsets sampled from "{{ b }}" (bytes 12..=18).
    for pos in [12, 15, 18] {
        assert!(lint_ctx.is_in_jinja_range(pos));
    }
}
5512
/// Text containing only single braces must not yield any Jinja range.
///
/// The fixture deliberately includes a `{single}` brace pair so the test
/// exercises the "looks similar but is not Jinja" case it is named after.
#[test]
fn test_is_in_jinja_range_no_jinja() {
    let content = "Plain text with {single} braces but not Jinja";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // No position should be in Jinja, including the bytes of the single braces.
    for i in 0..content.len() {
        assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
    }
}
5523
5524    // =========================================================================
5525    // Tests for is_in_link_title method
5526    // =========================================================================
5527
/// A reference definition with a double-quoted title: offsets in the title's
/// recorded byte range are "in title", the URL and the range end are not.
#[test]
fn test_is_in_link_title_with_title() {
    let text = r#"[ref]: https://example.com "Title text"

Some content."#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Exactly one definition is expected, and it must carry a title range.
    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_some());
    assert!(definition.title_byte_end.is_some());

    let first = definition.title_byte_start.unwrap();
    let past_last = definition.title_byte_end.unwrap();

    // Offset 10 falls inside the URL.
    assert!(!lint_ctx.is_in_link_title(10), "URL should not be in title");

    // Start, interior and last byte of the title range.
    let inside = [
        (first, "Title start should be in title"),
        (first + 5, "Middle of title should be in title"),
        (past_last - 1, "End of title should be in title"),
    ];
    for (pos, why) in inside {
        assert!(lint_ctx.is_in_link_title(pos), "{why}");
    }

    // The recorded end is exclusive.
    assert!(
        !lint_ctx.is_in_link_title(past_last),
        "After title end should not be in title"
    );
}
5561
/// A reference definition without a title records no title range, so no
/// offset in the document is "in title".
#[test]
fn test_is_in_link_title_without_title() {
    let text = "[ref]: https://example.com\n\nSome content.";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // One definition, with both title bounds absent.
    assert_eq!(lint_ctx.reference_defs.len(), 1);
    let definition = &lint_ctx.reference_defs[0];
    assert!(definition.title_byte_start.is_none());
    assert!(definition.title_byte_end.is_none());

    // Fail on the first offset (if any) reported as title content.
    if let Some(i) = (0..text.len()).find(|&pos| lint_ctx.is_in_link_title(pos)) {
        panic!("Position {i} should not be in title");
    }
}
5578
/// Three reference definitions, the first and third with titles: title
/// ranges are tracked per definition and `is_in_link_title` honours each.
///
/// The title ranges are required (not optional) here, so a definition that
/// lost its title range fails the test instead of skipping the position checks.
#[test]
fn test_is_in_link_title_multiple_refs() {
    let content = r#"[ref1]: /url1 "Title One"
[ref2]: /url2
[ref3]: /url3 "Title Three"
"#;
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    // Should have 3 reference defs
    assert_eq!(ctx.reference_defs.len(), 3);

    let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
    let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
    let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();

    // ref2 has no title
    assert!(ref2.title_byte_start.is_none());

    // ref1 has a title: interior is in title, the exclusive end and the
    // following (untitled) ref2 line are not.
    let (Some(start1), Some(end1)) = (ref1.title_byte_start, ref1.title_byte_end) else {
        panic!("ref1 should have a title byte range");
    };
    assert!(ctx.is_in_link_title(start1 + 1));
    assert!(!ctx.is_in_link_title(end1));
    assert!(!ctx.is_in_link_title(end1 + 5));

    // ref3 has a title: interior is in title, the exclusive end is not.
    let (Some(start3), Some(end3)) = (ref3.title_byte_start, ref3.title_byte_end) else {
        panic!("ref3 should have a title byte range");
    };
    assert!(ctx.is_in_link_title(start3 + 1));
    assert!(!ctx.is_in_link_title(end3));
}
5613
/// A single-quoted reference title must be recorded as a title range and be
/// honoured by `is_in_link_title`.
#[test]
fn test_is_in_link_title_single_quotes() {
    let content = "[ref]: /url 'Single quoted title'\n";
    let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

    assert_eq!(ctx.reference_defs.len(), 1);
    let def = &ctx.reference_defs[0];

    // Fail loudly if no title range was recorded, so the position checks
    // below can never be skipped silently.
    let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) else {
        panic!("Single-quoted title should be parsed into a title byte range");
    };

    assert!(ctx.is_in_link_title(start));
    assert!(ctx.is_in_link_title(start + 5));
    // The recorded end is exclusive.
    assert!(!ctx.is_in_link_title(end));
}
5628
/// Parenthesized titles may or may not be recognised by the reference
/// definition parser; this test only requires that `is_in_link_title`
/// agrees with whatever title range (if any) was recorded.
#[test]
fn test_is_in_link_title_parentheses() {
    let text = "[ref]: /url (Parenthesized title)\n";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Title range of the first definition, if a definition exists and both
    // of its title bounds were recorded.
    let recorded_title = lint_ctx
        .reference_defs
        .first()
        .and_then(|def| def.title_byte_start.zip(def.title_byte_end));

    match recorded_title {
        Some((first, past_last)) => {
            assert!(lint_ctx.is_in_link_title(first));
            assert!(lint_ctx.is_in_link_title(first + 5));
            assert!(!lint_ctx.is_in_link_title(past_last));
        }
        None => {
            // No definition or no title parsed: nothing may count as title.
            for pos in 0..text.len() {
                assert!(!lint_ctx.is_in_link_title(pos));
            }
        }
    }
}
5657
/// With no reference definitions at all, no offset is inside a link title.
#[test]
fn test_is_in_link_title_no_refs() {
    let text = "Just plain text without any reference definitions.";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    (0..text.len()).for_each(|pos| assert!(!lint_ctx.is_in_link_title(pos)));
}
5669
5670    // =========================================================================
5671    // Math span tests (Issue #289)
5672    // =========================================================================
5673
/// Inline `$...$` math whose body looks like a Markdown link is reported as
/// one non-display math span with the body as its content.
#[test]
fn test_math_spans_inline() {
    let text = "Text with inline math $[f](x)$ in it.";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one inline math span");

    let only = &spans[0];
    assert!(!only.is_display, "Should be inline math, not display");
    assert_eq!(only.content, "[f](x)", "Content should be extracted correctly");
}
5686
/// Single-line `$$...$$` math is reported as one display span that keeps the
/// link-like text in its content.
#[test]
fn test_math_spans_display_single_line() {
    let text = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");

    let only = &spans[0];
    assert!(only.is_display, "Should be display math");

    let keeps_link_like_text = only.content.contains("[x](\\zeta)");
    assert!(keeps_link_like_text, "Content should contain the link-like pattern");
}
5702
/// A `$$` block spread over several lines is reported as one display span.
#[test]
fn test_math_spans_display_multiline() {
    let text = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should detect one display math span");
    assert!(spans[0].is_display, "Should be display math");
}
5714
/// `is_in_math_span` is true for offsets inside `$...$` and false for
/// offsets before and after it.
#[test]
fn test_is_in_math_span() {
    let text = "Text $[f](x)$ more text";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Locate the delimiters in the fixture rather than hard-coding offsets.
    let opening = text.find('$').unwrap();
    let past_closing = text.rfind('$').unwrap() + 1;

    // Two offsets strictly inside the span.
    for inner in [opening + 1, opening + 3] {
        assert!(
            lint_ctx.is_in_math_span(inner),
            "Position inside math span should return true"
        );
    }

    // Offsets on either side of the span.
    assert!(
        !lint_ctx.is_in_math_span(0),
        "Position before math span should return false"
    );
    assert!(
        !lint_ctx.is_in_math_span(past_closing + 1),
        "Position after math span should return false"
    );
}
5740
/// Math and inline code on the same line are tracked separately, each with
/// its own content.
#[test]
fn test_math_spans_mixed_with_code() {
    let text = "Math $[f](x)$ and code `[g](y)` mixed";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let math = lint_ctx.math_spans();
    let code = lint_ctx.code_spans();

    assert_eq!(math.len(), 1, "Should have one math span");
    assert_eq!(code.len(), 1, "Should have one code span");

    // Each kind of span carries only its own body.
    assert_eq!(math[0].content, "[f](x)");
    assert_eq!(code[0].content, "[g](y)");
}
5757
/// Text without any `$` delimiters produces no math spans.
#[test]
fn test_math_spans_no_math() {
    let text = "Regular text without any math at all.";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.math_spans().is_empty(), "Should have no math spans");
}
5766
/// Two inline spans and one display span on a single line are all detected
/// and classified.
#[test]
fn test_math_spans_multiple() {
    let text = "First $a$ and second $b$ and display $$c$$";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 3, "Should detect three math spans");

    // Tally the spans by kind in one pass: (inline, display).
    let (inline_total, display_total) =
        spans.iter().fold((0usize, 0usize), |(inline, display), span| {
            if span.is_display {
                (inline, display + 1)
            } else {
                (inline + 1, display)
            }
        });

    assert_eq!(inline_total, 2, "Should have two inline math spans");
    assert_eq!(display_total, 1, "Should have one display math span");
}
5782
/// Boundary behaviour of `is_in_math_span` for a document that is exactly one
/// inline span: the start is inclusive and `byte_end` is exclusive.
#[test]
fn test_is_in_math_span_boundary_positions() {
    // The whole 8-byte document is the span: `$[f](x)$` (bytes 0..=7).
    let text = "$[f](x)$";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1, "Should have one math span");

    let only = &spans[0];

    // (offset, expected membership, failure message), walking from the
    // opening delimiter to one past the closing delimiter.
    let expectations = [
        (only.byte_offset, true, "Start position should be in span"),
        (only.byte_offset + 1, true, "Position after start should be in span"),
        (only.byte_end - 1, true, "Position at end-1 should be in span"),
        (
            only.byte_end,
            false,
            "Position at byte_end should NOT be in span (exclusive)",
        ),
    ];

    for (pos, expected, why) in expectations {
        assert!(lint_ctx.is_in_math_span(pos) == expected, "{why}");
    }
}
5819
/// A math span that opens the document starts at byte offset 0.
#[test]
fn test_math_spans_at_document_start() {
    let text = "$x$ text";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let leading = &spans[0];
    assert_eq!(leading.byte_offset, 0, "Math should start at byte 0");
}
5829
/// A math span that closes the document ends at the document length.
#[test]
fn test_math_spans_at_document_end() {
    let text = "text $x$";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    let spans = lint_ctx.math_spans();
    assert_eq!(spans.len(), 1);

    let trailing = &spans[0];
    assert_eq!(trailing.byte_end, text.len(), "Math should end at document end");
}
5839
/// Back-to-back math (`$a$$b$`): however the parser splits it, at least one
/// span exists and every byte of the document is covered by some math span.
#[test]
fn test_math_spans_consecutive() {
    let text = "$a$$b$";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // The exact number of spans is left to the parser; only require "some".
    let found_any = !lint_ctx.math_spans().is_empty();
    assert!(found_any, "Should detect at least one math span");

    // Fail on the first uncovered offset, if there is one.
    if let Some(i) = (0..text.len()).find(|&pos| !lint_ctx.is_in_math_span(pos)) {
        panic!("Position {i} should be in a math span");
    }
}
5854
/// A lone `$` used as a currency sign must not produce a math span whose
/// content includes the amount.
#[test]
fn test_math_spans_currency_not_math() {
    // Only one `$` is present, so there is no closing delimiter.
    let text = "Price is $100";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Vacuously true when there are no spans at all.
    let amount_never_captured = lint_ctx
        .math_spans()
        .iter()
        .all(|span| !span.content.contains("100"));

    assert!(
        amount_never_captured,
        "Unbalanced $ should not create math span containing 100"
    );
}
5869
5870    // =========================================================================
5871    // Tests for O(1) reference definition lookups via HashMap
5872    // =========================================================================
5873
/// `get_reference_url` resolves ids regardless of the case used in the
/// definition or the query, and returns `None` for unknown ids.
#[test]
fn test_reference_lookup_o1_basic() {
    let text = r#"[ref1]: /url1
[REF2]: /url2 "Title"
[Ref3]: /url3

Use [link][ref1] and [link][REF2]."#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // All three definitions are collected.
    assert_eq!(lint_ctx.reference_defs.len(), 3);

    // (queried id, expected URL); several queries differ from the
    // definition only by case.
    let lookups = [
        ("ref1", Some("/url1")),
        ("REF1", Some("/url1")),
        ("Ref1", Some("/url1")),
        ("ref2", Some("/url2")),
        ("REF2", Some("/url2")),
        ("ref3", Some("/url3")),
        ("nonexistent", None),
    ];

    for (id, expected_url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), expected_url);
    }
}
5895
/// `get_reference_def` returns the full definition (URL and title), matches
/// ids case-insensitively, and yields `None` for unknown ids.
#[test]
fn test_reference_lookup_o1_get_reference_def() {
    let text = r#"[myref]: https://example.com "My Title"
"#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Lookup with the id exactly as written in the definition.
    let exact = lint_ctx.get_reference_def("myref").expect("Should find myref");
    assert_eq!(exact.url, "https://example.com");
    assert_eq!(exact.title.as_deref(), Some("My Title"));

    // Lookup with the id upper-cased.
    let shouted = lint_ctx.get_reference_def("MYREF").expect("Should find MYREF");
    assert_eq!(shouted.url, "https://example.com");

    // Unknown id.
    let missing = lint_ctx.get_reference_def("nonexistent");
    assert!(missing.is_none());
}
5914
/// `has_reference_def` matches ids case-insensitively and rejects ids that
/// were never defined.
#[test]
fn test_reference_lookup_o1_has_reference_def() {
    let text = r#"[foo]: /foo
[BAR]: /bar
"#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // Defined ids, queried both as written and with different casing.
    for known in ["foo", "FOO", "bar", "Bar"] {
        assert!(lint_ctx.has_reference_def(known));
    }

    // An id with no definition.
    assert!(!lint_ctx.has_reference_def("baz"));
}
5929
/// With no reference definitions, every lookup API reports "not found".
#[test]
fn test_reference_lookup_o1_empty_content() {
    let text = "No references here.";
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    assert!(lint_ctx.reference_defs.is_empty());

    let probe = "anything";
    assert_eq!(lint_ctx.get_reference_url(probe), None);
    assert!(lint_ctx.get_reference_def(probe).is_none());
    assert!(!lint_ctx.has_reference_def(probe));
}
5940
/// Ids containing dashes, underscores and dots resolve to their URLs.
#[test]
fn test_reference_lookup_o1_special_characters_in_id() {
    let text = r#"[ref-with-dash]: /url1
[ref_with_underscore]: /url2
[ref.with.dots]: /url3
"#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // (id, expected URL), one entry per punctuation style.
    let lookups = [
        ("ref-with-dash", "/url1"),
        ("ref_with_underscore", "/url2"),
        ("ref.with.dots", "/url3"),
    ];

    for (id, url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), Some(url));
    }
}
5953
/// Non-ASCII ids resolve, including when the query differs from the
/// definition only by the case of an accented letter.
#[test]
fn test_reference_lookup_o1_unicode_id() {
    let text = r#"[日本語]: /japanese
[émoji]: /emoji
"#;
    let lint_ctx = LintContext::new(text, MarkdownFlavor::Standard, None);

    // The last query is the upper-cased form of the "émoji" definition.
    let lookups = [
        ("日本語", "/japanese"),
        ("émoji", "/emoji"),
        ("ÉMOJI", "/emoji"),
    ];

    for (id, url) in lookups {
        assert_eq!(lint_ctx.get_reference_url(id), Some(url));
    }
}
5965}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs