rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::inline_config::InlineConfig;
3use crate::rules::front_matter_utils::FrontMatterUtils;
4use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
5use crate::utils::element_cache::ElementCache;
6use crate::utils::mkdocs_html_markdown::MarkdownHtmlTracker;
7use crate::utils::regex_cache::URL_SIMPLE_REGEX;
8use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
9use regex::Regex;
10use std::borrow::Cow;
11use std::collections::HashMap;
12use std::path::PathBuf;
13use std::sync::LazyLock;
14
/// Times a section of code and optionally reports the elapsed time on stderr.
///
/// Arguments: a display name, a boolean switch, and the expression to run.
/// The expression is always evaluated and its value is the value of the macro;
/// the switch only controls whether a `[PROFILE]` line is printed.
/// This variant is compiled for every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// `wasm32` variant of `profile_section!`: runs the expression with no timing or output.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
32
33// Comprehensive link pattern that captures both inline and reference links
34// Use (?s) flag to make . match newlines
35static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
36    Regex::new(
37        r#"(?sx)
38        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
39        (?:
40            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
41            |
42            \[([^\]]*)\]      # Reference ID in group 6
43        )"#
44    ).unwrap()
45});
46
47// Image pattern (similar to links but with ! prefix)
48// Use (?s) flag to make . match newlines
49static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
50    Regex::new(
51        r#"(?sx)
52        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
53        (?:
54            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
55            |
56            \[([^\]]*)\]      # Reference ID in group 6
57        )"#
58    ).unwrap()
59});
60
61// Reference definition pattern
62static REF_DEF_PATTERN: LazyLock<Regex> =
63    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
64
// Bare URL detection reuses the centralized `URL_SIMPLE_REGEX` imported from `regex_cache`, so no pattern is defined here.
66
67// Pattern for email addresses
68static BARE_EMAIL_PATTERN: LazyLock<Regex> =
69    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
70
71// Pattern for blockquote prefix in parse_list_blocks
72static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
73
74/// Pre-computed information about a line
75#[derive(Debug, Clone)]
76pub struct LineInfo {
77    /// Byte offset where this line starts in the document
78    pub byte_offset: usize,
79    /// Length of the line in bytes (without newline)
80    pub byte_len: usize,
81    /// Number of bytes of leading whitespace (for substring extraction)
82    pub indent: usize,
83    /// Visual column width of leading whitespace (with proper tab expansion)
84    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
85    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
86    pub visual_indent: usize,
87    /// Whether the line is blank (empty or only whitespace)
88    pub is_blank: bool,
89    /// Whether this line is inside a code block
90    pub in_code_block: bool,
91    /// Whether this line is inside front matter
92    pub in_front_matter: bool,
93    /// Whether this line is inside an HTML block
94    pub in_html_block: bool,
95    /// Whether this line is inside an HTML comment
96    pub in_html_comment: bool,
97    /// List item information if this line starts a list item
98    pub list_item: Option<ListItemInfo>,
99    /// Heading information if this line is a heading
100    pub heading: Option<HeadingInfo>,
101    /// Blockquote information if this line is a blockquote
102    pub blockquote: Option<BlockquoteInfo>,
103    /// Whether this line is inside a mkdocstrings autodoc block
104    pub in_mkdocstrings: bool,
105    /// Whether this line is part of an ESM import/export block (MDX only)
106    pub in_esm_block: bool,
107    /// Whether this line is a continuation of a multi-line code span from a previous line
108    pub in_code_span_continuation: bool,
109    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
110    /// Pre-computed for consistent detection across all rules
111    pub is_horizontal_rule: bool,
112    /// Whether this line is inside a math block ($$ ... $$)
113    pub in_math_block: bool,
114    /// Whether this line is inside a Quarto div block (::: ... :::)
115    pub in_quarto_div: bool,
116    /// Whether this line contains or is inside a JSX expression (MDX only)
117    pub in_jsx_expression: bool,
118    /// Whether this line is inside an MDX comment {/* ... */} (MDX only)
119    pub in_mdx_comment: bool,
120    /// Whether this line is inside a JSX component (MDX only)
121    pub in_jsx_component: bool,
122    /// Whether this line is inside a JSX fragment (MDX only)
123    pub in_jsx_fragment: bool,
124    /// Whether this line is inside an MkDocs admonition block (!!! or ???)
125    pub in_admonition: bool,
126    /// Whether this line is inside an MkDocs content tab block (===)
127    pub in_content_tab: bool,
128    /// Whether this line is inside an HTML block with markdown attribute (MkDocs grid cards, etc.)
129    pub in_mkdocs_html_markdown: bool,
130    /// Whether this line is a definition list item (: definition)
131    pub in_definition_list: bool,
132    /// Whether this line is inside an Obsidian comment (%%...%% syntax, Obsidian flavor only)
133    pub in_obsidian_comment: bool,
134}
135
136impl LineInfo {
137    /// Get the line content as a string slice from the source document
138    pub fn content<'a>(&self, source: &'a str) -> &'a str {
139        &source[self.byte_offset..self.byte_offset + self.byte_len]
140    }
141
142    /// Check if this line is inside MkDocs-specific indented content (admonitions, tabs, or markdown HTML).
143    /// This content uses 4-space indentation which pulldown-cmark would interpret as code blocks,
144    /// but in MkDocs flavor it's actually container content that should be preserved.
145    #[inline]
146    pub fn in_mkdocs_container(&self) -> bool {
147        self.in_admonition || self.in_content_tab || self.in_mkdocs_html_markdown
148    }
149}
150
/// Details of a list item found at the start of a line
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`
    pub marker: String,
    /// `true` for an ordered list item, `false` for an unordered one
    pub is_ordered: bool,
    /// Item number, for ordered lists
    pub number: Option<usize>,
    /// 0-based column at which the marker begins
    pub marker_column: usize,
    /// Column at which the content following the marker begins
    pub content_column: usize,
}
165
/// Syntax used to write a heading.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so the style can be used
/// as a key in hash-based collections.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading underlined with `=`
    Setext1,
    /// Setext style heading underlined with `-`
    Setext2,
}
176
177/// Parsed link information
178#[derive(Debug, Clone)]
179pub struct ParsedLink<'a> {
180    /// Line number (1-indexed)
181    pub line: usize,
182    /// Start column (0-indexed) in the line
183    pub start_col: usize,
184    /// End column (0-indexed) in the line
185    pub end_col: usize,
186    /// Byte offset in document
187    pub byte_offset: usize,
188    /// End byte offset in document
189    pub byte_end: usize,
190    /// Link text
191    pub text: Cow<'a, str>,
192    /// Link URL or reference
193    pub url: Cow<'a, str>,
194    /// Whether this is a reference link [text][ref] vs inline [text](url)
195    pub is_reference: bool,
196    /// Reference ID for reference links
197    pub reference_id: Option<Cow<'a, str>>,
198    /// Link type from pulldown-cmark
199    pub link_type: LinkType,
200}
201
/// A broken link as reported by pulldown-cmark
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// Reference text that could not be resolved
    pub reference: String,
    /// Byte range of the link within the source document
    pub span: std::ops::Range<usize>,
}
210
/// A footnote reference found in the document (e.g., `[^1]`, `[^note]`)
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// Footnote ID with the `^` prefix removed
    pub id: String,
    /// 1-indexed line number
    pub line: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
}
223
224/// Parsed image information
225#[derive(Debug, Clone)]
226pub struct ParsedImage<'a> {
227    /// Line number (1-indexed)
228    pub line: usize,
229    /// Start column (0-indexed) in the line
230    pub start_col: usize,
231    /// End column (0-indexed) in the line
232    pub end_col: usize,
233    /// Byte offset in document
234    pub byte_offset: usize,
235    /// End byte offset in document
236    pub byte_end: usize,
237    /// Alt text
238    pub alt_text: Cow<'a, str>,
239    /// Image URL or reference
240    pub url: Cow<'a, str>,
241    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
242    pub is_reference: bool,
243    /// Reference ID for reference images
244    pub reference_id: Option<Cow<'a, str>>,
245    /// Link type from pulldown-cmark
246    pub link_type: LinkType,
247}
248
/// A reference definition of the form `[ref]: url "title"`
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// 1-indexed line number
    pub line: usize,
    /// Reference ID, normalized to lowercase
    pub id: String,
    /// Target URL
    pub url: String,
    /// Title, when one is given
    pub title: Option<String>,
    /// Byte offset at which the definition begins
    pub byte_offset: usize,
    /// Byte offset at which the definition ends
    pub byte_end: usize,
    /// Byte offset at which the title begins, quote included (if a title is present)
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends, quote included (if a title is present)
    pub title_byte_end: Option<usize>,
}
269
/// An inline code span found in the document
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// 1-indexed line on which the code span begins
    pub line: usize,
    /// 1-indexed line on which the code span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Text between the backticks, delimiters excluded
    pub content: String,
}
290
/// A math span found in the document (inline `$...$` or display `$$...$$`)
#[derive(Debug, Clone)]
pub struct MathSpan {
    /// 1-indexed line on which the math span begins
    pub line: usize,
    /// 1-indexed line on which the math span ends
    pub end_line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// `true` for display math (`$$...$$`), `false` for inline math (`$...$`)
    pub is_display: bool,
    /// Text between the math delimiters
    pub content: String,
}
311
312/// Information about a heading
313#[derive(Debug, Clone)]
314pub struct HeadingInfo {
315    /// Heading level (1-6 for ATX, 1-2 for Setext)
316    pub level: u8,
317    /// Style of heading
318    pub style: HeadingStyle,
319    /// The heading marker (# characters or underline)
320    pub marker: String,
321    /// Column where the marker starts (0-based)
322    pub marker_column: usize,
323    /// Column where heading text starts
324    pub content_column: usize,
325    /// The heading text (without markers and without custom ID syntax)
326    pub text: String,
327    /// Custom header ID if present (e.g., from {#custom-id} syntax)
328    pub custom_id: Option<String>,
329    /// Original heading text including custom ID syntax
330    pub raw_text: String,
331    /// Whether it has a closing sequence (for ATX)
332    pub has_closing_sequence: bool,
333    /// The closing sequence if present
334    pub closing_sequence: String,
335    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
336    /// False for malformed headings like `#NoSpace` that MD018 should flag
337    pub is_valid: bool,
338}
339
340/// A valid heading from a filtered iteration
341///
342/// Only includes headings that are CommonMark-compliant (have space after #).
343/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
344#[derive(Debug, Clone)]
345pub struct ValidHeading<'a> {
346    /// The 1-indexed line number in the document
347    pub line_num: usize,
348    /// Reference to the heading information
349    pub heading: &'a HeadingInfo,
350    /// Reference to the full line info (for rules that need additional context)
351    pub line_info: &'a LineInfo,
352}
353
354/// Iterator over valid CommonMark headings in a document
355///
356/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
357/// but should not be processed by other heading rules.
358pub struct ValidHeadingsIter<'a> {
359    lines: &'a [LineInfo],
360    current_index: usize,
361}
362
363impl<'a> ValidHeadingsIter<'a> {
364    fn new(lines: &'a [LineInfo]) -> Self {
365        Self {
366            lines,
367            current_index: 0,
368        }
369    }
370}
371
372impl<'a> Iterator for ValidHeadingsIter<'a> {
373    type Item = ValidHeading<'a>;
374
375    fn next(&mut self) -> Option<Self::Item> {
376        while self.current_index < self.lines.len() {
377            let idx = self.current_index;
378            self.current_index += 1;
379
380            let line_info = &self.lines[idx];
381            if let Some(heading) = &line_info.heading
382                && heading.is_valid
383            {
384                return Some(ValidHeading {
385                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
386                    heading,
387                    line_info,
388                });
389            }
390        }
391        None
392    }
393}
394
/// Details of a blockquote line
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Depth of nesting (1 for `>`, 2 for `>>`, etc.)
    pub nesting_level: usize,
    /// Indentation preceding the blockquote marker
    pub indent: String,
    /// 0-based column at which the first `>` begins
    pub marker_column: usize,
    /// Blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Text following the blockquote marker(s)
    pub content: String,
    /// True when no space follows the marker
    pub has_no_space_after_marker: bool,
    /// True when more than one space follows the marker
    pub has_multiple_spaces_after_marker: bool,
    /// True when this is an empty blockquote line that needs the MD028 fix
    pub needs_md028_fix: bool,
}
415
/// Details of a list block
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// 1-indexed line on which the list begins
    pub start_line: usize,
    /// 1-indexed line on which the list ends
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one
    pub is_ordered: bool,
    /// Consistent marker of an unordered list, if there is one
    pub marker: Option<String>,
    /// Blockquote prefix of this list; empty when the list is not in a blockquote
    pub blockquote_prefix: String,
    /// Lines within this block that are list items
    pub item_lines: Vec<usize>,
    /// Depth of nesting (0 for top-level lists)
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
436
437use std::sync::{Arc, OnceLock};
438
/// List item data keyed by the byte offset of the line.
/// Tuple layout: (is_ordered, marker, marker_column, content_column, number)
type ListItemMap = HashMap<usize, (bool, String, usize, usize, Option<usize>)>;

/// Byte ranges, stored as `(usize, usize)` pairs, used when detecting JSX expressions and MDX comments
type ByteRanges = Vec<(usize, usize)>;
444
/// Per-character counts used for fast content analysis
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings)
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Number of `+` characters (lists)
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes)
    pub gt_count: usize,
    /// Number of `|` characters (tables)
    pub pipe_count: usize,
    /// Number of `[` characters (links, images)
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Number of `!` characters (images)
    pub exclamation_count: usize,
    /// Number of newline characters
    pub newline_count: usize,
}
473
/// An HTML tag found in the document
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Name of the tag (e.g., "div", "img", "br")
    pub tag_name: String,
    /// True for a closing tag (`</tag>`)
    pub is_closing: bool,
    /// True for a self-closing tag (`<tag />`)
    pub is_self_closing: bool,
    /// The tag's raw content
    pub raw_content: String,
}
496
/// An emphasis span found in the document
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// Emphasis character (`'*'` or `'_'`)
    pub marker: char,
    /// How many markers are used (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Text enclosed by the emphasis markers
    pub content: String,
}
517
/// A table row found in the document
#[derive(Debug, Clone)]
pub struct TableRow {
    /// 1-indexed line number
    pub line: usize,
    /// True for a separator row (made up only of `|`, `-`, `:`, and spaces)
    pub is_separator: bool,
    /// How many columns (pipe-separated cells) the row has
    pub column_count: usize,
    /// Alignment information taken from the separator row
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
530
/// A bare URL found in the document, i.e. one that is not inside a link
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// 1-indexed line number
    pub line: usize,
    /// 0-indexed start column within the line
    pub start_col: usize,
    /// 0-indexed end column within the line
    pub end_col: usize,
    /// Starting byte offset within the document
    pub byte_offset: usize,
    /// Ending byte offset within the document
    pub byte_end: usize,
    /// The URL text
    pub url: String,
    /// Kind of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
549
550pub struct LintContext<'a> {
551    pub content: &'a str,
552    pub line_offsets: Vec<usize>,
553    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
554    pub lines: Vec<LineInfo>,             // Pre-computed line information
555    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
556    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
557    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
558    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
559    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
560    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
561    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
562    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
563    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
564    pub char_frequency: CharFrequency,    // Character frequency analysis
565    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
566    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
567    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
568    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
569    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
570    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
571    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
572    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
573    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
574    pub flavor: MarkdownFlavor,           // Markdown flavor being used
575    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
576    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
577    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
578    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
579    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
580    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
581    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
582}
583
/// The pieces of a blockquote line, each borrowed from the original line
struct BlockquoteComponents<'a> {
    /// Spaces/tabs before the first `>`
    indent: &'a str,
    /// The consecutive run of `>` characters
    markers: &'a str,
    /// Spaces/tabs directly after the markers
    spaces_after: &'a str,
    /// Everything after `spaces_after`
    content: &'a str,
}

/// Split a line into blockquote components without using a regex.
///
/// Returns `None` unless the first character after any leading spaces/tabs is `>`.
/// Only a contiguous run of `>` counts as markers; a `>` that follows the
/// post-marker whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    let is_space_or_tab = |c: char| c == ' ' || c == '\t';

    // Peel off the leading indentation.
    let after_indent = line.trim_start_matches(is_space_or_tab);
    let (indent, _) = line.split_at(line.len() - after_indent.len());

    // Peel off the marker run; an empty run means this is not a blockquote line.
    let after_markers = after_indent.trim_start_matches('>');
    let (markers, _) = after_indent.split_at(after_indent.len() - after_markers.len());
    if markers.is_empty() {
        return None;
    }

    // Whatever whitespace follows the markers separates them from the content.
    let content = after_markers.trim_start_matches(is_space_or_tab);
    let (spaces_after, _) = after_markers.split_at(after_markers.len() - content.len());

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
628
629impl<'a> LintContext<'a> {
630    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
631        #[cfg(not(target_arch = "wasm32"))]
632        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
633        #[cfg(target_arch = "wasm32")]
634        let profile = false;
635
636        let line_offsets = profile_section!("Line offsets", profile, {
637            let mut offsets = vec![0];
638            for (i, c) in content.char_indices() {
639                if c == '\n' {
640                    offsets.push(i + 1);
641                }
642            }
643            offsets
644        });
645
646        // Detect code blocks and code spans once and cache them
647        let (code_blocks, code_span_ranges) = profile_section!(
648            "Code blocks",
649            profile,
650            CodeBlockUtils::detect_code_blocks_and_spans(content)
651        );
652
653        // Pre-compute HTML comment ranges ONCE for all operations
654        let html_comment_ranges = profile_section!(
655            "HTML comment ranges",
656            profile,
657            crate::utils::skip_context::compute_html_comment_ranges(content)
658        );
659
660        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
661        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
662            if flavor == MarkdownFlavor::MkDocs {
663                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
664            } else {
665                Vec::new()
666            }
667        });
668
669        // Pre-compute Quarto div block ranges for Quarto flavor
670        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
671            if flavor == MarkdownFlavor::Quarto {
672                crate::utils::quarto_divs::detect_div_block_ranges(content)
673            } else {
674                Vec::new()
675            }
676        });
677
678        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
679        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
680        let (mut lines, emphasis_spans) = profile_section!(
681            "Basic line info",
682            profile,
683            Self::compute_basic_line_info(
684                content,
685                &line_offsets,
686                &code_blocks,
687                flavor,
688                &html_comment_ranges,
689                &autodoc_ranges,
690                &quarto_div_ranges,
691            )
692        );
693
694        // Detect HTML blocks BEFORE heading detection
695        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
696
697        // Detect ESM import/export blocks in MDX files BEFORE heading detection
698        profile_section!(
699            "ESM blocks",
700            profile,
701            Self::detect_esm_blocks(content, &mut lines, flavor)
702        );
703
704        // Detect JSX expressions and MDX comments in MDX files
705        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
706            "JSX/MDX detection",
707            profile,
708            Self::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
709        );
710
711        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
712        profile_section!(
713            "MkDocs constructs",
714            profile,
715            Self::detect_mkdocs_line_info(content, &mut lines, flavor)
716        );
717
718        // Detect Obsidian comments (%%...%%) in Obsidian flavor
719        let obsidian_comment_ranges = profile_section!(
720            "Obsidian comments",
721            profile,
722            Self::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
723        );
724
725        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
726        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
727
728        // Now detect headings and blockquotes
729        profile_section!(
730            "Headings & blockquotes",
731            profile,
732            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
733        );
734
735        // Parse code spans early so we can exclude them from link/image parsing
736        let code_spans = profile_section!(
737            "Code spans",
738            profile,
739            Self::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
740        );
741
742        // Mark lines that are continuations of multi-line code spans
743        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
744        for span in &code_spans {
745            if span.end_line > span.line {
746                // Mark lines after the first line as continuations
747                for line_num in (span.line + 1)..=span.end_line {
748                    if let Some(line_info) = lines.get_mut(line_num - 1) {
749                        line_info.in_code_span_continuation = true;
750                    }
751                }
752            }
753        }
754
755        // Parse links, images, references, and list blocks
756        let (links, broken_links, footnote_refs) = profile_section!(
757            "Links",
758            profile,
759            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
760        );
761
762        let images = profile_section!(
763            "Images",
764            profile,
765            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
766        );
767
768        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
769
770        // Build O(1) lookup map for reference definitions by lowercase ID
771        let reference_defs_map: HashMap<String, usize> = reference_defs
772            .iter()
773            .enumerate()
774            .map(|(idx, def)| (def.id.to_lowercase(), idx))
775            .collect();
776
777        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
778
779        // Compute character frequency for fast content analysis
780        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
781
782        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
783        let table_blocks = profile_section!(
784            "Table blocks",
785            profile,
786            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
787                content,
788                &code_blocks,
789                &code_spans,
790                &html_comment_ranges,
791            )
792        );
793
794        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
795        let line_index = profile_section!(
796            "Line index",
797            profile,
798            crate::utils::range_utils::LineIndex::new(content)
799        );
800
801        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
802        let jinja_ranges = profile_section!(
803            "Jinja ranges",
804            profile,
805            crate::utils::jinja_utils::find_jinja_ranges(content)
806        );
807
808        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
809        let citation_ranges = profile_section!("Citation ranges", profile, {
810            if flavor == MarkdownFlavor::Quarto {
811                crate::utils::quarto_divs::find_citation_ranges(content)
812            } else {
813                Vec::new()
814            }
815        });
816
817        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
818        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
819            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
820            let mut ranges = Vec::new();
821            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
822                ranges.push((mat.start(), mat.end()));
823            }
824            ranges
825        });
826
827        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
828
829        Self {
830            content,
831            line_offsets,
832            code_blocks,
833            lines,
834            links,
835            images,
836            broken_links,
837            footnote_refs,
838            reference_defs,
839            reference_defs_map,
840            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
841            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
842            list_blocks,
843            char_frequency,
844            html_tags_cache: OnceLock::new(),
845            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
846            table_rows_cache: OnceLock::new(),
847            bare_urls_cache: OnceLock::new(),
848            has_mixed_list_nesting_cache: OnceLock::new(),
849            html_comment_ranges,
850            table_blocks,
851            line_index,
852            jinja_ranges,
853            flavor,
854            source_file,
855            jsx_expression_ranges,
856            mdx_comment_ranges,
857            citation_ranges,
858            shortcode_ranges,
859            inline_config,
860            obsidian_comment_ranges,
861        }
862    }
863
864    /// Check if a rule is disabled at a specific line number (1-indexed)
865    ///
866    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
867    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
868    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
869        self.inline_config.is_rule_disabled(rule_name, line_number)
870    }
871
872    /// Get code spans - computed lazily on first access
873    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
874        Arc::clone(
875            self.code_spans_cache
876                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
877        )
878    }
879
880    /// Get math spans - computed lazily on first access
881    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
882        Arc::clone(
883            self.math_spans_cache
884                .get_or_init(|| Arc::new(Self::parse_math_spans(self.content, &self.lines))),
885        )
886    }
887
888    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
889    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
890        let math_spans = self.math_spans();
891        math_spans
892            .iter()
893            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
894    }
895
896    /// Get HTML comment ranges - pre-computed during LintContext construction
897    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
898        &self.html_comment_ranges
899    }
900
901    /// Get Obsidian comment ranges - pre-computed during LintContext construction
902    /// Returns empty slice for non-Obsidian flavors
903    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
904        &self.obsidian_comment_ranges
905    }
906
907    /// Check if a byte position is inside an Obsidian comment
908    ///
909    /// Returns false for non-Obsidian flavors.
910    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
911        self.obsidian_comment_ranges
912            .iter()
913            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
914    }
915
916    /// Check if a line/column position is inside an Obsidian comment
917    ///
918    /// Line number is 1-indexed, column is 1-indexed.
919    /// Returns false for non-Obsidian flavors.
920    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
921        if self.obsidian_comment_ranges.is_empty() {
922            return false;
923        }
924
925        // Convert line/column (1-indexed, char-based) to byte position
926        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
927        self.is_in_obsidian_comment(byte_pos)
928    }
929
930    /// Get HTML tags - computed lazily on first access
931    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
932        Arc::clone(self.html_tags_cache.get_or_init(|| {
933            Arc::new(Self::parse_html_tags(
934                self.content,
935                &self.lines,
936                &self.code_blocks,
937                self.flavor,
938            ))
939        }))
940    }
941
942    /// Get emphasis spans - pre-computed during construction
943    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
944        Arc::clone(
945            self.emphasis_spans_cache
946                .get()
947                .expect("emphasis_spans_cache initialized during construction"),
948        )
949    }
950
951    /// Get table rows - computed lazily on first access
952    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
953        Arc::clone(
954            self.table_rows_cache
955                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
956        )
957    }
958
959    /// Get bare URLs - computed lazily on first access
960    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
961        Arc::clone(
962            self.bare_urls_cache
963                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
964        )
965    }
966
967    /// Check if document has mixed ordered/unordered list nesting.
968    /// Result is cached after first computation (document-level invariant).
969    /// This is used by MD007 for smart style auto-detection.
970    pub fn has_mixed_list_nesting(&self) -> bool {
971        *self
972            .has_mixed_list_nesting_cache
973            .get_or_init(|| self.compute_mixed_list_nesting())
974    }
975
976    /// Internal computation for mixed list nesting (only called once per LintContext).
977    fn compute_mixed_list_nesting(&self) -> bool {
978        // Track parent list items by their marker position and type
979        // Using marker_column instead of indent because it works correctly
980        // for blockquoted content where indent doesn't account for the prefix
981        // Stack stores: (marker_column, is_ordered)
982        let mut stack: Vec<(usize, bool)> = Vec::new();
983        let mut last_was_blank = false;
984
985        for line_info in &self.lines {
986            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
987            if line_info.in_code_block
988                || line_info.in_front_matter
989                || line_info.in_mkdocstrings
990                || line_info.in_html_comment
991                || line_info.in_esm_block
992            {
993                continue;
994            }
995
996            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
997            if line_info.is_blank {
998                last_was_blank = true;
999                continue;
1000            }
1001
1002            if let Some(list_item) = &line_info.list_item {
1003                // Normalize column 1 to column 0 (consistent with MD007 check function)
1004                let current_pos = if list_item.marker_column == 1 {
1005                    0
1006                } else {
1007                    list_item.marker_column
1008                };
1009
1010                // If there was a blank line and this item is at root level, reset stack
1011                if last_was_blank && current_pos == 0 {
1012                    stack.clear();
1013                }
1014                last_was_blank = false;
1015
1016                // Pop items at same or greater position (they're siblings or deeper, not parents)
1017                while let Some(&(pos, _)) = stack.last() {
1018                    if pos >= current_pos {
1019                        stack.pop();
1020                    } else {
1021                        break;
1022                    }
1023                }
1024
1025                // Check if immediate parent has different type - this is mixed nesting
1026                if let Some(&(_, parent_is_ordered)) = stack.last()
1027                    && parent_is_ordered != list_item.is_ordered
1028                {
1029                    return true; // Found mixed nesting - early exit
1030                }
1031
1032                stack.push((current_pos, list_item.is_ordered));
1033            } else {
1034                // Non-list line (but not blank) - could be paragraph or other content
1035                last_was_blank = false;
1036            }
1037        }
1038
1039        false
1040    }
1041
1042    /// Map a byte offset to (line, column)
1043    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1044        match self.line_offsets.binary_search(&offset) {
1045            Ok(line) => (line + 1, 1),
1046            Err(line) => {
1047                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1048                (line, offset - line_start + 1)
1049            }
1050        }
1051    }
1052
1053    /// Check if a position is within a code block or code span
1054    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1055        // Check code blocks first
1056        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1057            return true;
1058        }
1059
1060        // Check inline code spans (lazy load if needed)
1061        self.code_spans()
1062            .iter()
1063            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
1064    }
1065
1066    /// Get line information by line number (1-indexed)
1067    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1068        if line_num > 0 {
1069            self.lines.get(line_num - 1)
1070        } else {
1071            None
1072        }
1073    }
1074
1075    /// Get byte offset for a line number (1-indexed)
1076    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
1077        self.line_info(line_num).map(|info| info.byte_offset)
1078    }
1079
1080    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1081    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1082        let normalized_id = ref_id.to_lowercase();
1083        self.reference_defs_map
1084            .get(&normalized_id)
1085            .map(|&idx| self.reference_defs[idx].url.as_str())
1086    }
1087
1088    /// Get a reference definition by its ID (O(1) lookup via HashMap)
1089    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
1090        let normalized_id = ref_id.to_lowercase();
1091        self.reference_defs_map
1092            .get(&normalized_id)
1093            .map(|&idx| &self.reference_defs[idx])
1094    }
1095
1096    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
1097    pub fn has_reference_def(&self, ref_id: &str) -> bool {
1098        let normalized_id = ref_id.to_lowercase();
1099        self.reference_defs_map.contains_key(&normalized_id)
1100    }
1101
1102    /// Check if a line is part of a list block
1103    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1104        self.list_blocks
1105            .iter()
1106            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1107    }
1108
1109    /// Get the list block containing a specific line
1110    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
1111        self.list_blocks
1112            .iter()
1113            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
1114    }
1115
1116    // Compatibility methods for DocumentStructure migration
1117
1118    /// Check if a line is within a code block
1119    pub fn is_in_code_block(&self, line_num: usize) -> bool {
1120        if line_num == 0 || line_num > self.lines.len() {
1121            return false;
1122        }
1123        self.lines[line_num - 1].in_code_block
1124    }
1125
1126    /// Check if a line is within front matter
1127    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
1128        if line_num == 0 || line_num > self.lines.len() {
1129            return false;
1130        }
1131        self.lines[line_num - 1].in_front_matter
1132    }
1133
1134    /// Check if a line is within an HTML block
1135    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1136        if line_num == 0 || line_num > self.lines.len() {
1137            return false;
1138        }
1139        self.lines[line_num - 1].in_html_block
1140    }
1141
1142    /// Check if a line and column is within a code span
1143    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1144        if line_num == 0 || line_num > self.lines.len() {
1145            return false;
1146        }
1147
1148        // Use the code spans cache to check
1149        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1150        // Convert col to 0-indexed for comparison
1151        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1152        let code_spans = self.code_spans();
1153        code_spans.iter().any(|span| {
1154            // Check if line is within the span's line range
1155            if line_num < span.line || line_num > span.end_line {
1156                return false;
1157            }
1158
1159            if span.line == span.end_line {
1160                // Single-line span: check column bounds
1161                col_0indexed >= span.start_col && col_0indexed < span.end_col
1162            } else if line_num == span.line {
1163                // First line of multi-line span: anything after start_col is in span
1164                col_0indexed >= span.start_col
1165            } else if line_num == span.end_line {
1166                // Last line of multi-line span: anything before end_col is in span
1167                col_0indexed < span.end_col
1168            } else {
1169                // Middle line of multi-line span: entire line is in span
1170                true
1171            }
1172        })
1173    }
1174
1175    /// Check if a byte offset is within a code span
1176    #[inline]
1177    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1178        let code_spans = self.code_spans();
1179        code_spans
1180            .iter()
1181            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
1182    }
1183
1184    /// Check if a byte position is within a reference definition
1185    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
1186    #[inline]
1187    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1188        self.reference_defs
1189            .iter()
1190            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
1191    }
1192
1193    /// Check if a byte position is within an HTML comment
1194    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
1195    /// where k is the number of HTML comments (typically very small)
1196    #[inline]
1197    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1198        self.html_comment_ranges
1199            .iter()
1200            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1201    }
1202
1203    /// Check if a byte position is within an HTML tag (including multiline tags)
1204    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
1205    #[inline]
1206    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1207        self.html_tags()
1208            .iter()
1209            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
1210    }
1211
1212    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1213    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1214        self.jinja_ranges
1215            .iter()
1216            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1217    }
1218
1219    /// Check if a byte position is within a JSX expression (MDX: {expression})
1220    #[inline]
1221    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1222        self.jsx_expression_ranges
1223            .iter()
1224            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1225    }
1226
1227    /// Check if a byte position is within an MDX comment ({/* ... */})
1228    #[inline]
1229    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1230        self.mdx_comment_ranges
1231            .iter()
1232            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1233    }
1234
1235    /// Get all JSX expression byte ranges
1236    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1237        &self.jsx_expression_ranges
1238    }
1239
1240    /// Get all MDX comment byte ranges
1241    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1242        &self.mdx_comment_ranges
1243    }
1244
1245    /// Check if a byte position is within a Pandoc/Quarto citation (@key or [@key])
1246    /// Only active in Quarto flavor
1247    #[inline]
1248    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1249        self.citation_ranges
1250            .iter()
1251            .any(|range| byte_pos >= range.start && byte_pos < range.end)
1252    }
1253
1254    /// Get all citation byte ranges (Quarto flavor only)
1255    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1256        &self.citation_ranges
1257    }
1258
1259    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
1260    #[inline]
1261    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1262        self.shortcode_ranges
1263            .iter()
1264            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1265    }
1266
1267    /// Get all shortcode byte ranges
1268    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1269        &self.shortcode_ranges
1270    }
1271
1272    /// Check if a byte position is within a link reference definition title
1273    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1274        self.reference_defs.iter().any(|def| {
1275            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1276                byte_pos >= start && byte_pos < end
1277            } else {
1278                false
1279            }
1280        })
1281    }
1282
1283    /// Check if content has any instances of a specific character (fast)
1284    pub fn has_char(&self, ch: char) -> bool {
1285        match ch {
1286            '#' => self.char_frequency.hash_count > 0,
1287            '*' => self.char_frequency.asterisk_count > 0,
1288            '_' => self.char_frequency.underscore_count > 0,
1289            '-' => self.char_frequency.hyphen_count > 0,
1290            '+' => self.char_frequency.plus_count > 0,
1291            '>' => self.char_frequency.gt_count > 0,
1292            '|' => self.char_frequency.pipe_count > 0,
1293            '[' => self.char_frequency.bracket_count > 0,
1294            '`' => self.char_frequency.backtick_count > 0,
1295            '<' => self.char_frequency.lt_count > 0,
1296            '!' => self.char_frequency.exclamation_count > 0,
1297            '\n' => self.char_frequency.newline_count > 0,
1298            _ => self.content.contains(ch), // Fallback for other characters
1299        }
1300    }
1301
1302    /// Get count of a specific character (fast)
1303    pub fn char_count(&self, ch: char) -> usize {
1304        match ch {
1305            '#' => self.char_frequency.hash_count,
1306            '*' => self.char_frequency.asterisk_count,
1307            '_' => self.char_frequency.underscore_count,
1308            '-' => self.char_frequency.hyphen_count,
1309            '+' => self.char_frequency.plus_count,
1310            '>' => self.char_frequency.gt_count,
1311            '|' => self.char_frequency.pipe_count,
1312            '[' => self.char_frequency.bracket_count,
1313            '`' => self.char_frequency.backtick_count,
1314            '<' => self.char_frequency.lt_count,
1315            '!' => self.char_frequency.exclamation_count,
1316            '\n' => self.char_frequency.newline_count,
1317            _ => self.content.matches(ch).count(), // Fallback for other characters
1318        }
1319    }
1320
1321    /// Check if content likely contains headings (fast)
1322    pub fn likely_has_headings(&self) -> bool {
1323        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1324    }
1325
1326    /// Check if content likely contains lists (fast)
1327    pub fn likely_has_lists(&self) -> bool {
1328        self.char_frequency.asterisk_count > 0
1329            || self.char_frequency.hyphen_count > 0
1330            || self.char_frequency.plus_count > 0
1331    }
1332
1333    /// Check if content likely contains emphasis (fast)
1334    pub fn likely_has_emphasis(&self) -> bool {
1335        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1336    }
1337
1338    /// Check if content likely contains tables (fast)
1339    pub fn likely_has_tables(&self) -> bool {
1340        self.char_frequency.pipe_count > 2
1341    }
1342
1343    /// Check if content likely contains blockquotes (fast)
1344    pub fn likely_has_blockquotes(&self) -> bool {
1345        self.char_frequency.gt_count > 0
1346    }
1347
1348    /// Check if content likely contains code (fast)
1349    pub fn likely_has_code(&self) -> bool {
1350        self.char_frequency.backtick_count > 0
1351    }
1352
1353    /// Check if content likely contains links or images (fast)
1354    pub fn likely_has_links_or_images(&self) -> bool {
1355        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1356    }
1357
1358    /// Check if content likely contains HTML (fast)
1359    pub fn likely_has_html(&self) -> bool {
1360        self.char_frequency.lt_count > 0
1361    }
1362
1363    /// Get the blockquote prefix for inserting a blank line at the given line index.
1364    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1365    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1366    /// Returns an empty string if the line is not inside a blockquote.
1367    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1368        if let Some(line_info) = self.lines.get(line_idx)
1369            && let Some(ref bq) = line_info.blockquote
1370        {
1371            bq.prefix.trim_end().to_string()
1372        } else {
1373            String::new()
1374        }
1375    }
1376
1377    /// Get HTML tags on a specific line
1378    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1379        self.html_tags()
1380            .iter()
1381            .filter(|tag| tag.line == line_num)
1382            .cloned()
1383            .collect()
1384    }
1385
1386    /// Get emphasis spans on a specific line
1387    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1388        self.emphasis_spans()
1389            .iter()
1390            .filter(|span| span.line == line_num)
1391            .cloned()
1392            .collect()
1393    }
1394
1395    /// Get table rows on a specific line
1396    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1397        self.table_rows()
1398            .iter()
1399            .filter(|row| row.line == line_num)
1400            .cloned()
1401            .collect()
1402    }
1403
1404    /// Get bare URLs on a specific line
1405    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1406        self.bare_urls()
1407            .iter()
1408            .filter(|url| url.line == line_num)
1409            .cloned()
1410            .collect()
1411    }
1412
1413    /// Find the line index for a given byte offset using binary search.
1414    /// Returns (line_index, line_number, column) where:
1415    /// - line_index is the 0-based index in the lines array
1416    /// - line_number is the 1-based line number
1417    /// - column is the byte offset within that line
1418    #[inline]
1419    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1420        // Binary search to find the line containing this byte offset
1421        let idx = match lines.binary_search_by(|line| {
1422            if byte_offset < line.byte_offset {
1423                std::cmp::Ordering::Greater
1424            } else if byte_offset > line.byte_offset + line.byte_len {
1425                std::cmp::Ordering::Less
1426            } else {
1427                std::cmp::Ordering::Equal
1428            }
1429        }) {
1430            Ok(idx) => idx,
1431            Err(idx) => idx.saturating_sub(1),
1432        };
1433
1434        let line = &lines[idx];
1435        let line_num = idx + 1;
1436        let col = byte_offset.saturating_sub(line.byte_offset);
1437
1438        (idx, line_num, col)
1439    }
1440
1441    /// Check if a byte offset is within a code span using binary search
1442    #[inline]
1443    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1444        // Since spans are sorted by byte_offset, use partition_point for binary search
1445        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1446
1447        // Check the span that starts at or before our offset
1448        if idx > 0 {
1449            let span = &code_spans[idx - 1];
1450            if offset >= span.byte_offset && offset < span.byte_end {
1451                return true;
1452            }
1453        }
1454
1455        false
1456    }
1457
1458    /// Collect byte ranges of all links using pulldown-cmark
1459    /// This is used to skip heading detection for lines that fall within link syntax
1460    /// (e.g., multiline links like `[text](url\n#fragment)`)
1461    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1462        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1463
1464        let mut link_ranges = Vec::new();
1465        let mut options = Options::empty();
1466        options.insert(Options::ENABLE_WIKILINKS);
1467        options.insert(Options::ENABLE_FOOTNOTES);
1468
1469        let parser = Parser::new_ext(content, options).into_offset_iter();
1470        let mut link_stack: Vec<usize> = Vec::new();
1471
1472        for (event, range) in parser {
1473            match event {
1474                Event::Start(Tag::Link { .. }) => {
1475                    link_stack.push(range.start);
1476                }
1477                Event::End(TagEnd::Link) => {
1478                    if let Some(start_pos) = link_stack.pop() {
1479                        link_ranges.push((start_pos, range.end));
1480                    }
1481                }
1482                _ => {}
1483            }
1484        }
1485
1486        link_ranges
1487    }
1488
1489    /// Parse all links in the content
1490    fn parse_links(
1491        content: &'a str,
1492        lines: &[LineInfo],
1493        code_blocks: &[(usize, usize)],
1494        code_spans: &[CodeSpan],
1495        flavor: MarkdownFlavor,
1496        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1497    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1498        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1499        use std::collections::HashSet;
1500
1501        let mut links = Vec::with_capacity(content.len() / 500);
1502        let mut broken_links = Vec::new();
1503        let mut footnote_refs = Vec::new();
1504
1505        // Track byte positions of links found by pulldown-cmark
1506        let mut found_positions = HashSet::new();
1507
1508        // Use pulldown-cmark's streaming parser with BrokenLink callback
1509        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1510        // This automatically handles:
1511        // - Escaped links (won't generate events)
1512        // - Links in code blocks/spans (won't generate Link events)
1513        // - Images (generates Tag::Image instead)
1514        // - Reference resolution (dest_url is already resolved!)
1515        // - Broken references (callback is invoked)
1516        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1517        let mut options = Options::empty();
1518        options.insert(Options::ENABLE_WIKILINKS);
1519        options.insert(Options::ENABLE_FOOTNOTES);
1520
1521        let parser = Parser::new_with_broken_link_callback(
1522            content,
1523            options,
1524            Some(|link: BrokenLink<'_>| {
1525                broken_links.push(BrokenLinkInfo {
1526                    reference: link.reference.to_string(),
1527                    span: link.span.clone(),
1528                });
1529                None
1530            }),
1531        )
1532        .into_offset_iter();
1533
1534        let mut link_stack: Vec<(
1535            usize,
1536            usize,
1537            pulldown_cmark::CowStr<'a>,
1538            LinkType,
1539            pulldown_cmark::CowStr<'a>,
1540        )> = Vec::new();
1541        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1542
1543        for (event, range) in parser {
1544            match event {
1545                Event::Start(Tag::Link {
1546                    link_type,
1547                    dest_url,
1548                    id,
1549                    ..
1550                }) => {
1551                    // Link start - record position, URL, and reference ID
1552                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1553                    text_chunks.clear();
1554                }
1555                Event::Text(text) if !link_stack.is_empty() => {
1556                    // Track text content with its byte range
1557                    text_chunks.push((text.to_string(), range.start, range.end));
1558                }
1559                Event::Code(code) if !link_stack.is_empty() => {
1560                    // Include inline code in link text (with backticks)
1561                    let code_text = format!("`{code}`");
1562                    text_chunks.push((code_text, range.start, range.end));
1563                }
1564                Event::End(TagEnd::Link) => {
1565                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1566                        // Skip if in HTML comment
1567                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1568                            text_chunks.clear();
1569                            continue;
1570                        }
1571
1572                        // Find line and column information
1573                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1574
1575                        // Skip if this link is on a MkDocs snippet line
1576                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1577                            text_chunks.clear();
1578                            continue;
1579                        }
1580
1581                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1582
1583                        let is_reference = matches!(
1584                            link_type,
1585                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1586                        );
1587
1588                        // Extract link text directly from source bytes to preserve escaping
1589                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1590                        let link_text = if matches!(link_type, LinkType::WikiLink { .. }) {
1591                            // WikiLinks: [[destination]] or [[destination|display text]]
1592                            // pulldown-cmark's range excludes the final ]], so standard extraction fails
1593                            // Use accumulated text chunks (from Text events) for accurate text
1594                            if !text_chunks.is_empty() {
1595                                let text: String = text_chunks.iter().map(|(t, _, _)| t.as_str()).collect();
1596                                Cow::Owned(text)
1597                            } else {
1598                                // Fallback: use the URL as text (for simple [[destination]] links)
1599                                Cow::Owned(url.to_string())
1600                            }
1601                        } else if start_pos < content.len() {
1602                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1603
1604                            // Find MATCHING ] by tracking bracket depth for nested brackets
1605                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1606                            // Brackets inside code spans (between backticks) should be ignored
1607                            let mut close_pos = None;
1608                            let mut depth = 0;
1609                            let mut in_code_span = false;
1610
1611                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1612                                // Count preceding backslashes
1613                                let mut backslash_count = 0;
1614                                let mut j = i;
1615                                while j > 0 && link_bytes[j - 1] == b'\\' {
1616                                    backslash_count += 1;
1617                                    j -= 1;
1618                                }
1619                                let is_escaped = backslash_count % 2 != 0;
1620
1621                                // Track code spans - backticks toggle in/out of code
1622                                if byte == b'`' && !is_escaped {
1623                                    in_code_span = !in_code_span;
1624                                }
1625
1626                                // Only count brackets when NOT in a code span
1627                                if !is_escaped && !in_code_span {
1628                                    if byte == b'[' {
1629                                        depth += 1;
1630                                    } else if byte == b']' {
1631                                        if depth == 0 {
1632                                            // Found the matching closing bracket
1633                                            close_pos = Some(i);
1634                                            break;
1635                                        } else {
1636                                            depth -= 1;
1637                                        }
1638                                    }
1639                                }
1640                            }
1641
1642                            if let Some(pos) = close_pos {
1643                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1644                            } else {
1645                                Cow::Borrowed("")
1646                            }
1647                        } else {
1648                            Cow::Borrowed("")
1649                        };
1650
1651                        // For reference links, use the actual reference ID from pulldown-cmark
1652                        let reference_id = if is_reference && !ref_id.is_empty() {
1653                            Some(Cow::Owned(ref_id.to_lowercase()))
1654                        } else if is_reference {
1655                            // For collapsed/shortcut references without explicit ID, use the link text
1656                            Some(Cow::Owned(link_text.to_lowercase()))
1657                        } else {
1658                            None
1659                        };
1660
1661                        // Track this position as found
1662                        found_positions.insert(start_pos);
1663
1664                        links.push(ParsedLink {
1665                            line: line_num,
1666                            start_col: col_start,
1667                            end_col: col_end,
1668                            byte_offset: start_pos,
1669                            byte_end: range.end,
1670                            text: link_text,
1671                            url: Cow::Owned(url.to_string()),
1672                            is_reference,
1673                            reference_id,
1674                            link_type,
1675                        });
1676
1677                        text_chunks.clear();
1678                    }
1679                }
1680                Event::FootnoteReference(footnote_id) => {
1681                    // Capture footnote references like [^1], [^note]
1682                    // Skip if in HTML comment
1683                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1684                        continue;
1685                    }
1686
1687                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1688                    footnote_refs.push(FootnoteRef {
1689                        id: footnote_id.to_string(),
1690                        line: line_num,
1691                        byte_offset: range.start,
1692                        byte_end: range.end,
1693                    });
1694                }
1695                _ => {}
1696            }
1697        }
1698
1699        // Also find undefined references using regex
1700        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1701        // because the reference is undefined
1702        for cap in LINK_PATTERN.captures_iter(content) {
1703            let full_match = cap.get(0).unwrap();
1704            let match_start = full_match.start();
1705            let match_end = full_match.end();
1706
1707            // Skip if this was already found by pulldown-cmark (it's a valid link)
1708            if found_positions.contains(&match_start) {
1709                continue;
1710            }
1711
1712            // Skip if escaped
1713            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1714                continue;
1715            }
1716
1717            // Skip if it's an image
1718            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1719                continue;
1720            }
1721
1722            // Skip if in code block
1723            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1724                continue;
1725            }
1726
1727            // Skip if in code span
1728            if Self::is_offset_in_code_span(code_spans, match_start) {
1729                continue;
1730            }
1731
1732            // Skip if in HTML comment
1733            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1734                continue;
1735            }
1736
1737            // Find line and column information
1738            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1739
1740            // Skip if this link is on a MkDocs snippet line
1741            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1742                continue;
1743            }
1744
1745            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1746
1747            let text = cap.get(1).map_or("", |m| m.as_str());
1748
1749            // Only process reference links (group 6)
1750            if let Some(ref_id) = cap.get(6) {
1751                let ref_id_str = ref_id.as_str();
1752                let normalized_ref = if ref_id_str.is_empty() {
1753                    Cow::Owned(text.to_lowercase()) // Implicit reference
1754                } else {
1755                    Cow::Owned(ref_id_str.to_lowercase())
1756                };
1757
1758                // This is an undefined reference (pulldown-cmark didn't parse it)
1759                links.push(ParsedLink {
1760                    line: line_num,
1761                    start_col: col_start,
1762                    end_col: col_end,
1763                    byte_offset: match_start,
1764                    byte_end: match_end,
1765                    text: Cow::Borrowed(text),
1766                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1767                    is_reference: true,
1768                    reference_id: Some(normalized_ref),
1769                    link_type: LinkType::Reference, // Undefined references are reference-style
1770                });
1771            }
1772        }
1773
1774        (links, broken_links, footnote_refs)
1775    }
1776
1777    /// Parse all images in the content
1778    fn parse_images(
1779        content: &'a str,
1780        lines: &[LineInfo],
1781        code_blocks: &[(usize, usize)],
1782        code_spans: &[CodeSpan],
1783        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1784    ) -> Vec<ParsedImage<'a>> {
1785        use crate::utils::skip_context::is_in_html_comment_ranges;
1786        use std::collections::HashSet;
1787
1788        // Pre-size based on a heuristic: images are less common than links
1789        let mut images = Vec::with_capacity(content.len() / 1000);
1790        let mut found_positions = HashSet::new();
1791
1792        // Use pulldown-cmark for parsing - more accurate and faster
1793        let parser = Parser::new(content).into_offset_iter();
1794        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1795            Vec::new();
1796        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1797
1798        for (event, range) in parser {
1799            match event {
1800                Event::Start(Tag::Image {
1801                    link_type,
1802                    dest_url,
1803                    id,
1804                    ..
1805                }) => {
1806                    image_stack.push((range.start, dest_url, link_type, id));
1807                    text_chunks.clear();
1808                }
1809                Event::Text(text) if !image_stack.is_empty() => {
1810                    text_chunks.push((text.to_string(), range.start, range.end));
1811                }
1812                Event::Code(code) if !image_stack.is_empty() => {
1813                    let code_text = format!("`{code}`");
1814                    text_chunks.push((code_text, range.start, range.end));
1815                }
1816                Event::End(TagEnd::Image) => {
1817                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1818                        // Skip if in code block
1819                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1820                            continue;
1821                        }
1822
1823                        // Skip if in code span
1824                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1825                            continue;
1826                        }
1827
1828                        // Skip if in HTML comment
1829                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1830                            continue;
1831                        }
1832
1833                        // Find line and column using binary search
1834                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1835                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1836
1837                        let is_reference = matches!(
1838                            link_type,
1839                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1840                        );
1841
1842                        // Extract alt text directly from source bytes to preserve escaping
1843                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1844                        let alt_text = if start_pos < content.len() {
1845                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1846
1847                            // Find MATCHING ] by tracking bracket depth for nested brackets
1848                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1849                            let mut close_pos = None;
1850                            let mut depth = 0;
1851
1852                            if image_bytes.len() > 2 {
1853                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1854                                    // Count preceding backslashes
1855                                    let mut backslash_count = 0;
1856                                    let mut j = i;
1857                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1858                                        backslash_count += 1;
1859                                        j -= 1;
1860                                    }
1861                                    let is_escaped = backslash_count % 2 != 0;
1862
1863                                    if !is_escaped {
1864                                        if byte == b'[' {
1865                                            depth += 1;
1866                                        } else if byte == b']' {
1867                                            if depth == 0 {
1868                                                // Found the matching closing bracket
1869                                                close_pos = Some(i);
1870                                                break;
1871                                            } else {
1872                                                depth -= 1;
1873                                            }
1874                                        }
1875                                    }
1876                                }
1877                            }
1878
1879                            if let Some(pos) = close_pos {
1880                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1881                            } else {
1882                                Cow::Borrowed("")
1883                            }
1884                        } else {
1885                            Cow::Borrowed("")
1886                        };
1887
1888                        let reference_id = if is_reference && !ref_id.is_empty() {
1889                            Some(Cow::Owned(ref_id.to_lowercase()))
1890                        } else if is_reference {
1891                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1892                        } else {
1893                            None
1894                        };
1895
1896                        found_positions.insert(start_pos);
1897                        images.push(ParsedImage {
1898                            line: line_num,
1899                            start_col: col_start,
1900                            end_col: col_end,
1901                            byte_offset: start_pos,
1902                            byte_end: range.end,
1903                            alt_text,
1904                            url: Cow::Owned(url.to_string()),
1905                            is_reference,
1906                            reference_id,
1907                            link_type,
1908                        });
1909                    }
1910                }
1911                _ => {}
1912            }
1913        }
1914
1915        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1916        for cap in IMAGE_PATTERN.captures_iter(content) {
1917            let full_match = cap.get(0).unwrap();
1918            let match_start = full_match.start();
1919            let match_end = full_match.end();
1920
1921            // Skip if already found by pulldown-cmark
1922            if found_positions.contains(&match_start) {
1923                continue;
1924            }
1925
1926            // Skip if the ! is escaped
1927            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1928                continue;
1929            }
1930
1931            // Skip if in code block, code span, or HTML comment
1932            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1933                || Self::is_offset_in_code_span(code_spans, match_start)
1934                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1935            {
1936                continue;
1937            }
1938
1939            // Only process reference images (undefined references not found by pulldown-cmark)
1940            if let Some(ref_id) = cap.get(6) {
1941                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1942                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1943                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1944                let ref_id_str = ref_id.as_str();
1945                let normalized_ref = if ref_id_str.is_empty() {
1946                    Cow::Owned(alt_text.to_lowercase())
1947                } else {
1948                    Cow::Owned(ref_id_str.to_lowercase())
1949                };
1950
1951                images.push(ParsedImage {
1952                    line: line_num,
1953                    start_col: col_start,
1954                    end_col: col_end,
1955                    byte_offset: match_start,
1956                    byte_end: match_end,
1957                    alt_text: Cow::Borrowed(alt_text),
1958                    url: Cow::Borrowed(""),
1959                    is_reference: true,
1960                    reference_id: Some(normalized_ref),
1961                    link_type: LinkType::Reference, // Undefined references are reference-style
1962                });
1963            }
1964        }
1965
1966        images
1967    }
1968
1969    /// Parse reference definitions
1970    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1971        // Pre-size based on lines count as reference definitions are line-based
1972        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1973
1974        for (line_idx, line_info) in lines.iter().enumerate() {
1975            // Skip lines in code blocks
1976            if line_info.in_code_block {
1977                continue;
1978            }
1979
1980            let line = line_info.content(content);
1981            let line_num = line_idx + 1;
1982
1983            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1984                let id_raw = cap.get(1).unwrap().as_str();
1985
1986                // Skip footnote definitions - they use [^id]: syntax and are semantically
1987                // different from reference link definitions
1988                if id_raw.starts_with('^') {
1989                    continue;
1990                }
1991
1992                let id = id_raw.to_lowercase();
1993                let url = cap.get(2).unwrap().as_str().to_string();
1994                let title_match = cap.get(3).or_else(|| cap.get(4));
1995                let title = title_match.map(|m| m.as_str().to_string());
1996
1997                // Calculate byte positions
1998                // The match starts at the beginning of the line (0) and extends to the end
1999                let match_obj = cap.get(0).unwrap();
2000                let byte_offset = line_info.byte_offset + match_obj.start();
2001                let byte_end = line_info.byte_offset + match_obj.end();
2002
2003                // Calculate title byte positions (includes the quote character before content)
2004                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
2005                    // The match is the content inside quotes, so we include the quote before
2006                    let start = line_info.byte_offset + m.start().saturating_sub(1);
2007                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
2008                    (Some(start), Some(end))
2009                } else {
2010                    (None, None)
2011                };
2012
2013                refs.push(ReferenceDef {
2014                    line: line_num,
2015                    id,
2016                    url,
2017                    title,
2018                    byte_offset,
2019                    byte_end,
2020                    title_byte_start,
2021                    title_byte_end,
2022                });
2023            }
2024        }
2025
2026        refs
2027    }
2028
2029    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
2030    /// Handles nested blockquotes like `> > > content`
2031    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
2032    #[inline]
2033    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
2034        let trimmed_start = line.trim_start();
2035        if !trimmed_start.starts_with('>') {
2036            return None;
2037        }
2038
2039        // Track total prefix length to handle nested blockquotes
2040        let mut remaining = line;
2041        let mut total_prefix_len = 0;
2042
2043        loop {
2044            let trimmed = remaining.trim_start();
2045            if !trimmed.starts_with('>') {
2046                break;
2047            }
2048
2049            // Add leading whitespace + '>' to prefix
2050            let leading_ws_len = remaining.len() - trimmed.len();
2051            total_prefix_len += leading_ws_len + 1;
2052
2053            let after_gt = &trimmed[1..];
2054
2055            // Handle optional whitespace after '>' (space or tab)
2056            if let Some(stripped) = after_gt.strip_prefix(' ') {
2057                total_prefix_len += 1;
2058                remaining = stripped;
2059            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
2060                total_prefix_len += 1;
2061                remaining = stripped;
2062            } else {
2063                remaining = after_gt;
2064            }
2065        }
2066
2067        Some((&line[..total_prefix_len], remaining))
2068    }
2069
2070    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
2071    ///
2072    /// Returns a HashMap keyed by line byte offset, containing:
2073    /// `(is_ordered, marker, marker_column, content_column, number)`
2074    ///
2075    /// ## Why pulldown-cmark?
2076    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
2077    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
2078    /// This fixes issue #253 where continuation lines were falsely detected.
2079    ///
2080    /// ## Tab indentation quirk
2081    /// Pulldown-cmark reports nested list items at the newline character position
2082    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
2083    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
2084    /// We detect this and advance to the correct line.
2085    ///
2086    /// ## HashMap key strategy
2087    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
2088    /// that resolve to the same line (after newline adjustment). The first event
2089    /// for each line is authoritative.
2090    /// Detect list items and emphasis spans in a single pulldown-cmark pass.
2091    /// Returns both list items (for LineInfo) and emphasis spans (for MD030).
2092    /// This avoids a separate parse for emphasis detection.
2093    fn detect_list_items_and_emphasis_with_pulldown(
2094        content: &str,
2095        line_offsets: &[usize],
2096        flavor: MarkdownFlavor,
2097        front_matter_end: usize,
2098        code_blocks: &[(usize, usize)],
2099    ) -> (ListItemMap, Vec<EmphasisSpan>) {
2100        use std::collections::HashMap;
2101
2102        let mut list_items = HashMap::new();
2103        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
2104
2105        let mut options = Options::empty();
2106        options.insert(Options::ENABLE_TABLES);
2107        options.insert(Options::ENABLE_FOOTNOTES);
2108        options.insert(Options::ENABLE_STRIKETHROUGH);
2109        options.insert(Options::ENABLE_TASKLISTS);
2110        // Always enable GFM features for consistency with existing behavior
2111        options.insert(Options::ENABLE_GFM);
2112
2113        // Suppress unused variable warning
2114        let _ = flavor;
2115
2116        let parser = Parser::new_ext(content, options).into_offset_iter();
2117        let mut list_depth: usize = 0;
2118        let mut list_stack: Vec<bool> = Vec::new();
2119
2120        for (event, range) in parser {
2121            match event {
2122                // Capture emphasis spans (for MD030's emphasis detection)
2123                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong) => {
2124                    let marker_count = if matches!(event, Event::Start(Tag::Strong)) {
2125                        2
2126                    } else {
2127                        1
2128                    };
2129                    let match_start = range.start;
2130                    let match_end = range.end;
2131
2132                    // Skip if in code block
2133                    if !CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
2134                        // Determine marker character by looking at the content at the start
2135                        let marker = content[match_start..].chars().next().unwrap_or('*');
2136                        if marker == '*' || marker == '_' {
2137                            // Extract content between markers
2138                            let content_start = match_start + marker_count;
2139                            let content_end = if match_end >= marker_count {
2140                                match_end - marker_count
2141                            } else {
2142                                match_end
2143                            };
2144                            let content_part = if content_start < content_end && content_end <= content.len() {
2145                                &content[content_start..content_end]
2146                            } else {
2147                                ""
2148                            };
2149
2150                            // Find which line this emphasis is on using line_offsets
2151                            let line_idx = match line_offsets.binary_search(&match_start) {
2152                                Ok(idx) => idx,
2153                                Err(idx) => idx.saturating_sub(1),
2154                            };
2155                            let line_num = line_idx + 1;
2156                            let line_start = line_offsets.get(line_idx).copied().unwrap_or(0);
2157                            let col_start = match_start - line_start;
2158                            let col_end = match_end - line_start;
2159
2160                            emphasis_spans.push(EmphasisSpan {
2161                                line: line_num,
2162                                start_col: col_start,
2163                                end_col: col_end,
2164                                byte_offset: match_start,
2165                                byte_end: match_end,
2166                                marker,
2167                                marker_count,
2168                                content: content_part.to_string(),
2169                            });
2170                        }
2171                    }
2172                }
2173                Event::Start(Tag::List(start_number)) => {
2174                    list_depth += 1;
2175                    list_stack.push(start_number.is_some());
2176                }
2177                Event::End(TagEnd::List(_)) => {
2178                    list_depth = list_depth.saturating_sub(1);
2179                    list_stack.pop();
2180                }
2181                Event::Start(Tag::Item) if list_depth > 0 => {
2182                    // Get the ordered state for the CURRENT (innermost) list
2183                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
2184                    // Find which line this byte offset corresponds to
2185                    let item_start = range.start;
2186
2187                    // Binary search to find the line number
2188                    let mut line_idx = match line_offsets.binary_search(&item_start) {
2189                        Ok(idx) => idx,
2190                        Err(idx) => idx.saturating_sub(1),
2191                    };
2192
2193                    // Pulldown-cmark reports nested list items at the newline before the item
2194                    // when using tab indentation (e.g., "* Item\n\t- Nested").
2195                    // Advance to the actual content line in this case.
2196                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
2197                        line_idx += 1;
2198                    }
2199
2200                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
2201                    if front_matter_end > 0 && line_idx < front_matter_end {
2202                        continue;
2203                    }
2204
2205                    if line_idx < line_offsets.len() {
2206                        let line_start_byte = line_offsets[line_idx];
2207                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
2208                        let line = &content[line_start_byte..line_end.min(content.len())];
2209
2210                        // Strip trailing newline
2211                        let line = line
2212                            .strip_suffix('\n')
2213                            .or_else(|| line.strip_suffix("\r\n"))
2214                            .unwrap_or(line);
2215
2216                        // Strip blockquote prefix if present
2217                        let blockquote_parse = Self::parse_blockquote_prefix(line);
2218                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
2219                            (prefix.len(), content)
2220                        } else {
2221                            (0, line)
2222                        };
2223
2224                        // Parse the list marker from the actual line
2225                        if current_list_is_ordered {
2226                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
2227                                Self::parse_ordered_list(line_to_parse)
2228                            {
2229                                let marker = format!("{number_str}{delimiter}");
2230                                let marker_column = blockquote_prefix_len + leading_spaces.len();
2231                                let content_column = marker_column + marker.len() + spacing.len();
2232                                let number = number_str.parse().ok();
2233
2234                                list_items.entry(line_start_byte).or_insert((
2235                                    true,
2236                                    marker,
2237                                    marker_column,
2238                                    content_column,
2239                                    number,
2240                                ));
2241                            }
2242                        } else if let Some((leading_spaces, marker, spacing, _content)) =
2243                            Self::parse_unordered_list(line_to_parse)
2244                        {
2245                            let marker_column = blockquote_prefix_len + leading_spaces.len();
2246                            let content_column = marker_column + 1 + spacing.len();
2247
2248                            list_items.entry(line_start_byte).or_insert((
2249                                false,
2250                                marker.to_string(),
2251                                marker_column,
2252                                content_column,
2253                                None,
2254                            ));
2255                        }
2256                    }
2257                }
2258                _ => {}
2259            }
2260        }
2261
2262        (list_items, emphasis_spans)
2263    }
2264
2265    /// Fast unordered list parser - replaces regex for 5-10x speedup
2266    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
2267    /// Returns: Some((leading_ws, marker, spacing, content)) or None
2268    #[inline]
2269    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
2270        let bytes = line.as_bytes();
2271        let mut i = 0;
2272
2273        // Skip leading whitespace
2274        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2275            i += 1;
2276        }
2277
2278        // Check for marker
2279        if i >= bytes.len() {
2280            return None;
2281        }
2282        let marker = bytes[i] as char;
2283        if marker != '-' && marker != '*' && marker != '+' {
2284            return None;
2285        }
2286        let marker_pos = i;
2287        i += 1;
2288
2289        // Collect spacing after marker (space or tab only)
2290        let spacing_start = i;
2291        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2292            i += 1;
2293        }
2294
2295        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
2296    }
2297
2298    /// Fast ordered list parser - replaces regex for 5-10x speedup
2299    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
2300    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
2301    #[inline]
2302    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
2303        let bytes = line.as_bytes();
2304        let mut i = 0;
2305
2306        // Skip leading whitespace
2307        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2308            i += 1;
2309        }
2310
2311        // Collect digits
2312        let number_start = i;
2313        while i < bytes.len() && bytes[i].is_ascii_digit() {
2314            i += 1;
2315        }
2316        if i == number_start {
2317            return None; // No digits found
2318        }
2319
2320        // Check for delimiter
2321        if i >= bytes.len() {
2322            return None;
2323        }
2324        let delimiter = bytes[i] as char;
2325        if delimiter != '.' && delimiter != ')' {
2326            return None;
2327        }
2328        let delimiter_pos = i;
2329        i += 1;
2330
2331        // Collect spacing after delimiter (space or tab only)
2332        let spacing_start = i;
2333        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
2334            i += 1;
2335        }
2336
2337        Some((
2338            &line[..number_start],
2339            &line[number_start..delimiter_pos],
2340            delimiter,
2341            &line[spacing_start..i],
2342            &line[i..],
2343        ))
2344    }
2345
2346    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
2347    /// Returns a Vec<bool> where index i indicates if line i is in a code block
2348    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
2349        let num_lines = line_offsets.len();
2350        let mut in_code_block = vec![false; num_lines];
2351
2352        // For each code block, mark all lines within it
2353        for &(start, end) in code_blocks {
2354            // Ensure we're at valid UTF-8 boundaries
2355            let safe_start = if start > 0 && !content.is_char_boundary(start) {
2356                let mut boundary = start;
2357                while boundary > 0 && !content.is_char_boundary(boundary) {
2358                    boundary -= 1;
2359                }
2360                boundary
2361            } else {
2362                start
2363            };
2364
2365            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
2366                let mut boundary = end;
2367                while boundary < content.len() && !content.is_char_boundary(boundary) {
2368                    boundary += 1;
2369                }
2370                boundary
2371            } else {
2372                end.min(content.len())
2373            };
2374
2375            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
2376            // That function now has proper list context awareness (see code_block_utils.rs)
2377            // and correctly distinguishes between:
2378            // - Fenced code blocks (``` or ~~~)
2379            // - Indented code blocks at document level (4 spaces + blank line before)
2380            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
2381            //
2382            // We no longer need to re-validate here. The original validation logic
2383            // was causing false positives by marking list continuation paragraphs as
2384            // code blocks when they have 4 spaces of indentation.
2385
2386            // Use binary search to find the first and last line indices
2387            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
2388            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
2389            //
2390            // Find the line that CONTAINS safe_start: the line with the largest
2391            // start offset that is <= safe_start. partition_point gives us the
2392            // first line that starts AFTER safe_start, so we subtract 1.
2393            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
2394            let first_line = first_line_after.saturating_sub(1);
2395            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
2396
2397            // Mark all lines in the range at once
2398            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
2399                *flag = true;
2400            }
2401        }
2402
2403        in_code_block
2404    }
2405
2406    /// Pre-compute which lines are inside math blocks ($$ ... $$) - O(n) single pass
2407    /// Returns a Vec<bool> where index i indicates if line i is in a math block
2408    fn compute_math_block_line_map(content: &str, code_block_map: &[bool]) -> Vec<bool> {
2409        let content_lines: Vec<&str> = content.lines().collect();
2410        let num_lines = content_lines.len();
2411        let mut in_math_block = vec![false; num_lines];
2412
2413        let mut inside_math = false;
2414
2415        for (i, line) in content_lines.iter().enumerate() {
2416            // Skip lines that are in code blocks - math delimiters inside code are literal
2417            if code_block_map.get(i).copied().unwrap_or(false) {
2418                continue;
2419            }
2420
2421            let trimmed = line.trim();
2422
2423            // Check for math block delimiter ($$)
2424            // A line with just $$ toggles the math block state
2425            if trimmed == "$$" {
2426                if inside_math {
2427                    // Closing delimiter - this line is still part of the math block
2428                    in_math_block[i] = true;
2429                    inside_math = false;
2430                } else {
2431                    // Opening delimiter - this line starts the math block
2432                    in_math_block[i] = true;
2433                    inside_math = true;
2434                }
2435            } else if inside_math {
2436                // Content inside math block
2437                in_math_block[i] = true;
2438            }
2439        }
2440
2441        in_math_block
2442    }
2443
2444    /// Pre-compute basic line information (without headings/blockquotes)
2445    /// Also returns emphasis spans detected during the pulldown-cmark parse
2446    fn compute_basic_line_info(
2447        content: &str,
2448        line_offsets: &[usize],
2449        code_blocks: &[(usize, usize)],
2450        flavor: MarkdownFlavor,
2451        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2452        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2453        quarto_div_ranges: &[crate::utils::skip_context::ByteRange],
2454    ) -> (Vec<LineInfo>, Vec<EmphasisSpan>) {
2455        let content_lines: Vec<&str> = content.lines().collect();
2456        let mut lines = Vec::with_capacity(content_lines.len());
2457
2458        // Pre-compute which lines are in code blocks
2459        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2460
2461        // Pre-compute which lines are in math blocks ($$ ... $$)
2462        let math_block_map = Self::compute_math_block_line_map(content, &code_block_map);
2463
2464        // Detect front matter boundaries FIRST, before any other parsing
2465        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2466        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2467
2468        // Use pulldown-cmark to detect list items AND emphasis spans in a single pass
2469        // (context-aware, eliminates false positives)
2470        let (list_item_map, emphasis_spans) = Self::detect_list_items_and_emphasis_with_pulldown(
2471            content,
2472            line_offsets,
2473            flavor,
2474            front_matter_end,
2475            code_blocks,
2476        );
2477
2478        for (i, line) in content_lines.iter().enumerate() {
2479            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2480            let indent = line.len() - line.trim_start().len();
2481            // Compute visual indent with proper CommonMark tab expansion
2482            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2483
2484            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2485            let blockquote_parse = Self::parse_blockquote_prefix(line);
2486
2487            // For blank detection, consider blockquote context
2488            let is_blank = if let Some((_, content)) = blockquote_parse {
2489                // In blockquote context, check if content after prefix is blank
2490                content.trim().is_empty()
2491            } else {
2492                line.trim().is_empty()
2493            };
2494
2495            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2496            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2497
2498            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2499            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2500                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2501            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2502            // This ensures content after `-->` on the same line is not incorrectly skipped
2503            let line_end_offset = byte_offset + line.len();
2504            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2505                html_comment_ranges,
2506                byte_offset,
2507                line_end_offset,
2508            );
2509            // Use pulldown-cmark's list detection for context-aware parsing
2510            // This eliminates false positives on continuation lines (issue #253)
2511            let list_item =
2512                list_item_map
2513                    .get(&byte_offset)
2514                    .map(
2515                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2516                            marker: marker.clone(),
2517                            is_ordered: *is_ordered,
2518                            number: *number,
2519                            marker_column: *marker_column,
2520                            content_column: *content_column,
2521                        },
2522                    );
2523
2524            // Detect horizontal rules (only outside code blocks and frontmatter)
2525            // Uses CommonMark-compliant check including leading indentation validation
2526            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2527            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2528
2529            // Get math block status for this line
2530            let in_math_block = math_block_map.get(i).copied().unwrap_or(false);
2531
2532            // Check if line is inside a Quarto div block
2533            let in_quarto_div = flavor == MarkdownFlavor::Quarto
2534                && crate::utils::quarto_divs::is_within_div_block_ranges(quarto_div_ranges, byte_offset);
2535
2536            lines.push(LineInfo {
2537                byte_offset,
2538                byte_len: line.len(),
2539                indent,
2540                visual_indent,
2541                is_blank,
2542                in_code_block,
2543                in_front_matter,
2544                in_html_block: false, // Will be populated after line creation
2545                in_html_comment,
2546                list_item,
2547                heading: None,    // Will be populated in second pass for Setext headings
2548                blockquote: None, // Will be populated after line creation
2549                in_mkdocstrings,
2550                in_esm_block: false, // Will be populated after line creation for MDX files
2551                in_code_span_continuation: false, // Will be populated after code spans are parsed
2552                is_horizontal_rule: is_hr,
2553                in_math_block,
2554                in_quarto_div,
2555                in_jsx_expression: false,       // Will be populated for MDX files
2556                in_mdx_comment: false,          // Will be populated for MDX files
2557                in_jsx_component: false,        // Will be populated for MDX files
2558                in_jsx_fragment: false,         // Will be populated for MDX files
2559                in_admonition: false,           // Will be populated for MkDocs files
2560                in_content_tab: false,          // Will be populated for MkDocs files
2561                in_mkdocs_html_markdown: false, // Will be populated for MkDocs files
2562                in_definition_list: false,      // Will be populated for MkDocs files
2563                in_obsidian_comment: false,     // Will be populated for Obsidian files
2564            });
2565        }
2566
2567        (lines, emphasis_spans)
2568    }
2569
2570    /// Detect headings and blockquotes (called after HTML block detection)
2571    fn detect_headings_and_blockquotes(
2572        content: &str,
2573        lines: &mut [LineInfo],
2574        flavor: MarkdownFlavor,
2575        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2576        link_byte_ranges: &[(usize, usize)],
2577    ) {
2578        // Regex for heading detection
2579        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2580            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2581        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2582            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2583
2584        let content_lines: Vec<&str> = content.lines().collect();
2585
2586        // Detect front matter boundaries to skip those lines
2587        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2588
2589        // Detect headings (including Setext which needs look-ahead) and blockquotes
2590        for i in 0..lines.len() {
2591            let line = content_lines[i];
2592
2593            // Detect blockquotes FIRST, before any skip conditions.
2594            // A line can be both a blockquote AND contain a code block inside it.
2595            // We need to know about the blockquote marker regardless of code block status.
2596            // Skip only frontmatter lines - those are never blockquotes.
2597            if !(front_matter_end > 0 && i < front_matter_end)
2598                && let Some(bq) = parse_blockquote_detailed(line)
2599            {
2600                let nesting_level = bq.markers.len();
2601                let marker_column = bq.indent.len();
2602                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2603                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2604                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2605                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2606
2607                lines[i].blockquote = Some(BlockquoteInfo {
2608                    nesting_level,
2609                    indent: bq.indent.to_string(),
2610                    marker_column,
2611                    prefix,
2612                    content: bq.content.to_string(),
2613                    has_no_space_after_marker: has_no_space,
2614                    has_multiple_spaces_after_marker: has_multiple_spaces,
2615                    needs_md028_fix,
2616                });
2617
2618                // Update is_horizontal_rule for blockquote content
2619                // The original detection doesn't strip blockquote prefix, so we need to check here
2620                if !lines[i].in_code_block && is_horizontal_rule_content(bq.content.trim()) {
2621                    lines[i].is_horizontal_rule = true;
2622                }
2623            }
2624
2625            // Now apply skip conditions for heading detection
2626            if lines[i].in_code_block {
2627                continue;
2628            }
2629
2630            // Skip lines in front matter
2631            if front_matter_end > 0 && i < front_matter_end {
2632                continue;
2633            }
2634
2635            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2636            if lines[i].in_html_block {
2637                continue;
2638            }
2639
2640            // Skip heading detection for blank lines
2641            if lines[i].is_blank {
2642                continue;
2643            }
2644
2645            // Check for ATX headings (but skip MkDocs snippet lines)
2646            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2647            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2648                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2649                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2650            } else {
2651                false
2652            };
2653
2654            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2655                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2656                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2657                    continue;
2658                }
2659                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2660                // This prevents false positives where `#fragment` is detected as a heading
2661                let line_offset = lines[i].byte_offset;
2662                if link_byte_ranges
2663                    .iter()
2664                    .any(|&(start, end)| line_offset > start && line_offset < end)
2665                {
2666                    continue;
2667                }
2668                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2669                let hashes = caps.get(2).map_or("", |m| m.as_str());
2670                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2671                let rest = caps.get(4).map_or("", |m| m.as_str());
2672
2673                let level = hashes.len() as u8;
2674                let marker_column = leading_spaces.len();
2675
2676                // Check for closing sequence, but handle custom IDs that might come after
2677                let (text, has_closing, closing_seq) = {
2678                    // First check if there's a custom ID at the end
2679                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2680                        // Check if this looks like a valid custom ID (ends with })
2681                        if rest[id_start..].trim_end().ends_with('}') {
2682                            // Split off the custom ID
2683                            (&rest[..id_start], &rest[id_start..])
2684                        } else {
2685                            (rest, "")
2686                        }
2687                    } else {
2688                        (rest, "")
2689                    };
2690
2691                    // Now look for closing hashes in the part before the custom ID
2692                    let trimmed_rest = rest_without_id.trim_end();
2693                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2694                        // Find the start of the hash sequence by walking backwards
2695                        // Use char_indices to get byte positions at char boundaries
2696                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2697
2698                        // Find which char index corresponds to last_hash_byte_pos
2699                        let last_hash_char_idx = char_positions
2700                            .iter()
2701                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2702
2703                        if let Some(mut char_idx) = last_hash_char_idx {
2704                            // Walk backwards to find start of hash sequence
2705                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2706                                char_idx -= 1;
2707                            }
2708
2709                            // Get the byte position of the start of hashes
2710                            let start_of_hashes = char_positions[char_idx].0;
2711
2712                            // Check if there's at least one space before the closing hashes
2713                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2714
2715                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2716                            let potential_closing = &trimmed_rest[start_of_hashes..];
2717                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2718
2719                            if is_all_hashes && has_space_before {
2720                                // This is a closing sequence
2721                                let closing_hashes = potential_closing.to_string();
2722                                // The text is everything before the closing hashes
2723                                // Don't include the custom ID here - it will be extracted later
2724                                let text_part = if !custom_id_part.is_empty() {
2725                                    // If we have a custom ID, append it back to get the full rest
2726                                    // This allows the extract_header_id function to handle it properly
2727                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2728                                } else {
2729                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2730                                };
2731                                (text_part, true, closing_hashes)
2732                            } else {
2733                                // Not a valid closing sequence, return the full content
2734                                (rest.to_string(), false, String::new())
2735                            }
2736                        } else {
2737                            // Couldn't find char boundary, return the full content
2738                            (rest.to_string(), false, String::new())
2739                        }
2740                    } else {
2741                        // No hashes found, return the full content
2742                        (rest.to_string(), false, String::new())
2743                    }
2744                };
2745
2746                let content_column = marker_column + hashes.len() + spaces_after.len();
2747
2748                // Extract custom header ID if present
2749                let raw_text = text.trim().to_string();
2750                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2751
2752                // If no custom ID was found on the header line, check the next line for standalone attr-list
2753                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2754                    let next_line = content_lines[i + 1];
2755                    if !lines[i + 1].in_code_block
2756                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2757                        && let Some(next_line_id) =
2758                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2759                    {
2760                        custom_id = Some(next_line_id);
2761                    }
2762                }
2763
2764                // ATX heading is "valid" for processing by heading rules if:
2765                // 1. Has space after # (CommonMark compliant): `# Heading`
2766                // 2. Is empty (just hashes): `#`
2767                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2768                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2769                //
2770                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2771                // - `#tag` - single # with lowercase (social hashtag)
2772                // - `#123` - single # with number (GitHub issue ref)
2773                let is_valid = !spaces_after.is_empty()
2774                    || rest.is_empty()
2775                    || level > 1
2776                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2777
2778                lines[i].heading = Some(HeadingInfo {
2779                    level,
2780                    style: HeadingStyle::ATX,
2781                    marker: hashes.to_string(),
2782                    marker_column,
2783                    content_column,
2784                    text: clean_text,
2785                    custom_id,
2786                    raw_text,
2787                    has_closing_sequence: has_closing,
2788                    closing_sequence: closing_seq,
2789                    is_valid,
2790                });
2791            }
2792            // Check for Setext headings (need to look at next line)
2793            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2794                let next_line = content_lines[i + 1];
2795                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2796                    // Skip if next line is front matter delimiter
2797                    if front_matter_end > 0 && i < front_matter_end {
2798                        continue;
2799                    }
2800
2801                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2802                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2803                    {
2804                        continue;
2805                    }
2806
2807                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2808                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2809                    let content_line = line.trim();
2810
2811                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2812                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2813                        continue;
2814                    }
2815
2816                    // Skip underscore thematic breaks (___)
2817                    if content_line.starts_with('_') {
2818                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2819                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2820                            continue;
2821                        }
2822                    }
2823
2824                    // Skip numbered lists (1. Item, 2. Item, etc.)
2825                    if let Some(first_char) = content_line.chars().next()
2826                        && first_char.is_ascii_digit()
2827                    {
2828                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2829                        if num_end < content_line.len() {
2830                            let next = content_line.chars().nth(num_end);
2831                            if next == Some('.') || next == Some(')') {
2832                                continue;
2833                            }
2834                        }
2835                    }
2836
2837                    // Skip ATX headings
2838                    if ATX_HEADING_REGEX.is_match(line) {
2839                        continue;
2840                    }
2841
2842                    // Skip blockquotes
2843                    if content_line.starts_with('>') {
2844                        continue;
2845                    }
2846
2847                    // Skip code fences
2848                    let trimmed_start = line.trim_start();
2849                    if trimmed_start.len() >= 3 {
2850                        let first_three: String = trimmed_start.chars().take(3).collect();
2851                        if first_three == "```" || first_three == "~~~" {
2852                            continue;
2853                        }
2854                    }
2855
2856                    // Skip HTML blocks
2857                    if content_line.starts_with('<') {
2858                        continue;
2859                    }
2860
2861                    let underline = next_line.trim();
2862
2863                    let level = if underline.starts_with('=') { 1 } else { 2 };
2864                    let style = if level == 1 {
2865                        HeadingStyle::Setext1
2866                    } else {
2867                        HeadingStyle::Setext2
2868                    };
2869
2870                    // Extract custom header ID if present
2871                    let raw_text = line.trim().to_string();
2872                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2873
2874                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2875                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2876                        let attr_line = content_lines[i + 2];
2877                        if !lines[i + 2].in_code_block
2878                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2879                            && let Some(attr_line_id) =
2880                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2881                        {
2882                            custom_id = Some(attr_line_id);
2883                        }
2884                    }
2885
2886                    lines[i].heading = Some(HeadingInfo {
2887                        level,
2888                        style,
2889                        marker: underline.to_string(),
2890                        marker_column: next_line.len() - next_line.trim_start().len(),
2891                        content_column: lines[i].indent,
2892                        text: clean_text,
2893                        custom_id,
2894                        raw_text,
2895                        has_closing_sequence: false,
2896                        closing_sequence: String::new(),
2897                        is_valid: true, // Setext headings are always valid
2898                    });
2899                }
2900            }
2901        }
2902    }
2903
2904    /// Detect HTML blocks in the content
2905    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2906        // HTML block elements that trigger block context
2907        // Includes HTML5 media, embedded content, and interactive elements
2908        const BLOCK_ELEMENTS: &[&str] = &[
2909            "address",
2910            "article",
2911            "aside",
2912            "audio",
2913            "blockquote",
2914            "canvas",
2915            "details",
2916            "dialog",
2917            "dd",
2918            "div",
2919            "dl",
2920            "dt",
2921            "embed",
2922            "fieldset",
2923            "figcaption",
2924            "figure",
2925            "footer",
2926            "form",
2927            "h1",
2928            "h2",
2929            "h3",
2930            "h4",
2931            "h5",
2932            "h6",
2933            "header",
2934            "hr",
2935            "iframe",
2936            "li",
2937            "main",
2938            "menu",
2939            "nav",
2940            "noscript",
2941            "object",
2942            "ol",
2943            "p",
2944            "picture",
2945            "pre",
2946            "script",
2947            "search",
2948            "section",
2949            "source",
2950            "style",
2951            "summary",
2952            "svg",
2953            "table",
2954            "tbody",
2955            "td",
2956            "template",
2957            "textarea",
2958            "tfoot",
2959            "th",
2960            "thead",
2961            "tr",
2962            "track",
2963            "ul",
2964            "video",
2965        ];
2966
2967        let mut i = 0;
2968        while i < lines.len() {
2969            // Skip if already in code block or front matter
2970            if lines[i].in_code_block || lines[i].in_front_matter {
2971                i += 1;
2972                continue;
2973            }
2974
2975            let trimmed = lines[i].content(content).trim_start();
2976
2977            // Check if line starts with an HTML tag
2978            if trimmed.starts_with('<') && trimmed.len() > 1 {
2979                // Extract tag name safely
2980                let after_bracket = &trimmed[1..];
2981                let is_closing = after_bracket.starts_with('/');
2982                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2983
2984                // Extract tag name (stop at space, >, /, or end of string)
2985                let tag_name = tag_start
2986                    .chars()
2987                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2988                    .collect::<String>()
2989                    .to_lowercase();
2990
2991                // Check if it's a block element
2992                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2993                    // Mark this line as in HTML block
2994                    lines[i].in_html_block = true;
2995
2996                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2997                    // This avoids complex nesting logic that might cause infinite loops
2998                    // Only search for closing tag on subsequent lines if the opening tag
2999                    // does NOT have its closing tag on the same line
3000                    if !is_closing {
3001                        let closing_tag = format!("</{tag_name}>");
3002
3003                        // Check if closing tag is on the same line as opening tag
3004                        // (e.g., <script src="..."></script> or <style>.class{}</style>)
3005                        let same_line_close = lines[i].content(content).contains(&closing_tag);
3006
3007                        // Only search subsequent lines if the tag isn't self-closed on this line
3008                        if !same_line_close {
3009                            // style and script tags can contain blank lines (CSS/JS formatting)
3010                            let allow_blank_lines = tag_name == "style" || tag_name == "script";
3011                            let mut j = i + 1;
3012                            let mut found_closing_tag = false;
3013                            while j < lines.len() && j < i + 100 {
3014                                // Limit search to 100 lines
3015                                // Stop at blank lines (except for style/script tags)
3016                                if !allow_blank_lines && lines[j].is_blank {
3017                                    break;
3018                                }
3019
3020                                lines[j].in_html_block = true;
3021
3022                                // Check if this line contains the closing tag
3023                                if lines[j].content(content).contains(&closing_tag) {
3024                                    found_closing_tag = true;
3025                                }
3026
3027                                // After finding closing tag, continue marking lines as
3028                                // in_html_block until blank line (per CommonMark spec)
3029                                if found_closing_tag {
3030                                    j += 1;
3031                                    // Continue marking subsequent lines until blank
3032                                    while j < lines.len() && j < i + 100 {
3033                                        if lines[j].is_blank {
3034                                            break;
3035                                        }
3036                                        lines[j].in_html_block = true;
3037                                        j += 1;
3038                                    }
3039                                    break;
3040                                }
3041                                j += 1;
3042                            }
3043                        }
3044                    }
3045                }
3046            }
3047
3048            i += 1;
3049        }
3050    }
3051
3052    /// Detect ESM import/export blocks anywhere in MDX files
3053    /// MDX 2.0+ allows imports/exports anywhere in the document, not just at the top
3054    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3055        // Only process MDX files
3056        if !flavor.supports_esm_blocks() {
3057            return;
3058        }
3059
3060        let mut in_multiline_import = false;
3061
3062        for line in lines.iter_mut() {
3063            // Skip code blocks, front matter, and HTML comments
3064            if line.in_code_block || line.in_front_matter || line.in_html_comment {
3065                in_multiline_import = false;
3066                continue;
3067            }
3068
3069            let line_content = line.content(content);
3070            let trimmed = line_content.trim();
3071
3072            // Handle continuation of multi-line import/export
3073            if in_multiline_import {
3074                line.in_esm_block = true;
3075                // Check if this line completes the statement
3076                // Multi-line import ends when we see the closing quote + optional semicolon
3077                if trimmed.ends_with('\'')
3078                    || trimmed.ends_with('"')
3079                    || trimmed.ends_with("';")
3080                    || trimmed.ends_with("\";")
3081                    || line_content.contains(';')
3082                {
3083                    in_multiline_import = false;
3084                }
3085                continue;
3086            }
3087
3088            // Skip blank lines
3089            if line.is_blank {
3090                continue;
3091            }
3092
3093            // Check if line starts with import or export
3094            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
3095                line.in_esm_block = true;
3096
3097                // Determine if this is a complete single-line statement or starts a multi-line one
3098                // Multi-line imports look like:
3099                //   import {
3100                //     Foo,
3101                //     Bar
3102                //   } from 'module'
3103                // Single-line imports/exports end with a quote, semicolon, or are simple exports
3104                let is_import = trimmed.starts_with("import ");
3105
3106                // Check for simple complete statements
3107                let is_complete =
3108                    // Ends with semicolon
3109                    trimmed.ends_with(';')
3110                    // import/export with from clause that ends with quote
3111                    || (trimmed.contains(" from ") && (trimmed.ends_with('\'') || trimmed.ends_with('"')))
3112                    // Simple export (export const/let/var/function/class without from)
3113                    || (!is_import && !trimmed.contains(" from ") && (
3114                        trimmed.starts_with("export const ")
3115                        || trimmed.starts_with("export let ")
3116                        || trimmed.starts_with("export var ")
3117                        || trimmed.starts_with("export function ")
3118                        || trimmed.starts_with("export class ")
3119                        || trimmed.starts_with("export default ")
3120                    ));
3121
3122                if !is_complete && is_import {
3123                    // Only imports can span multiple lines in the typical case
3124                    // Check if it looks like the start of a multi-line import
3125                    // e.g., "import {" or "import type {"
3126                    if trimmed.contains('{') && !trimmed.contains('}') {
3127                        in_multiline_import = true;
3128                    }
3129                }
3130            }
3131        }
3132    }
3133
3134    /// Detect JSX expressions {expression} and MDX comments {/* comment */} in MDX files
3135    /// Returns (jsx_expression_ranges, mdx_comment_ranges)
3136    fn detect_jsx_and_mdx_comments(
3137        content: &str,
3138        lines: &mut [LineInfo],
3139        flavor: MarkdownFlavor,
3140        code_blocks: &[(usize, usize)],
3141    ) -> (ByteRanges, ByteRanges) {
3142        // Only process MDX files
3143        if !flavor.supports_jsx() {
3144            return (Vec::new(), Vec::new());
3145        }
3146
3147        let mut jsx_expression_ranges: Vec<(usize, usize)> = Vec::new();
3148        let mut mdx_comment_ranges: Vec<(usize, usize)> = Vec::new();
3149
3150        // Quick check - if no braces, no JSX expressions or MDX comments
3151        if !content.contains('{') {
3152            return (jsx_expression_ranges, mdx_comment_ranges);
3153        }
3154
3155        let bytes = content.as_bytes();
3156        let mut i = 0;
3157
3158        while i < bytes.len() {
3159            if bytes[i] == b'{' {
3160                // Check if we're in a code block
3161                if code_blocks.iter().any(|(start, end)| i >= *start && i < *end) {
3162                    i += 1;
3163                    continue;
3164                }
3165
3166                let start = i;
3167
3168                // Check if it's an MDX comment: {/* ... */}
3169                if i + 2 < bytes.len() && &bytes[i + 1..i + 3] == b"/*" {
3170                    // Find the closing */}
3171                    let mut j = i + 3;
3172                    while j + 2 < bytes.len() {
3173                        if &bytes[j..j + 2] == b"*/" && j + 2 < bytes.len() && bytes[j + 2] == b'}' {
3174                            let end = j + 3;
3175                            mdx_comment_ranges.push((start, end));
3176
3177                            // Mark lines as in MDX comment
3178                            Self::mark_lines_in_range(lines, content, start, end, |line| {
3179                                line.in_mdx_comment = true;
3180                            });
3181
3182                            i = end;
3183                            break;
3184                        }
3185                        j += 1;
3186                    }
3187                    if j + 2 >= bytes.len() {
3188                        // Unclosed MDX comment - mark rest as comment
3189                        mdx_comment_ranges.push((start, bytes.len()));
3190                        Self::mark_lines_in_range(lines, content, start, bytes.len(), |line| {
3191                            line.in_mdx_comment = true;
3192                        });
3193                        break;
3194                    }
3195                } else {
3196                    // Regular JSX expression: { ... }
3197                    // Need to handle nested braces
3198                    let mut brace_depth = 1;
3199                    let mut j = i + 1;
3200                    let mut in_string = false;
3201                    let mut string_char = b'"';
3202
3203                    while j < bytes.len() && brace_depth > 0 {
3204                        let c = bytes[j];
3205
3206                        // Handle strings to avoid counting braces inside them
3207                        if !in_string && (c == b'"' || c == b'\'' || c == b'`') {
3208                            in_string = true;
3209                            string_char = c;
3210                        } else if in_string && c == string_char && (j == 0 || bytes[j - 1] != b'\\') {
3211                            in_string = false;
3212                        } else if !in_string {
3213                            if c == b'{' {
3214                                brace_depth += 1;
3215                            } else if c == b'}' {
3216                                brace_depth -= 1;
3217                            }
3218                        }
3219                        j += 1;
3220                    }
3221
3222                    if brace_depth == 0 {
3223                        let end = j;
3224                        jsx_expression_ranges.push((start, end));
3225
3226                        // Mark lines as in JSX expression
3227                        Self::mark_lines_in_range(lines, content, start, end, |line| {
3228                            line.in_jsx_expression = true;
3229                        });
3230
3231                        i = end;
3232                    } else {
3233                        i += 1;
3234                    }
3235                }
3236            } else {
3237                i += 1;
3238            }
3239        }
3240
3241        (jsx_expression_ranges, mdx_comment_ranges)
3242    }
3243
3244    /// Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
3245    /// and populate the corresponding fields in LineInfo
3246    fn detect_mkdocs_line_info(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
3247        if flavor != MarkdownFlavor::MkDocs {
3248            return;
3249        }
3250
3251        use crate::utils::mkdocs_admonitions;
3252        use crate::utils::mkdocs_definition_lists;
3253        use crate::utils::mkdocs_tabs;
3254
3255        let content_lines: Vec<&str> = content.lines().collect();
3256
3257        // Track admonition context
3258        let mut in_admonition = false;
3259        let mut admonition_indent = 0;
3260
3261        // Track tab context
3262        let mut in_tab = false;
3263        let mut tab_indent = 0;
3264
3265        // Track fenced code blocks within MkDocs containers (separate from pulldown-cmark detection)
3266        let mut in_mkdocs_fenced_code = false;
3267        let mut mkdocs_fence_marker: Option<String> = None;
3268
3269        // Track definition list context
3270        let mut in_definition = false;
3271
3272        // Track markdown-enabled HTML block context (grid cards, etc.)
3273        let mut markdown_html_tracker = MarkdownHtmlTracker::new();
3274
3275        for (i, line) in content_lines.iter().enumerate() {
3276            if i >= lines.len() {
3277                break;
3278            }
3279
3280            // Check for admonition markers first - even on lines marked as code blocks
3281            // Pulldown-cmark marks 4-space indented content as indented code blocks,
3282            // but in MkDocs this is admonition/tab content, not code.
3283            if mkdocs_admonitions::is_admonition_start(line) {
3284                in_admonition = true;
3285                admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3286                lines[i].in_admonition = true;
3287            } else if in_admonition {
3288                // Check if still in admonition content
3289                if line.trim().is_empty() {
3290                    // Blank lines are part of admonitions
3291                    lines[i].in_admonition = true;
3292                    // Override code block detection for blank lines inside admonitions
3293                    lines[i].in_code_block = false;
3294                } else if mkdocs_admonitions::is_admonition_content(line, admonition_indent) {
3295                    lines[i].in_admonition = true;
3296                    // Override code block detection - this is admonition content, not code
3297                    lines[i].in_code_block = false;
3298                } else {
3299                    // End of admonition
3300                    in_admonition = false;
3301                    // Check if this line starts a new admonition
3302                    if mkdocs_admonitions::is_admonition_start(line) {
3303                        in_admonition = true;
3304                        admonition_indent = mkdocs_admonitions::get_admonition_indent(line).unwrap_or(0);
3305                        lines[i].in_admonition = true;
3306                    }
3307                }
3308            }
3309
3310            // Check for tab markers - also before the code block skip
3311            // Tab content also uses 4-space indentation which pulldown-cmark treats as code
3312            if mkdocs_tabs::is_tab_marker(line) {
3313                in_tab = true;
3314                tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3315                lines[i].in_content_tab = true;
3316                // Reset fenced code tracking when entering new tab
3317                in_mkdocs_fenced_code = false;
3318                mkdocs_fence_marker = None;
3319            } else if in_tab {
3320                let trimmed = line.trim();
3321
3322                // Track fenced code blocks within tabs
3323                if !in_mkdocs_fenced_code {
3324                    // Check for fence start (``` or ~~~)
3325                    if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3326                        let fence_char = trimmed.chars().next().unwrap();
3327                        let fence_len = trimmed.chars().take_while(|&c| c == fence_char).count();
3328                        if fence_len >= 3 {
3329                            in_mkdocs_fenced_code = true;
3330                            mkdocs_fence_marker = Some(fence_char.to_string().repeat(fence_len));
3331                        }
3332                    }
3333                } else if let Some(ref marker) = mkdocs_fence_marker {
3334                    // Check for fence end (same or more chars)
3335                    let fence_char = marker.chars().next().unwrap();
3336                    if trimmed.starts_with(marker.as_str())
3337                        && trimmed
3338                            .chars()
3339                            .skip(marker.len())
3340                            .all(|c| c == fence_char || c.is_whitespace())
3341                    {
3342                        in_mkdocs_fenced_code = false;
3343                        mkdocs_fence_marker = None;
3344                    }
3345                }
3346
3347                // Check if still in tab content
3348                if line.trim().is_empty() {
3349                    // Blank lines are part of tabs
3350                    lines[i].in_content_tab = true;
3351                    // Only override code block if not in a fenced code block
3352                    if !in_mkdocs_fenced_code {
3353                        lines[i].in_code_block = false;
3354                    }
3355                } else if mkdocs_tabs::is_tab_content(line, tab_indent) {
3356                    lines[i].in_content_tab = true;
3357                    // Override INDENTED code block detection - this is tab content, not code
3358                    // But preserve fenced code block detection (```...```)
3359                    if !in_mkdocs_fenced_code {
3360                        lines[i].in_code_block = false;
3361                    }
3362                } else {
3363                    // End of tab content
3364                    in_tab = false;
3365                    in_mkdocs_fenced_code = false;
3366                    mkdocs_fence_marker = None;
3367                    // Check if this line starts a new tab
3368                    if mkdocs_tabs::is_tab_marker(line) {
3369                        in_tab = true;
3370                        tab_indent = mkdocs_tabs::get_tab_indent(line).unwrap_or(0);
3371                        lines[i].in_content_tab = true;
3372                    }
3373                }
3374            }
3375
3376            // Check for markdown-enabled HTML blocks (grid cards, etc.)
3377            // Supports div, section, article, aside, details, figure, footer, header, main, nav
3378            // with markdown, markdown="1", or markdown="block" attributes
3379            lines[i].in_mkdocs_html_markdown = markdown_html_tracker.process_line(line);
3380
3381            // Skip remaining detection for lines in actual code blocks
3382            if lines[i].in_code_block {
3383                continue;
3384            }
3385
3386            // Check for definition list items
3387            if mkdocs_definition_lists::is_definition_line(line) {
3388                in_definition = true;
3389                lines[i].in_definition_list = true;
3390            } else if in_definition {
3391                // Check if continuation
3392                if mkdocs_definition_lists::is_definition_continuation(line) {
3393                    lines[i].in_definition_list = true;
3394                } else if line.trim().is_empty() {
3395                    // Blank line might continue definition
3396                    lines[i].in_definition_list = true;
3397                } else if mkdocs_definition_lists::could_be_term_line(line) {
3398                    // This could be a new term - check if followed by definition
3399                    if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1])
3400                    {
3401                        lines[i].in_definition_list = true;
3402                    } else {
3403                        in_definition = false;
3404                    }
3405                } else {
3406                    in_definition = false;
3407                }
3408            } else if mkdocs_definition_lists::could_be_term_line(line) {
3409                // Check if this is a term followed by a definition
3410                if i + 1 < content_lines.len() && mkdocs_definition_lists::is_definition_line(content_lines[i + 1]) {
3411                    lines[i].in_definition_list = true;
3412                    in_definition = true;
3413                }
3414            }
3415        }
3416    }
3417
3418    /// Detect Obsidian comment blocks (%%...%%) in Obsidian flavor
3419    ///
3420    /// Obsidian comments use `%%` as delimiters:
3421    /// - Inline: `text %%hidden%% text`
3422    /// - Block: `%%\nmulti-line\n%%`
3423    ///
3424    /// Comments do NOT nest - the first `%%` after an opening `%%` closes the comment.
3425    /// Comments are NOT detected inside code blocks or HTML comments.
3426    ///
3427    /// Returns the computed comment ranges for use by rules that need position-level checking.
3428    fn detect_obsidian_comments(
3429        content: &str,
3430        lines: &mut [LineInfo],
3431        flavor: MarkdownFlavor,
3432        code_span_ranges: &[(usize, usize)],
3433    ) -> Vec<(usize, usize)> {
3434        // Only process Obsidian files
3435        if flavor != MarkdownFlavor::Obsidian {
3436            return Vec::new();
3437        }
3438
3439        // Compute Obsidian comment ranges (byte ranges)
3440        let comment_ranges = Self::compute_obsidian_comment_ranges(content, lines, code_span_ranges);
3441
3442        // Mark lines that fall within comment ranges
3443        for range in &comment_ranges {
3444            for line in lines.iter_mut() {
3445                // Skip lines in code blocks or HTML comments - they take precedence
3446                if line.in_code_block || line.in_html_comment {
3447                    continue;
3448                }
3449
3450                let line_start = line.byte_offset;
3451                let line_end = line.byte_offset + line.byte_len;
3452
3453                // Check if this line is entirely within a comment
3454                // A line is "in" a comment if it starts within or after the comment start
3455                // AND ends within or before the comment end
3456                if line_start >= range.0 && line_end <= range.1 {
3457                    line.in_obsidian_comment = true;
3458                } else if line_start < range.1 && line_end > range.0 {
3459                    // Line partially overlaps with comment - check if the overlap is significant
3460                    // For inline comments on a line, we still mark the line if any part is in comment
3461                    // However, for the filtered_lines API, we only skip lines entirely within comments
3462                    // This matches the behavior of HTML comments
3463
3464                    // Check if the ENTIRE line content (excluding leading/trailing whitespace)
3465                    // is within the comment range
3466                    let line_content_start = line_start;
3467                    let line_content_end = line_end;
3468
3469                    if line_content_start >= range.0 && line_content_end <= range.1 {
3470                        line.in_obsidian_comment = true;
3471                    }
3472                }
3473            }
3474        }
3475
3476        comment_ranges
3477    }
3478
3479    /// Compute byte ranges for all Obsidian comments in the content
3480    ///
3481    /// Returns a vector of (start, end) byte offset pairs for each comment.
3482    /// Comments do not nest - first `%%` after an opening `%%` closes it.
3483    fn compute_obsidian_comment_ranges(
3484        content: &str,
3485        lines: &[LineInfo],
3486        code_span_ranges: &[(usize, usize)],
3487    ) -> Vec<(usize, usize)> {
3488        let mut ranges = Vec::new();
3489
3490        // Quick check - if no %% at all, no comments
3491        if !content.contains("%%") {
3492            return ranges;
3493        }
3494
3495        // Build skip ranges for code blocks, HTML comments, and inline code spans
3496        // to avoid detecting %% inside those regions.
3497        let mut skip_ranges: Vec<(usize, usize)> = Vec::new();
3498        for line in lines {
3499            if line.in_code_block || line.in_html_comment {
3500                skip_ranges.push((line.byte_offset, line.byte_offset + line.byte_len));
3501            }
3502        }
3503        skip_ranges.extend(code_span_ranges.iter().copied());
3504
3505        if !skip_ranges.is_empty() {
3506            // Sort and merge overlapping ranges for efficient scanning
3507            skip_ranges.sort_by_key(|(start, _)| *start);
3508            let mut merged: Vec<(usize, usize)> = Vec::with_capacity(skip_ranges.len());
3509            for (start, end) in skip_ranges {
3510                if let Some((_, last_end)) = merged.last_mut()
3511                    && start <= *last_end
3512                {
3513                    *last_end = (*last_end).max(end);
3514                    continue;
3515                }
3516                merged.push((start, end));
3517            }
3518            skip_ranges = merged;
3519        }
3520
3521        let content_bytes = content.as_bytes();
3522        let len = content.len();
3523        let mut i = 0;
3524        let mut in_comment = false;
3525        let mut comment_start = 0;
3526        let mut skip_idx = 0;
3527
3528        while i < len.saturating_sub(1) {
3529            // Fast-skip any ranges we should ignore (code blocks, HTML comments, code spans)
3530            if skip_idx < skip_ranges.len() {
3531                let (skip_start, skip_end) = skip_ranges[skip_idx];
3532                if i >= skip_end {
3533                    skip_idx += 1;
3534                    continue;
3535                }
3536                if i >= skip_start {
3537                    i = skip_end;
3538                    continue;
3539                }
3540            }
3541
3542            // Check for %%
3543            if content_bytes[i] == b'%' && content_bytes[i + 1] == b'%' {
3544                if !in_comment {
3545                    // Opening %%
3546                    in_comment = true;
3547                    comment_start = i;
3548                    i += 2;
3549                } else {
3550                    // Closing %%
3551                    let comment_end = i + 2;
3552                    ranges.push((comment_start, comment_end));
3553                    in_comment = false;
3554                    i += 2;
3555                }
3556            } else {
3557                i += 1;
3558            }
3559        }
3560
3561        // Handle unclosed comment - extends to end of document
3562        if in_comment {
3563            ranges.push((comment_start, len));
3564        }
3565
3566        ranges
3567    }
3568
3569    /// Helper to mark lines within a byte range
3570    fn mark_lines_in_range<F>(lines: &mut [LineInfo], content: &str, start: usize, end: usize, mut f: F)
3571    where
3572        F: FnMut(&mut LineInfo),
3573    {
3574        // Find lines that overlap with the range
3575        for line in lines.iter_mut() {
3576            let line_start = line.byte_offset;
3577            let line_end = line.byte_offset + line.byte_len;
3578
3579            // Check if this line overlaps with the range
3580            if line_start < end && line_end > start {
3581                f(line);
3582            }
3583        }
3584
3585        // Silence unused warning for content (needed for signature consistency)
3586        let _ = content;
3587    }
3588
3589    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
3590    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
3591        // Quick check - if no backticks, no code spans
3592        if !content.contains('`') {
3593            return Vec::new();
3594        }
3595
3596        // Use pulldown-cmark's streaming parser with byte offsets
3597        let parser = Parser::new(content).into_offset_iter();
3598        let mut ranges = Vec::new();
3599
3600        for (event, range) in parser {
3601            if let Event::Code(_) = event {
3602                ranges.push((range.start, range.end));
3603            }
3604        }
3605
3606        Self::build_code_spans_from_ranges(content, lines, &ranges)
3607    }
3608
3609    fn build_code_spans_from_ranges(content: &str, lines: &[LineInfo], ranges: &[(usize, usize)]) -> Vec<CodeSpan> {
3610        let mut code_spans = Vec::new();
3611        if ranges.is_empty() {
3612            return code_spans;
3613        }
3614
3615        for &(start_pos, end_pos) in ranges {
3616            // The range includes the backticks, extract the actual content
3617            let full_span = &content[start_pos..end_pos];
3618            let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
3619
3620            // Extract content between backticks, preserving spaces
3621            let content_start = start_pos + backtick_count;
3622            let content_end = end_pos - backtick_count;
3623            let span_content = if content_start < content_end {
3624                content[content_start..content_end].to_string()
3625            } else {
3626                String::new()
3627            };
3628
3629            // Use binary search to find line number - O(log n) instead of O(n)
3630            // Find the rightmost line whose byte_offset <= start_pos
3631            let line_idx = lines
3632                .partition_point(|line| line.byte_offset <= start_pos)
3633                .saturating_sub(1);
3634            let line_num = line_idx + 1;
3635            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3636
3637            // Find end column using binary search
3638            let end_line_idx = lines
3639                .partition_point(|line| line.byte_offset <= end_pos)
3640                .saturating_sub(1);
3641            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3642
3643            // Convert byte offsets to character positions for correct Unicode handling
3644            // This ensures consistency with warning.column which uses character positions
3645            let line_content = lines[line_idx].content(content);
3646            let col_start = if byte_col_start <= line_content.len() {
3647                line_content[..byte_col_start].chars().count()
3648            } else {
3649                line_content.chars().count()
3650            };
3651
3652            let end_line_content = lines[end_line_idx].content(content);
3653            let col_end = if byte_col_end <= end_line_content.len() {
3654                end_line_content[..byte_col_end].chars().count()
3655            } else {
3656                end_line_content.chars().count()
3657            };
3658
3659            code_spans.push(CodeSpan {
3660                line: line_num,
3661                end_line: end_line_idx + 1,
3662                start_col: col_start,
3663                end_col: col_end,
3664                byte_offset: start_pos,
3665                byte_end: end_pos,
3666                backtick_count,
3667                content: span_content,
3668            });
3669        }
3670
3671        // Sort by position to ensure consistent ordering
3672        code_spans.sort_by_key(|span| span.byte_offset);
3673
3674        code_spans
3675    }
3676
3677    /// Parse all math spans (inline $...$ and display $$...$$) using pulldown-cmark
3678    fn parse_math_spans(content: &str, lines: &[LineInfo]) -> Vec<MathSpan> {
3679        let mut math_spans = Vec::new();
3680
3681        // Quick check - if no $ signs, no math spans
3682        if !content.contains('$') {
3683            return math_spans;
3684        }
3685
3686        // Use pulldown-cmark with ENABLE_MATH option
3687        let mut options = Options::empty();
3688        options.insert(Options::ENABLE_MATH);
3689        let parser = Parser::new_ext(content, options).into_offset_iter();
3690
3691        for (event, range) in parser {
3692            let (is_display, math_content) = match &event {
3693                Event::InlineMath(text) => (false, text.as_ref()),
3694                Event::DisplayMath(text) => (true, text.as_ref()),
3695                _ => continue,
3696            };
3697
3698            let start_pos = range.start;
3699            let end_pos = range.end;
3700
3701            // Use binary search to find line number - O(log n) instead of O(n)
3702            let line_idx = lines
3703                .partition_point(|line| line.byte_offset <= start_pos)
3704                .saturating_sub(1);
3705            let line_num = line_idx + 1;
3706            let byte_col_start = start_pos - lines[line_idx].byte_offset;
3707
3708            // Find end column using binary search
3709            let end_line_idx = lines
3710                .partition_point(|line| line.byte_offset <= end_pos)
3711                .saturating_sub(1);
3712            let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
3713
3714            // Convert byte offsets to character positions for correct Unicode handling
3715            let line_content = lines[line_idx].content(content);
3716            let col_start = if byte_col_start <= line_content.len() {
3717                line_content[..byte_col_start].chars().count()
3718            } else {
3719                line_content.chars().count()
3720            };
3721
3722            let end_line_content = lines[end_line_idx].content(content);
3723            let col_end = if byte_col_end <= end_line_content.len() {
3724                end_line_content[..byte_col_end].chars().count()
3725            } else {
3726                end_line_content.chars().count()
3727            };
3728
3729            math_spans.push(MathSpan {
3730                line: line_num,
3731                end_line: end_line_idx + 1,
3732                start_col: col_start,
3733                end_col: col_end,
3734                byte_offset: start_pos,
3735                byte_end: end_pos,
3736                is_display,
3737                content: math_content.to_string(),
3738            });
3739        }
3740
3741        // Sort by position to ensure consistent ordering
3742        math_spans.sort_by_key(|span| span.byte_offset);
3743
3744        math_spans
3745    }
3746
3747    /// Parse all list blocks in the content (legacy line-by-line approach)
3748    ///
3749    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
3750    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
3751    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
3752    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
3753    ///   treated as list continuation (based on the list marker width)
3754    ///
3755    /// When a new list item is encountered, we check if list-breaking content was seen
3756    /// since the last item. If so, we start a new list block.
3757    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
3758        // Minimum indentation for unordered list continuation per CommonMark spec
3759        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
3760
3761        /// Initialize or reset the forward-scanning tracking state.
3762        /// This helper eliminates code duplication across three initialization sites.
3763        #[inline]
3764        fn reset_tracking_state(
3765            list_item: &ListItemInfo,
3766            has_list_breaking_content: &mut bool,
3767            min_continuation: &mut usize,
3768        ) {
3769            *has_list_breaking_content = false;
3770            let marker_width = if list_item.is_ordered {
3771                list_item.marker.len() + 1 // Ordered markers need space after period/paren
3772            } else {
3773                list_item.marker.len()
3774            };
3775            *min_continuation = if list_item.is_ordered {
3776                marker_width
3777            } else {
3778                UNORDERED_LIST_MIN_CONTINUATION_INDENT
3779            };
3780        }
3781
3782        // Pre-size based on lines that could be list items
3783        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
3784        let mut current_block: Option<ListBlock> = None;
3785        let mut last_list_item_line = 0;
3786        let mut current_indent_level = 0;
3787        let mut last_marker_width = 0;
3788
3789        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
3790        let mut has_list_breaking_content_since_last_item = false;
3791        let mut min_continuation_for_tracking = 0;
3792
3793        for (line_idx, line_info) in lines.iter().enumerate() {
3794            let line_num = line_idx + 1;
3795
3796            // Enhanced code block handling using Design #3's context analysis
3797            if line_info.in_code_block {
3798                if let Some(ref mut block) = current_block {
3799                    // Calculate minimum indentation for list continuation
3800                    let min_continuation_indent =
3801                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
3802
3803                    // Analyze code block context using the three-tier classification
3804                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
3805
3806                    match context {
3807                        CodeBlockContext::Indented => {
3808                            // Code block is properly indented - continues the list
3809                            block.end_line = line_num;
3810                            continue;
3811                        }
3812                        CodeBlockContext::Standalone => {
3813                            // Code block separates lists - end current block
3814                            let completed_block = current_block.take().unwrap();
3815                            list_blocks.push(completed_block);
3816                            continue;
3817                        }
3818                        CodeBlockContext::Adjacent => {
3819                            // Edge case - use conservative behavior (continue list)
3820                            block.end_line = line_num;
3821                            continue;
3822                        }
3823                    }
3824                } else {
3825                    // No current list block - skip code block lines
3826                    continue;
3827                }
3828            }
3829
3830            // Extract blockquote prefix if any
3831            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
3832                caps.get(0).unwrap().as_str().to_string()
3833            } else {
3834                String::new()
3835            };
3836
3837            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
3838            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
3839            if let Some(ref block) = current_block
3840                && line_info.list_item.is_none()
3841                && !line_info.is_blank
3842                && !line_info.in_code_span_continuation
3843            {
3844                let line_content = line_info.content(content).trim();
3845
3846                // Check for structural separators that break lists
3847                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
3848                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
3849                // as they indicate improper indentation rather than lazy continuation.
3850                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
3851
3852                // Check if blockquote context changes (different prefix than current block)
3853                // Lines within the SAME blockquote context don't break lists
3854                let blockquote_prefix_changes = blockquote_prefix.trim() != block.blockquote_prefix.trim();
3855
3856                let breaks_list = line_info.heading.is_some()
3857                    || line_content.starts_with("---")
3858                    || line_content.starts_with("***")
3859                    || line_content.starts_with("___")
3860                    || crate::utils::skip_context::is_table_line(line_content)
3861                    || blockquote_prefix_changes
3862                    || (line_info.indent > 0
3863                        && line_info.indent < min_continuation_for_tracking
3864                        && !is_lazy_continuation);
3865
3866                if breaks_list {
3867                    has_list_breaking_content_since_last_item = true;
3868                }
3869            }
3870
3871            // If this line is a code span continuation within an active list block,
3872            // extend the block's end_line to include this line (maintains list continuity)
3873            if line_info.in_code_span_continuation
3874                && line_info.list_item.is_none()
3875                && let Some(ref mut block) = current_block
3876            {
3877                block.end_line = line_num;
3878            }
3879
3880            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
3881            // properly indented lines within the list). This ensures the workaround at line 2448
3882            // works correctly when there are multiple continuation lines before a nested list item.
3883            // Also include lazy continuation lines (indent=0) per CommonMark spec.
3884            // For blockquote lines, compute effective indent after stripping the prefix
3885            let effective_continuation_indent = if let Some(ref block) = current_block {
3886                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3887                let line_content = line_info.content(content);
3888                let line_bq_level = line_content
3889                    .chars()
3890                    .take_while(|c| *c == '>' || c.is_whitespace())
3891                    .filter(|&c| c == '>')
3892                    .count();
3893                if line_bq_level > 0 && line_bq_level == block_bq_level {
3894                    // Compute indent after blockquote markers
3895                    let mut pos = 0;
3896                    let mut found_markers = 0;
3897                    for c in line_content.chars() {
3898                        pos += c.len_utf8();
3899                        if c == '>' {
3900                            found_markers += 1;
3901                            if found_markers == line_bq_level {
3902                                if line_content.get(pos..pos + 1) == Some(" ") {
3903                                    pos += 1;
3904                                }
3905                                break;
3906                            }
3907                        }
3908                    }
3909                    let after_bq = &line_content[pos..];
3910                    after_bq.len() - after_bq.trim_start().len()
3911                } else {
3912                    line_info.indent
3913                }
3914            } else {
3915                line_info.indent
3916            };
3917            let adjusted_min_continuation_for_tracking = if let Some(ref block) = current_block {
3918                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3919                if block_bq_level > 0 {
3920                    if block.is_ordered { last_marker_width } else { 2 }
3921                } else {
3922                    min_continuation_for_tracking
3923                }
3924            } else {
3925                min_continuation_for_tracking
3926            };
3927            // Lazy continuation allows unindented text to continue a list item,
3928            // but NOT structural elements like headings, code fences, or horizontal rules
3929            let is_structural_element = line_info.heading.is_some()
3930                || line_info.content(content).trim().starts_with("```")
3931                || line_info.content(content).trim().starts_with("~~~");
3932            let is_valid_continuation = effective_continuation_indent >= adjusted_min_continuation_for_tracking
3933                || (line_info.indent == 0 && !line_info.is_blank && !is_structural_element);
3934
3935            if std::env::var("RUMDL_DEBUG_LIST").is_ok() && line_info.list_item.is_none() && !line_info.is_blank {
3936                eprintln!(
3937                    "[DEBUG] Line {}: checking continuation - indent={}, min_cont={}, is_valid={}, in_code_span={}, in_code_block={}, has_block={}",
3938                    line_num,
3939                    effective_continuation_indent,
3940                    adjusted_min_continuation_for_tracking,
3941                    is_valid_continuation,
3942                    line_info.in_code_span_continuation,
3943                    line_info.in_code_block,
3944                    current_block.is_some()
3945                );
3946            }
3947
3948            if !line_info.in_code_span_continuation
3949                && line_info.list_item.is_none()
3950                && !line_info.is_blank
3951                && !line_info.in_code_block
3952                && is_valid_continuation
3953                && let Some(ref mut block) = current_block
3954            {
3955                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3956                    eprintln!(
3957                        "[DEBUG] Line {}: extending block.end_line from {} to {}",
3958                        line_num, block.end_line, line_num
3959                    );
3960                }
3961                block.end_line = line_num;
3962            }
3963
3964            // Check if this line is a list item
3965            if let Some(list_item) = &line_info.list_item {
3966                // Calculate nesting level based on indentation
3967                let item_indent = list_item.marker_column;
3968                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
3969
3970                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
3971                    eprintln!(
3972                        "[DEBUG] Line {}: list item found, marker={:?}, indent={}",
3973                        line_num, list_item.marker, item_indent
3974                    );
3975                }
3976
3977                if let Some(ref mut block) = current_block {
3978                    // Check if this continues the current block
3979                    // For nested lists, we need to check if this is a nested item (higher nesting level)
3980                    // or a continuation at the same or lower level
3981                    let is_nested = nesting > block.nesting_level;
3982                    let same_type =
3983                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
3984                    let same_context = block.blockquote_prefix == blockquote_prefix;
3985                    // Allow one blank line after last item, or lines immediately after block content
3986                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
3987
3988                    // For unordered lists, also check marker consistency
3989                    let marker_compatible =
3990                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
3991
3992                    // O(1) check: Use the tracked variable instead of O(n) nested loop
3993                    // This eliminates the quadratic bottleneck from issue #148
3994                    let has_non_list_content = has_list_breaking_content_since_last_item;
3995
3996                    // A list continues if:
3997                    // 1. It's a nested item (indented more than the parent), OR
3998                    // 2. It's the same type at the same level with reasonable distance
3999                    let mut continues_list = if is_nested {
4000                        // Nested items always continue the list if they're in the same context
4001                        same_context && reasonable_distance && !has_non_list_content
4002                    } else {
4003                        // Same-level items need to match type and markers
4004                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
4005                    };
4006
4007                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4008                        eprintln!(
4009                            "[DEBUG] Line {}: continues_list={}, is_nested={}, same_type={}, same_context={}, reasonable_distance={}, marker_compatible={}, has_non_list_content={}, last_item={}, block.end_line={}",
4010                            line_num,
4011                            continues_list,
4012                            is_nested,
4013                            same_type,
4014                            same_context,
4015                            reasonable_distance,
4016                            marker_compatible,
4017                            has_non_list_content,
4018                            last_list_item_line,
4019                            block.end_line
4020                        );
4021                    }
4022
4023                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
4024                    // This handles edge cases where content patterns might otherwise split lists incorrectly
4025                    // Apply for: nested items (different types OK), OR same-level same-type items
4026                    if !continues_list
4027                        && (is_nested || same_type)
4028                        && reasonable_distance
4029                        && line_num > 0
4030                        && block.end_line == line_num - 1
4031                    {
4032                        // Check if the previous line was a list item or a continuation of a list item
4033                        // (including lazy continuation lines)
4034                        if block.item_lines.contains(&(line_num - 1)) {
4035                            // They're consecutive list items - force them to be in the same list
4036                            continues_list = true;
4037                        } else {
4038                            // Previous line is a continuation line within this block
4039                            // (e.g., lazy continuation with indent=0)
4040                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
4041                            continues_list = true;
4042                        }
4043                    }
4044
4045                    if continues_list {
4046                        // Extend current block
4047                        block.end_line = line_num;
4048                        block.item_lines.push(line_num);
4049
4050                        // Update max marker width
4051                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
4052                            list_item.marker.len() + 1
4053                        } else {
4054                            list_item.marker.len()
4055                        });
4056
4057                        // Update marker consistency for unordered lists
4058                        if !block.is_ordered
4059                            && block.marker.is_some()
4060                            && block.marker.as_ref() != Some(&list_item.marker)
4061                        {
4062                            // Mixed markers, clear the marker field
4063                            block.marker = None;
4064                        }
4065
4066                        // Reset tracked state for issue #148 optimization
4067                        reset_tracking_state(
4068                            list_item,
4069                            &mut has_list_breaking_content_since_last_item,
4070                            &mut min_continuation_for_tracking,
4071                        );
4072                    } else {
4073                        // End current block and start a new one
4074                        // When a different list type starts AT THE SAME LEVEL (not nested),
4075                        // trim back lazy continuation lines (they become part of the gap, not the list)
4076                        // For nested items, different types are fine - they're sub-lists
4077                        if !same_type
4078                            && !is_nested
4079                            && let Some(&last_item) = block.item_lines.last()
4080                        {
4081                            block.end_line = last_item;
4082                        }
4083
4084                        list_blocks.push(block.clone());
4085
4086                        *block = ListBlock {
4087                            start_line: line_num,
4088                            end_line: line_num,
4089                            is_ordered: list_item.is_ordered,
4090                            marker: if list_item.is_ordered {
4091                                None
4092                            } else {
4093                                Some(list_item.marker.clone())
4094                            },
4095                            blockquote_prefix: blockquote_prefix.clone(),
4096                            item_lines: vec![line_num],
4097                            nesting_level: nesting,
4098                            max_marker_width: if list_item.is_ordered {
4099                                list_item.marker.len() + 1
4100                            } else {
4101                                list_item.marker.len()
4102                            },
4103                        };
4104
4105                        // Initialize tracked state for new block (issue #148 optimization)
4106                        reset_tracking_state(
4107                            list_item,
4108                            &mut has_list_breaking_content_since_last_item,
4109                            &mut min_continuation_for_tracking,
4110                        );
4111                    }
4112                } else {
4113                    // Start a new block
4114                    current_block = Some(ListBlock {
4115                        start_line: line_num,
4116                        end_line: line_num,
4117                        is_ordered: list_item.is_ordered,
4118                        marker: if list_item.is_ordered {
4119                            None
4120                        } else {
4121                            Some(list_item.marker.clone())
4122                        },
4123                        blockquote_prefix,
4124                        item_lines: vec![line_num],
4125                        nesting_level: nesting,
4126                        max_marker_width: list_item.marker.len(),
4127                    });
4128
4129                    // Initialize tracked state for new block (issue #148 optimization)
4130                    reset_tracking_state(
4131                        list_item,
4132                        &mut has_list_breaking_content_since_last_item,
4133                        &mut min_continuation_for_tracking,
4134                    );
4135                }
4136
4137                last_list_item_line = line_num;
4138                current_indent_level = item_indent;
4139                last_marker_width = if list_item.is_ordered {
4140                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
4141                } else {
4142                    list_item.marker.len()
4143                };
4144            } else if let Some(ref mut block) = current_block {
4145                // Not a list item - check if it continues the current block
4146                if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4147                    eprintln!(
4148                        "[DEBUG] Line {}: non-list-item, is_blank={}, block exists",
4149                        line_num, line_info.is_blank
4150                    );
4151                }
4152
4153                // For MD032 compatibility, we use a simple approach:
4154                // - Indented lines continue the list
4155                // - Blank lines followed by indented content continue the list
4156                // - Everything else ends the list
4157
4158                // Check if the last line in the list block ended with a backslash (hard line break)
4159                // This handles cases where list items use backslash for hard line breaks
4160                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
4161                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
4162                } else {
4163                    false
4164                };
4165
4166                // Calculate minimum indentation for list continuation
4167                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
4168                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
4169                let min_continuation_indent = if block.is_ordered {
4170                    current_indent_level + last_marker_width
4171                } else {
4172                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
4173                };
4174
4175                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
4176                    // Indented line or backslash continuation continues the list
4177                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4178                        eprintln!(
4179                            "[DEBUG] Line {}: indented continuation (indent={}, min={})",
4180                            line_num, line_info.indent, min_continuation_indent
4181                        );
4182                    }
4183                    block.end_line = line_num;
4184                } else if line_info.is_blank {
4185                    // Blank line - check if it's internal to the list or ending it
4186                    // We only include blank lines that are followed by more list content
4187                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4188                        eprintln!("[DEBUG] Line {line_num}: entering blank line handling");
4189                    }
4190                    let mut check_idx = line_idx + 1;
4191                    let mut found_continuation = false;
4192
4193                    // Skip additional blank lines
4194                    while check_idx < lines.len() && lines[check_idx].is_blank {
4195                        check_idx += 1;
4196                    }
4197
4198                    if check_idx < lines.len() {
4199                        let next_line = &lines[check_idx];
4200                        // For blockquote lines, compute indent AFTER stripping the blockquote prefix
4201                        let next_content = next_line.content(content);
4202                        // Use blockquote level (count of >) to compare, not the full prefix
4203                        // This avoids issues where the regex captures extra whitespace
4204                        let block_bq_level_for_indent = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4205                        let next_bq_level_for_indent = next_content
4206                            .chars()
4207                            .take_while(|c| *c == '>' || c.is_whitespace())
4208                            .filter(|&c| c == '>')
4209                            .count();
4210                        let effective_indent =
4211                            if next_bq_level_for_indent > 0 && next_bq_level_for_indent == block_bq_level_for_indent {
4212                                // For lines in the same blockquote context, compute indent after the blockquote marker(s)
4213                                // Find position after ">" and one space
4214                                let mut pos = 0;
4215                                let mut found_markers = 0;
4216                                for c in next_content.chars() {
4217                                    pos += c.len_utf8();
4218                                    if c == '>' {
4219                                        found_markers += 1;
4220                                        if found_markers == next_bq_level_for_indent {
4221                                            // Skip optional space after last >
4222                                            if next_content.get(pos..pos + 1) == Some(" ") {
4223                                                pos += 1;
4224                                            }
4225                                            break;
4226                                        }
4227                                    }
4228                                }
4229                                let after_blockquote_marker = &next_content[pos..];
4230                                after_blockquote_marker.len() - after_blockquote_marker.trim_start().len()
4231                            } else {
4232                                next_line.indent
4233                            };
4234                        // Also adjust min_continuation_indent for blockquote lists
4235                        // The marker_column includes blockquote prefix, so subtract it
4236                        let adjusted_min_continuation = if block_bq_level_for_indent > 0 {
4237                            // For blockquote lists, the continuation is relative to blockquote content
4238                            // current_indent_level includes blockquote prefix (2 for "> "), so use just 2 for unordered
4239                            if block.is_ordered { last_marker_width } else { 2 }
4240                        } else {
4241                            min_continuation_indent
4242                        };
4243                        // Check if followed by indented content (list continuation)
4244                        if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4245                            eprintln!(
4246                                "[DEBUG] Blank line {} checking next line {}: effective_indent={}, adjusted_min={}, next_is_list={}, in_code_block={}",
4247                                line_num,
4248                                check_idx + 1,
4249                                effective_indent,
4250                                adjusted_min_continuation,
4251                                next_line.list_item.is_some(),
4252                                next_line.in_code_block
4253                            );
4254                        }
4255                        if !next_line.in_code_block && effective_indent >= adjusted_min_continuation {
4256                            found_continuation = true;
4257                        }
4258                        // Check if followed by another list item at the same level
4259                        else if !next_line.in_code_block
4260                            && next_line.list_item.is_some()
4261                            && let Some(item) = &next_line.list_item
4262                        {
4263                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
4264                                .find(next_line.content(content))
4265                                .map_or(String::new(), |m| m.as_str().to_string());
4266                            if item.marker_column == current_indent_level
4267                                && item.is_ordered == block.is_ordered
4268                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
4269                            {
4270                                // Check if there was meaningful content between the list items (unused now)
4271                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
4272                                // Pre-compute block's blockquote level for use in closures
4273                                let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4274                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
4275                                    if let Some(between_line) = lines.get(idx) {
4276                                        let between_content = between_line.content(content);
4277                                        let trimmed = between_content.trim();
4278                                        // Skip empty lines
4279                                        if trimmed.is_empty() {
4280                                            return false;
4281                                        }
4282                                        // Check for meaningful content
4283                                        let line_indent = between_content.len() - between_content.trim_start().len();
4284
4285                                        // Check if blockquote level changed (not just if line starts with ">")
4286                                        let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4287                                            .find(between_content)
4288                                            .map_or(String::new(), |m| m.as_str().to_string());
4289                                        let between_bq_level = between_bq_prefix.chars().filter(|&c| c == '>').count();
4290                                        let blockquote_level_changed =
4291                                            trimmed.starts_with(">") && between_bq_level != block_bq_level;
4292
4293                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
4294                                        if trimmed.starts_with("```")
4295                                            || trimmed.starts_with("~~~")
4296                                            || trimmed.starts_with("---")
4297                                            || trimmed.starts_with("***")
4298                                            || trimmed.starts_with("___")
4299                                            || blockquote_level_changed
4300                                            || crate::utils::skip_context::is_table_line(trimmed)
4301                                            || between_line.heading.is_some()
4302                                        {
4303                                            return true; // These are structural separators - meaningful content that breaks lists
4304                                        }
4305
4306                                        // Only properly indented content continues the list
4307                                        line_indent >= min_continuation_indent
4308                                    } else {
4309                                        false
4310                                    }
4311                                });
4312
4313                                if block.is_ordered {
4314                                    // For ordered lists: don't continue if there are structural separators
4315                                    // Check if there are structural separators between the list items
4316                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4317                                        if let Some(between_line) = lines.get(idx) {
4318                                            let between_content = between_line.content(content);
4319                                            let trimmed = between_content.trim();
4320                                            if trimmed.is_empty() {
4321                                                return false;
4322                                            }
4323                                            // Check if blockquote level changed (not just if line starts with ">")
4324                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4325                                                .find(between_content)
4326                                                .map_or(String::new(), |m| m.as_str().to_string());
4327                                            let between_bq_level =
4328                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4329                                            let blockquote_level_changed =
4330                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4331                                            // Check for structural separators that break lists
4332                                            trimmed.starts_with("```")
4333                                                || trimmed.starts_with("~~~")
4334                                                || trimmed.starts_with("---")
4335                                                || trimmed.starts_with("***")
4336                                                || trimmed.starts_with("___")
4337                                                || blockquote_level_changed
4338                                                || crate::utils::skip_context::is_table_line(trimmed)
4339                                                || between_line.heading.is_some()
4340                                        } else {
4341                                            false
4342                                        }
4343                                    });
4344                                    found_continuation = !has_structural_separators;
4345                                } else {
4346                                    // For unordered lists: also check for structural separators
4347                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
4348                                        if let Some(between_line) = lines.get(idx) {
4349                                            let between_content = between_line.content(content);
4350                                            let trimmed = between_content.trim();
4351                                            if trimmed.is_empty() {
4352                                                return false;
4353                                            }
4354                                            // Check if blockquote level changed (not just if line starts with ">")
4355                                            let between_bq_prefix = BLOCKQUOTE_PREFIX_REGEX
4356                                                .find(between_content)
4357                                                .map_or(String::new(), |m| m.as_str().to_string());
4358                                            let between_bq_level =
4359                                                between_bq_prefix.chars().filter(|&c| c == '>').count();
4360                                            let blockquote_level_changed =
4361                                                trimmed.starts_with(">") && between_bq_level != block_bq_level;
4362                                            // Check for structural separators that break lists
4363                                            trimmed.starts_with("```")
4364                                                || trimmed.starts_with("~~~")
4365                                                || trimmed.starts_with("---")
4366                                                || trimmed.starts_with("***")
4367                                                || trimmed.starts_with("___")
4368                                                || blockquote_level_changed
4369                                                || crate::utils::skip_context::is_table_line(trimmed)
4370                                                || between_line.heading.is_some()
4371                                        } else {
4372                                            false
4373                                        }
4374                                    });
4375                                    found_continuation = !has_structural_separators;
4376                                }
4377                            }
4378                        }
4379                    }
4380
4381                    if std::env::var("RUMDL_DEBUG_LIST").is_ok() {
4382                        eprintln!("[DEBUG] Blank line {line_num} final: found_continuation={found_continuation}");
4383                    }
4384                    if found_continuation {
4385                        // Include the blank line in the block
4386                        block.end_line = line_num;
4387                    } else {
4388                        // Blank line ends the list - don't include it
4389                        list_blocks.push(block.clone());
4390                        current_block = None;
4391                    }
4392                } else {
4393                    // Check for lazy continuation - non-indented line immediately after a list item
4394                    // But only if the line has sufficient indentation for the list type
4395                    let min_required_indent = if block.is_ordered {
4396                        current_indent_level + last_marker_width
4397                    } else {
4398                        current_indent_level + 2
4399                    };
4400
4401                    // For lazy continuation to apply, the line must either:
4402                    // 1. Have no indentation (true lazy continuation)
4403                    // 2. Have sufficient indentation for the list type
4404                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
4405                    let line_content = line_info.content(content).trim();
4406
4407                    // Check for table-like patterns
4408                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
4409
4410                    // Check if blockquote level changed (not just if line starts with ">")
4411                    // Lines within the same blockquote level are NOT structural separators
4412                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
4413                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
4414                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
4415
4416                    let is_structural_separator = line_info.heading.is_some()
4417                        || line_content.starts_with("```")
4418                        || line_content.starts_with("~~~")
4419                        || line_content.starts_with("---")
4420                        || line_content.starts_with("***")
4421                        || line_content.starts_with("___")
4422                        || blockquote_level_changed
4423                        || looks_like_table;
4424
4425                    // Allow lazy continuation if we're still within the same list block
4426                    // (not just immediately after a list item)
4427                    // Also treat code span continuations as valid continuations regardless of indent
4428                    let is_lazy_continuation = !is_structural_separator
4429                        && !line_info.is_blank
4430                        && (line_info.indent == 0
4431                            || line_info.indent >= min_required_indent
4432                            || line_info.in_code_span_continuation);
4433
4434                    if is_lazy_continuation {
4435                        // Per CommonMark, lazy continuation continues until a blank line
4436                        // or structural element, regardless of uppercase at line start
4437                        block.end_line = line_num;
4438                    } else {
4439                        // Non-indented, non-blank line that's not a lazy continuation - end the block
4440                        list_blocks.push(block.clone());
4441                        current_block = None;
4442                    }
4443                }
4444            }
4445        }
4446
4447        // Don't forget the last block
4448        if let Some(block) = current_block {
4449            list_blocks.push(block);
4450        }
4451
4452        // Merge adjacent blocks that should be one
4453        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
4454
4455        list_blocks
4456    }
4457
4458    /// Compute character frequency for fast content analysis
4459    fn compute_char_frequency(content: &str) -> CharFrequency {
4460        let mut frequency = CharFrequency::default();
4461
4462        for ch in content.chars() {
4463            match ch {
4464                '#' => frequency.hash_count += 1,
4465                '*' => frequency.asterisk_count += 1,
4466                '_' => frequency.underscore_count += 1,
4467                '-' => frequency.hyphen_count += 1,
4468                '+' => frequency.plus_count += 1,
4469                '>' => frequency.gt_count += 1,
4470                '|' => frequency.pipe_count += 1,
4471                '[' => frequency.bracket_count += 1,
4472                '`' => frequency.backtick_count += 1,
4473                '<' => frequency.lt_count += 1,
4474                '!' => frequency.exclamation_count += 1,
4475                '\n' => frequency.newline_count += 1,
4476                _ => {}
4477            }
4478        }
4479
4480        frequency
4481    }
4482
4483    /// Parse HTML tags in the content
4484    fn parse_html_tags(
4485        content: &str,
4486        lines: &[LineInfo],
4487        code_blocks: &[(usize, usize)],
4488        flavor: MarkdownFlavor,
4489    ) -> Vec<HtmlTag> {
4490        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
4491            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
4492
4493        let mut html_tags = Vec::with_capacity(content.matches('<').count());
4494
4495        for cap in HTML_TAG_REGEX.captures_iter(content) {
4496            let full_match = cap.get(0).unwrap();
4497            let match_start = full_match.start();
4498            let match_end = full_match.end();
4499
4500            // Skip if in code block
4501            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4502                continue;
4503            }
4504
4505            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
4506            let tag_name_original = cap.get(2).unwrap().as_str();
4507            let tag_name = tag_name_original.to_lowercase();
4508            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
4509
4510            // Skip JSX components in MDX files (tags starting with uppercase letter)
4511            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
4512            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
4513                continue;
4514            }
4515
4516            // Find which line this tag is on
4517            let mut line_num = 1;
4518            let mut col_start = match_start;
4519            let mut col_end = match_end;
4520            for (idx, line_info) in lines.iter().enumerate() {
4521                if match_start >= line_info.byte_offset {
4522                    line_num = idx + 1;
4523                    col_start = match_start - line_info.byte_offset;
4524                    col_end = match_end - line_info.byte_offset;
4525                } else {
4526                    break;
4527                }
4528            }
4529
4530            html_tags.push(HtmlTag {
4531                line: line_num,
4532                start_col: col_start,
4533                end_col: col_end,
4534                byte_offset: match_start,
4535                byte_end: match_end,
4536                tag_name,
4537                is_closing,
4538                is_self_closing,
4539                raw_content: full_match.as_str().to_string(),
4540            });
4541        }
4542
4543        html_tags
4544    }
4545
4546    /// Parse table rows in the content
4547    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
4548        let mut table_rows = Vec::with_capacity(lines.len() / 20);
4549
4550        for (line_idx, line_info) in lines.iter().enumerate() {
4551            // Skip lines in code blocks or blank lines
4552            if line_info.in_code_block || line_info.is_blank {
4553                continue;
4554            }
4555
4556            let line = line_info.content(content);
4557            let line_num = line_idx + 1;
4558
4559            // Check if this line contains pipes (potential table row)
4560            if !line.contains('|') {
4561                continue;
4562            }
4563
4564            // Count columns by splitting on pipes
4565            let parts: Vec<&str> = line.split('|').collect();
4566            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
4567
4568            // Check if this is a separator row
4569            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
4570            let mut column_alignments = Vec::new();
4571
4572            if is_separator {
4573                for part in &parts[1..parts.len() - 1] {
4574                    // Skip first and last empty parts
4575                    let trimmed = part.trim();
4576                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
4577                        "center".to_string()
4578                    } else if trimmed.ends_with(':') {
4579                        "right".to_string()
4580                    } else if trimmed.starts_with(':') {
4581                        "left".to_string()
4582                    } else {
4583                        "none".to_string()
4584                    };
4585                    column_alignments.push(alignment);
4586                }
4587            }
4588
4589            table_rows.push(TableRow {
4590                line: line_num,
4591                is_separator,
4592                column_count,
4593                column_alignments,
4594            });
4595        }
4596
4597        table_rows
4598    }
4599
4600    /// Parse bare URLs and emails in the content
4601    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
4602        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
4603
4604        // Check for bare URLs (not in angle brackets or markdown links)
4605        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
4606            let full_match = cap.get(0).unwrap();
4607            let match_start = full_match.start();
4608            let match_end = full_match.end();
4609
4610            // Skip if in code block
4611            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4612                continue;
4613            }
4614
4615            // Skip if already in angle brackets or markdown links
4616            let preceding_char = if match_start > 0 {
4617                content.chars().nth(match_start - 1)
4618            } else {
4619                None
4620            };
4621            let following_char = content.chars().nth(match_end);
4622
4623            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4624                continue;
4625            }
4626            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4627                continue;
4628            }
4629
4630            let url = full_match.as_str();
4631            let url_type = if url.starts_with("https://") {
4632                "https"
4633            } else if url.starts_with("http://") {
4634                "http"
4635            } else if url.starts_with("ftp://") {
4636                "ftp"
4637            } else {
4638                "other"
4639            };
4640
4641            // Find which line this URL is on
4642            let mut line_num = 1;
4643            let mut col_start = match_start;
4644            let mut col_end = match_end;
4645            for (idx, line_info) in lines.iter().enumerate() {
4646                if match_start >= line_info.byte_offset {
4647                    line_num = idx + 1;
4648                    col_start = match_start - line_info.byte_offset;
4649                    col_end = match_end - line_info.byte_offset;
4650                } else {
4651                    break;
4652                }
4653            }
4654
4655            bare_urls.push(BareUrl {
4656                line: line_num,
4657                start_col: col_start,
4658                end_col: col_end,
4659                byte_offset: match_start,
4660                byte_end: match_end,
4661                url: url.to_string(),
4662                url_type: url_type.to_string(),
4663            });
4664        }
4665
4666        // Check for bare email addresses
4667        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
4668            let full_match = cap.get(0).unwrap();
4669            let match_start = full_match.start();
4670            let match_end = full_match.end();
4671
4672            // Skip if in code block
4673            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
4674                continue;
4675            }
4676
4677            // Skip if already in angle brackets or markdown links
4678            let preceding_char = if match_start > 0 {
4679                content.chars().nth(match_start - 1)
4680            } else {
4681                None
4682            };
4683            let following_char = content.chars().nth(match_end);
4684
4685            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
4686                continue;
4687            }
4688            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
4689                continue;
4690            }
4691
4692            let email = full_match.as_str();
4693
4694            // Find which line this email is on
4695            let mut line_num = 1;
4696            let mut col_start = match_start;
4697            let mut col_end = match_end;
4698            for (idx, line_info) in lines.iter().enumerate() {
4699                if match_start >= line_info.byte_offset {
4700                    line_num = idx + 1;
4701                    col_start = match_start - line_info.byte_offset;
4702                    col_end = match_end - line_info.byte_offset;
4703                } else {
4704                    break;
4705                }
4706            }
4707
4708            bare_urls.push(BareUrl {
4709                line: line_num,
4710                start_col: col_start,
4711                end_col: col_end,
4712                byte_offset: match_start,
4713                byte_end: match_end,
4714                url: email.to_string(),
4715                url_type: "email".to_string(),
4716            });
4717        }
4718
4719        bare_urls
4720    }
4721
4722    /// Get an iterator over valid CommonMark headings
4723    ///
4724    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
4725    /// that should be flagged by MD018 but should not be processed by other heading rules.
4726    ///
4727    /// # Examples
4728    ///
4729    /// ```rust
4730    /// use rumdl_lib::lint_context::LintContext;
4731    /// use rumdl_lib::config::MarkdownFlavor;
4732    ///
4733    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
4734    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
4735    ///
4736    /// for heading in ctx.valid_headings() {
4737    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
4738    /// }
4739    /// // Only prints valid headings, skips `#NoSpace`
4740    /// ```
4741    #[must_use]
4742    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
4743        ValidHeadingsIter::new(&self.lines)
4744    }
4745
4746    /// Check if the document contains any valid CommonMark headings
4747    ///
4748    /// Returns `true` if there is at least one heading with proper space after `#`.
4749    #[must_use]
4750    pub fn has_valid_headings(&self) -> bool {
4751        self.lines
4752            .iter()
4753            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
4754    }
4755}
4756
4757/// Merge adjacent list blocks that should be treated as one
4758fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
4759    if list_blocks.len() < 2 {
4760        return;
4761    }
4762
4763    let mut merger = ListBlockMerger::new(content, lines);
4764    *list_blocks = merger.merge(list_blocks);
4765}
4766
4767/// Helper struct to manage the complex logic of merging list blocks
4768struct ListBlockMerger<'a> {
4769    content: &'a str,
4770    lines: &'a [LineInfo],
4771}
4772
4773impl<'a> ListBlockMerger<'a> {
4774    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
4775        Self { content, lines }
4776    }
4777
4778    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
4779        let mut merged = Vec::with_capacity(list_blocks.len());
4780        let mut current = list_blocks[0].clone();
4781
4782        for next in list_blocks.iter().skip(1) {
4783            if self.should_merge_blocks(&current, next) {
4784                current = self.merge_two_blocks(current, next);
4785            } else {
4786                merged.push(current);
4787                current = next.clone();
4788            }
4789        }
4790
4791        merged.push(current);
4792        merged
4793    }
4794
4795    /// Determine if two adjacent list blocks should be merged
4796    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
4797        // Basic compatibility checks
4798        if !self.blocks_are_compatible(current, next) {
4799            return false;
4800        }
4801
4802        // Check spacing and content between blocks
4803        let spacing = self.analyze_spacing_between(current, next);
4804        match spacing {
4805            BlockSpacing::Consecutive => true,
4806            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
4807            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
4808                self.can_merge_with_content_between(current, next)
4809            }
4810        }
4811    }
4812
4813    /// Check if blocks have compatible structure for merging
4814    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
4815        current.is_ordered == next.is_ordered
4816            && current.blockquote_prefix == next.blockquote_prefix
4817            && current.nesting_level == next.nesting_level
4818    }
4819
4820    /// Analyze the spacing between two list blocks
4821    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
4822        let gap = next.start_line - current.end_line;
4823
4824        match gap {
4825            1 => BlockSpacing::Consecutive,
4826            2 => BlockSpacing::SingleBlank,
4827            _ if gap > 2 => {
4828                if self.has_only_blank_lines_between(current, next) {
4829                    BlockSpacing::MultipleBlanks
4830                } else {
4831                    BlockSpacing::ContentBetween
4832                }
4833            }
4834            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
4835        }
4836    }
4837
4838    /// Check if unordered lists can be merged with a single blank line between
4839    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4840        // Check if there are structural separators between the blocks
4841        // If has_meaningful_content_between returns true, it means there are structural separators
4842        if has_meaningful_content_between(self.content, current, next, self.lines) {
4843            return false; // Structural separators prevent merging
4844        }
4845
4846        // Only merge unordered lists with same marker across single blank
4847        !current.is_ordered && current.marker == next.marker
4848    }
4849
4850    /// Check if ordered lists can be merged when there's content between them
4851    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4852        // Do not merge lists if there are structural separators between them
4853        if has_meaningful_content_between(self.content, current, next, self.lines) {
4854            return false; // Structural separators prevent merging
4855        }
4856
4857        // Only consider merging ordered lists if there's no structural content between
4858        current.is_ordered && next.is_ordered
4859    }
4860
4861    /// Check if there are only blank lines between blocks
4862    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
4863        for line_num in (current.end_line + 1)..next.start_line {
4864            if let Some(line_info) = self.lines.get(line_num - 1)
4865                && !line_info.content(self.content).trim().is_empty()
4866            {
4867                return false;
4868            }
4869        }
4870        true
4871    }
4872
4873    /// Merge two compatible list blocks into one
4874    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
4875        current.end_line = next.end_line;
4876        current.item_lines.extend_from_slice(&next.item_lines);
4877
4878        // Update max marker width
4879        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
4880
4881        // Handle marker consistency for unordered lists
4882        if !current.is_ordered && self.markers_differ(&current, next) {
4883            current.marker = None; // Mixed markers
4884        }
4885
4886        current
4887    }
4888
4889    /// Check if two blocks have different markers
4890    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
4891        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
4892    }
4893}
4894
/// How far apart two neighbouring list blocks are, as classified by
/// `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockSpacing {
    /// The next block starts on the line right after the current block ends
    /// (also used as the fallback when the blocks touch or overlap).
    Consecutive,
    /// Exactly one line lies between the blocks. The classification itself does
    /// not inspect that line; callers check separately whether it is a separator.
    SingleBlank,
    /// Two or more lines lie between the blocks and all of them are blank.
    MultipleBlanks,
    /// Two or more lines lie between the blocks and at least one is not blank.
    ContentBetween,
}
4903
4904/// Check if there's meaningful content (not just blank lines) between two list blocks
4905fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
4906    // Check lines between current.end_line and next.start_line
4907    for line_num in (current.end_line + 1)..next.start_line {
4908        if let Some(line_info) = lines.get(line_num - 1) {
4909            // Convert to 0-indexed
4910            let trimmed = line_info.content(content).trim();
4911
4912            // Skip empty lines
4913            if trimmed.is_empty() {
4914                continue;
4915            }
4916
4917            // Check for structural separators that should separate lists (CommonMark compliant)
4918
4919            // Headings separate lists
4920            if line_info.heading.is_some() {
4921                return true; // Has meaningful content - headings separate lists
4922            }
4923
4924            // Horizontal rules separate lists (---, ***, ___)
4925            if is_horizontal_rule(trimmed) {
4926                return true; // Has meaningful content - horizontal rules separate lists
4927            }
4928
4929            // Tables separate lists
4930            if crate::utils::skip_context::is_table_line(trimmed) {
4931                return true; // Has meaningful content - tables separate lists
4932            }
4933
4934            // Blockquotes separate lists
4935            if trimmed.starts_with('>') {
4936                return true; // Has meaningful content - blockquotes separate lists
4937            }
4938
4939            // Code block fences separate lists (unless properly indented as list content)
4940            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
4941                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4942
4943                // Check if this code block is properly indented as list continuation
4944                let min_continuation_indent = if current.is_ordered {
4945                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
4946                } else {
4947                    current.nesting_level + 2
4948                };
4949
4950                if line_indent < min_continuation_indent {
4951                    // This is a standalone code block that separates lists
4952                    return true; // Has meaningful content - standalone code blocks separate lists
4953                }
4954            }
4955
4956            // Check if this line has proper indentation for list continuation
4957            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
4958
4959            // Calculate minimum indentation needed to be list continuation
4960            let min_indent = if current.is_ordered {
4961                current.nesting_level + current.max_marker_width
4962            } else {
4963                current.nesting_level + 2
4964            };
4965
4966            // If the line is not indented enough to be list continuation, it's meaningful content
4967            if line_indent < min_indent {
4968                return true; // Has meaningful content - content not indented as list continuation
4969            }
4970
4971            // If we reach here, the line is properly indented as list continuation
4972            // Continue checking other lines
4973        }
4974    }
4975
4976    // Only blank lines or properly indented list continuation content between blocks
4977    false
4978}
4979
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces between characters
/// - No other characters allowed
///
/// A tab in the leading indentation is rejected even when it follows one to three
/// spaces (e.g. `" \t---"`), because the tab pushes the content to an indentation
/// of four columns.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    let after_spaces = line.trim_start_matches(' ');
    let leading_spaces = line.len() - after_spaces.len();

    // Checking the first character after the spaces covers both a line that
    // starts with a tab and a tab that follows 1-3 leading spaces.
    if leading_spaces > 3 || after_spaces.starts_with('\t') {
        return false;
    }

    is_horizontal_rule_content(after_spaces.trim())
}

/// Check if trimmed content matches horizontal rule pattern.
///
/// The first character must be `-`, `*`, or `_`; every other character must be
/// that same character, a space, or a tab; and the marker character must occur at
/// least three times. The input is expected to be trimmed already: leading
/// whitespace makes the check fail.
///
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    let mut chars = trimmed.chars();
    let marker = match chars.next() {
        Some(ch) if matches!(ch, '-' | '*' | '_') => ch,
        _ => return false,
    };

    // The first character already counts as one marker.
    let mut count = 1usize;
    for ch in chars {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    count >= 3
}

/// Backwards-compatible alias for `is_horizontal_rule_content`
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
5025
/// Unit tests for `LintContext`.
5027#[cfg(test)]
5028mod tests {
5029    use super::*;
5030
5031    #[test]
5032    fn test_empty_content() {
5033        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5034        assert_eq!(ctx.content, "");
5035        assert_eq!(ctx.line_offsets, vec![0]);
5036        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
5037        assert_eq!(ctx.lines.len(), 0);
5038    }
5039
5040    #[test]
5041    fn test_single_line() {
5042        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
5043        assert_eq!(ctx.content, "# Hello");
5044        assert_eq!(ctx.line_offsets, vec![0]);
5045        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
5046        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
5047    }
5048
5049    #[test]
5050    fn test_multi_line() {
5051        let content = "# Title\n\nSecond line\nThird line";
5052        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5053        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
5054        // Test offset to line/col
5055        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
5056        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
5057        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
5058        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
5059        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
5060    }
5061
5062    #[test]
5063    fn test_line_info() {
5064        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
5065        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5066
5067        // Test line info
5068        assert_eq!(ctx.lines.len(), 7);
5069
5070        // Line 1: "# Title"
5071        let line1 = &ctx.lines[0];
5072        assert_eq!(line1.content(ctx.content), "# Title");
5073        assert_eq!(line1.byte_offset, 0);
5074        assert_eq!(line1.indent, 0);
5075        assert!(!line1.is_blank);
5076        assert!(!line1.in_code_block);
5077        assert!(line1.list_item.is_none());
5078
5079        // Line 2: "    indented"
5080        let line2 = &ctx.lines[1];
5081        assert_eq!(line2.content(ctx.content), "    indented");
5082        assert_eq!(line2.byte_offset, 8);
5083        assert_eq!(line2.indent, 4);
5084        assert!(!line2.is_blank);
5085
5086        // Line 3: "" (blank)
5087        let line3 = &ctx.lines[2];
5088        assert_eq!(line3.content(ctx.content), "");
5089        assert!(line3.is_blank);
5090
5091        // Test helper methods
5092        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
5093        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
5094        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
5095        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
5096    }
5097
5098    #[test]
5099    fn test_list_item_detection() {
5100        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
5101        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5102
5103        // Line 1: "- Unordered item"
5104        let line1 = &ctx.lines[0];
5105        assert!(line1.list_item.is_some());
5106        let list1 = line1.list_item.as_ref().unwrap();
5107        assert_eq!(list1.marker, "-");
5108        assert!(!list1.is_ordered);
5109        assert_eq!(list1.marker_column, 0);
5110        assert_eq!(list1.content_column, 2);
5111
5112        // Line 2: "  * Nested item"
5113        let line2 = &ctx.lines[1];
5114        assert!(line2.list_item.is_some());
5115        let list2 = line2.list_item.as_ref().unwrap();
5116        assert_eq!(list2.marker, "*");
5117        assert_eq!(list2.marker_column, 2);
5118
5119        // Line 3: "1. Ordered item"
5120        let line3 = &ctx.lines[2];
5121        assert!(line3.list_item.is_some());
5122        let list3 = line3.list_item.as_ref().unwrap();
5123        assert_eq!(list3.marker, "1.");
5124        assert!(list3.is_ordered);
5125        assert_eq!(list3.number, Some(1));
5126
5127        // Line 6: "Not a list"
5128        let line6 = &ctx.lines[5];
5129        assert!(line6.list_item.is_none());
5130    }
5131
5132    #[test]
5133    fn test_offset_to_line_col_edge_cases() {
5134        let content = "a\nb\nc";
5135        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5136        // line_offsets: [0, 2, 4]
5137        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
5138        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
5139        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
5140        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
5141        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
5142        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
5143    }
5144
5145    #[test]
5146    fn test_mdx_esm_blocks() {
5147        let content = r##"import {Chart} from './snowfall.js'
5148export const year = 2023
5149
5150# Last year's snowfall
5151
5152In {year}, the snowfall was above average.
5153It was followed by a warm spring which caused
5154flood conditions in many of the nearby rivers.
5155
5156<Chart color="#fcb32c" year={year} />
5157"##;
5158
5159        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);
5160
5161        // Check that lines 1 and 2 are marked as ESM blocks
5162        assert_eq!(ctx.lines.len(), 10);
5163        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
5164        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
5165        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
5166        assert!(
5167            !ctx.lines[3].in_esm_block,
5168            "Line 4 (heading) should NOT be in_esm_block"
5169        );
5170        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
5171        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
5172    }
5173
5174    #[test]
5175    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
5176        let content = r#"import {Chart} from './snowfall.js'
5177export const year = 2023
5178
5179# Last year's snowfall
5180"#;
5181
5182        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5183
5184        // ESM blocks should NOT be detected in Standard flavor
5185        assert!(
5186            !ctx.lines[0].in_esm_block,
5187            "Line 1 should NOT be in_esm_block in Standard flavor"
5188        );
5189        assert!(
5190            !ctx.lines[1].in_esm_block,
5191            "Line 2 should NOT be in_esm_block in Standard flavor"
5192        );
5193    }
5194
5195    #[test]
5196    fn test_blockquote_with_indented_content() {
5197        // Lines with `>` followed by heavily-indented content should be detected as blockquotes.
5198        // The content inside the blockquote may also be detected as a code block (which is correct),
5199        // but for MD046 purposes, we need to know the line is inside a blockquote.
5200        let content = r#"# Heading
5201
5202>      -S socket-path
5203>                    More text
5204"#;
5205        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5206
5207        // Line 3 (index 2) should be detected as blockquote
5208        assert!(
5209            ctx.lines.get(2).is_some_and(|l| l.blockquote.is_some()),
5210            "Line 3 should be a blockquote"
5211        );
5212        // Line 4 (index 3) should also be blockquote
5213        assert!(
5214            ctx.lines.get(3).is_some_and(|l| l.blockquote.is_some()),
5215            "Line 4 should be a blockquote"
5216        );
5217
5218        // Verify blockquote content is correctly parsed
5219        // Note: spaces_after includes the spaces between `>` and content
5220        let bq3 = ctx.lines.get(2).unwrap().blockquote.as_ref().unwrap();
5221        assert_eq!(bq3.content, "-S socket-path");
5222        assert_eq!(bq3.nesting_level, 1);
5223        // 6 spaces after the `>` marker
5224        assert!(bq3.has_multiple_spaces_after_marker);
5225
5226        let bq4 = ctx.lines.get(3).unwrap().blockquote.as_ref().unwrap();
5227        assert_eq!(bq4.content, "More text");
5228        assert_eq!(bq4.nesting_level, 1);
5229    }
5230
5231    #[test]
5232    fn test_footnote_definitions_not_parsed_as_reference_defs() {
5233        // Footnote definitions use [^id]: syntax and should NOT be parsed as reference definitions
5234        let content = r#"# Title
5235
5236A footnote[^1].
5237
5238[^1]: This is the footnote content.
5239
5240[^note]: Another footnote with [link](https://example.com).
5241
5242[regular]: ./path.md "A real reference definition"
5243"#;
5244        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5245
5246        // Should only have one reference definition (the regular one)
5247        assert_eq!(
5248            ctx.reference_defs.len(),
5249            1,
5250            "Footnotes should not be parsed as reference definitions"
5251        );
5252
5253        // The only reference def should be the regular one
5254        assert_eq!(ctx.reference_defs[0].id, "regular");
5255        assert_eq!(ctx.reference_defs[0].url, "./path.md");
5256        assert_eq!(
5257            ctx.reference_defs[0].title,
5258            Some("A real reference definition".to_string())
5259        );
5260    }
5261
5262    #[test]
5263    fn test_footnote_with_inline_link_not_misidentified() {
5264        // Regression test for issue #286: footnote containing an inline link
5265        // was incorrectly parsed as a reference definition with URL "[link](url)"
5266        let content = r#"# Title
5267
5268A footnote[^1].
5269
5270[^1]: [link](https://www.google.com).
5271"#;
5272        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5273
5274        // Should have no reference definitions
5275        assert!(
5276            ctx.reference_defs.is_empty(),
5277            "Footnote with inline link should not create a reference definition"
5278        );
5279    }
5280
5281    #[test]
5282    fn test_various_footnote_formats_excluded() {
5283        // Test various footnote ID formats are all excluded
5284        let content = r#"[^1]: Numeric footnote
5285[^note]: Named footnote
5286[^a]: Single char footnote
5287[^long-footnote-name]: Long named footnote
5288[^123abc]: Mixed alphanumeric
5289
5290[ref1]: ./file1.md
5291[ref2]: ./file2.md
5292"#;
5293        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5294
5295        // Should only have the two regular reference definitions
5296        assert_eq!(
5297            ctx.reference_defs.len(),
5298            2,
5299            "Only regular reference definitions should be parsed"
5300        );
5301
5302        let ids: Vec<&str> = ctx.reference_defs.iter().map(|r| r.id.as_str()).collect();
5303        assert!(ids.contains(&"ref1"));
5304        assert!(ids.contains(&"ref2"));
5305        assert!(!ids.iter().any(|id| id.starts_with('^')));
5306    }
5307
5308    // =========================================================================
5309    // Tests for has_char and char_count methods
5310    // =========================================================================
5311
5312    #[test]
5313    fn test_has_char_tracked_characters() {
5314        // Test all 12 tracked characters
5315        let content = "# Heading\n* list item\n_emphasis_ and -hyphen-\n+ plus\n> quote\n| table |\n[link]\n`code`\n<html>\n!image";
5316        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5317
5318        // All tracked characters should be detected
5319        assert!(ctx.has_char('#'), "Should detect hash");
5320        assert!(ctx.has_char('*'), "Should detect asterisk");
5321        assert!(ctx.has_char('_'), "Should detect underscore");
5322        assert!(ctx.has_char('-'), "Should detect hyphen");
5323        assert!(ctx.has_char('+'), "Should detect plus");
5324        assert!(ctx.has_char('>'), "Should detect gt");
5325        assert!(ctx.has_char('|'), "Should detect pipe");
5326        assert!(ctx.has_char('['), "Should detect bracket");
5327        assert!(ctx.has_char('`'), "Should detect backtick");
5328        assert!(ctx.has_char('<'), "Should detect lt");
5329        assert!(ctx.has_char('!'), "Should detect exclamation");
5330        assert!(ctx.has_char('\n'), "Should detect newline");
5331    }
5332
5333    #[test]
5334    fn test_has_char_absent_characters() {
5335        let content = "Simple text without special chars";
5336        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5337
5338        // None of the tracked characters should be present
5339        assert!(!ctx.has_char('#'), "Should not detect hash");
5340        assert!(!ctx.has_char('*'), "Should not detect asterisk");
5341        assert!(!ctx.has_char('_'), "Should not detect underscore");
5342        assert!(!ctx.has_char('-'), "Should not detect hyphen");
5343        assert!(!ctx.has_char('+'), "Should not detect plus");
5344        assert!(!ctx.has_char('>'), "Should not detect gt");
5345        assert!(!ctx.has_char('|'), "Should not detect pipe");
5346        assert!(!ctx.has_char('['), "Should not detect bracket");
5347        assert!(!ctx.has_char('`'), "Should not detect backtick");
5348        assert!(!ctx.has_char('<'), "Should not detect lt");
5349        assert!(!ctx.has_char('!'), "Should not detect exclamation");
5350        // Note: single line content has no newlines
5351        assert!(!ctx.has_char('\n'), "Should not detect newline in single line");
5352    }
5353
5354    #[test]
5355    fn test_has_char_fallback_for_untracked() {
5356        let content = "Text with @mention and $dollar and %percent";
5357        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5358
5359        // Untracked characters should fall back to content.contains()
5360        assert!(ctx.has_char('@'), "Should detect @ via fallback");
5361        assert!(ctx.has_char('$'), "Should detect $ via fallback");
5362        assert!(ctx.has_char('%'), "Should detect % via fallback");
5363        assert!(!ctx.has_char('^'), "Should not detect absent ^ via fallback");
5364    }
5365
5366    #[test]
5367    fn test_char_count_tracked_characters() {
5368        let content = "## Heading ##\n***bold***\n__emphasis__\n---\n+++\n>> nested\n|| table ||\n[[link]]\n``code``\n<<html>>\n!!";
5369        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5370
5371        // Count each tracked character
5372        assert_eq!(ctx.char_count('#'), 4, "Should count 4 hashes");
5373        assert_eq!(ctx.char_count('*'), 6, "Should count 6 asterisks");
5374        assert_eq!(ctx.char_count('_'), 4, "Should count 4 underscores");
5375        assert_eq!(ctx.char_count('-'), 3, "Should count 3 hyphens");
5376        assert_eq!(ctx.char_count('+'), 3, "Should count 3 pluses");
5377        assert_eq!(ctx.char_count('>'), 4, "Should count 4 gt (2 nested + 2 in <<html>>)");
5378        assert_eq!(ctx.char_count('|'), 4, "Should count 4 pipes");
5379        assert_eq!(ctx.char_count('['), 2, "Should count 2 brackets");
5380        assert_eq!(ctx.char_count('`'), 4, "Should count 4 backticks");
5381        assert_eq!(ctx.char_count('<'), 2, "Should count 2 lt");
5382        assert_eq!(ctx.char_count('!'), 2, "Should count 2 exclamations");
5383        assert_eq!(ctx.char_count('\n'), 10, "Should count 10 newlines");
5384    }
5385
5386    #[test]
5387    fn test_char_count_zero_for_absent() {
5388        let content = "Plain text";
5389        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5390
5391        assert_eq!(ctx.char_count('#'), 0);
5392        assert_eq!(ctx.char_count('*'), 0);
5393        assert_eq!(ctx.char_count('_'), 0);
5394        assert_eq!(ctx.char_count('\n'), 0);
5395    }
5396
5397    #[test]
5398    fn test_char_count_fallback_for_untracked() {
5399        let content = "@@@ $$ %%%";
5400        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5401
5402        assert_eq!(ctx.char_count('@'), 3, "Should count 3 @ via fallback");
5403        assert_eq!(ctx.char_count('$'), 2, "Should count 2 $ via fallback");
5404        assert_eq!(ctx.char_count('%'), 3, "Should count 3 % via fallback");
5405        assert_eq!(ctx.char_count('^'), 0, "Should count 0 for absent char");
5406    }
5407
5408    #[test]
5409    fn test_char_count_empty_content() {
5410        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
5411
5412        assert_eq!(ctx.char_count('#'), 0);
5413        assert_eq!(ctx.char_count('*'), 0);
5414        assert_eq!(ctx.char_count('@'), 0);
5415        assert!(!ctx.has_char('#'));
5416        assert!(!ctx.has_char('@'));
5417    }
5418
5419    // =========================================================================
5420    // Tests for is_in_html_tag method
5421    // =========================================================================
5422
5423    #[test]
5424    fn test_is_in_html_tag_simple() {
5425        let content = "<div>content</div>";
5426        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5427
5428        // Inside opening tag
5429        assert!(ctx.is_in_html_tag(0), "Position 0 (<) should be in tag");
5430        assert!(ctx.is_in_html_tag(1), "Position 1 (d) should be in tag");
5431        assert!(ctx.is_in_html_tag(4), "Position 4 (>) should be in tag");
5432
5433        // Outside tag (in content)
5434        assert!(!ctx.is_in_html_tag(5), "Position 5 (c) should not be in tag");
5435        assert!(!ctx.is_in_html_tag(10), "Position 10 (t) should not be in tag");
5436
5437        // Inside closing tag
5438        assert!(ctx.is_in_html_tag(12), "Position 12 (<) should be in tag");
5439        assert!(ctx.is_in_html_tag(17), "Position 17 (>) should be in tag");
5440    }
5441
5442    #[test]
5443    fn test_is_in_html_tag_self_closing() {
5444        let content = "Text <br/> more text";
5445        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5446
5447        // Before tag
5448        assert!(!ctx.is_in_html_tag(0), "Position 0 should not be in tag");
5449        assert!(!ctx.is_in_html_tag(4), "Position 4 (space) should not be in tag");
5450
5451        // Inside self-closing tag
5452        assert!(ctx.is_in_html_tag(5), "Position 5 (<) should be in tag");
5453        assert!(ctx.is_in_html_tag(8), "Position 8 (/) should be in tag");
5454        assert!(ctx.is_in_html_tag(9), "Position 9 (>) should be in tag");
5455
5456        // After tag
5457        assert!(!ctx.is_in_html_tag(10), "Position 10 (space) should not be in tag");
5458    }
5459
5460    #[test]
5461    fn test_is_in_html_tag_with_attributes() {
5462        let content = r#"<a href="url" class="link">text</a>"#;
5463        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5464
5465        // All positions inside opening tag with attributes
5466        assert!(ctx.is_in_html_tag(0), "Start of tag");
5467        assert!(ctx.is_in_html_tag(10), "Inside href attribute");
5468        assert!(ctx.is_in_html_tag(20), "Inside class attribute");
5469        assert!(ctx.is_in_html_tag(26), "End of opening tag");
5470
5471        // Content between tags
5472        assert!(!ctx.is_in_html_tag(27), "Start of content");
5473        assert!(!ctx.is_in_html_tag(30), "End of content");
5474
5475        // Closing tag
5476        assert!(ctx.is_in_html_tag(31), "Start of closing tag");
5477    }
5478
5479    #[test]
5480    fn test_is_in_html_tag_multiline() {
5481        let content = "<div\n  class=\"test\"\n>\ncontent\n</div>";
5482        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5483
5484        // Opening tag spans multiple lines
5485        assert!(ctx.is_in_html_tag(0), "Start of multiline tag");
5486        assert!(ctx.is_in_html_tag(5), "After first newline in tag");
5487        assert!(ctx.is_in_html_tag(15), "Inside attribute");
5488
5489        // After closing > of opening tag
5490        let closing_bracket_pos = content.find(">\n").unwrap();
5491        assert!(!ctx.is_in_html_tag(closing_bracket_pos + 2), "Content after tag");
5492    }
5493
5494    #[test]
5495    fn test_is_in_html_tag_no_tags() {
5496        let content = "Plain text without any HTML";
5497        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5498
5499        // No position should be in an HTML tag
5500        for i in 0..content.len() {
5501            assert!(!ctx.is_in_html_tag(i), "Position {i} should not be in tag");
5502        }
5503    }
5504
5505    // =========================================================================
5506    // Tests for is_in_jinja_range method
5507    // =========================================================================
5508
5509    #[test]
5510    fn test_is_in_jinja_range_expression() {
5511        let content = "Hello {{ name }}!";
5512        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5513
5514        // Before Jinja
5515        assert!(!ctx.is_in_jinja_range(0), "H should not be in Jinja");
5516        assert!(!ctx.is_in_jinja_range(5), "Space before Jinja should not be in Jinja");
5517
5518        // Inside Jinja expression (positions 6-15 for "{{ name }}")
5519        assert!(ctx.is_in_jinja_range(6), "First brace should be in Jinja");
5520        assert!(ctx.is_in_jinja_range(7), "Second brace should be in Jinja");
5521        assert!(ctx.is_in_jinja_range(10), "name should be in Jinja");
5522        assert!(ctx.is_in_jinja_range(14), "Closing brace should be in Jinja");
5523        assert!(ctx.is_in_jinja_range(15), "Second closing brace should be in Jinja");
5524
5525        // After Jinja
5526        assert!(!ctx.is_in_jinja_range(16), "! should not be in Jinja");
5527    }
5528
5529    #[test]
5530    fn test_is_in_jinja_range_statement() {
5531        let content = "{% if condition %}content{% endif %}";
5532        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5533
5534        // Inside opening statement
5535        assert!(ctx.is_in_jinja_range(0), "Start of Jinja statement");
5536        assert!(ctx.is_in_jinja_range(5), "condition should be in Jinja");
5537        assert!(ctx.is_in_jinja_range(17), "End of opening statement");
5538
5539        // Content between
5540        assert!(!ctx.is_in_jinja_range(18), "content should not be in Jinja");
5541
5542        // Inside closing statement
5543        assert!(ctx.is_in_jinja_range(25), "Start of endif");
5544        assert!(ctx.is_in_jinja_range(32), "endif should be in Jinja");
5545    }
5546
5547    #[test]
5548    fn test_is_in_jinja_range_multiple() {
5549        let content = "{{ a }} and {{ b }}";
5550        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5551
5552        // First Jinja expression
5553        assert!(ctx.is_in_jinja_range(0));
5554        assert!(ctx.is_in_jinja_range(3));
5555        assert!(ctx.is_in_jinja_range(6));
5556
5557        // Between expressions
5558        assert!(!ctx.is_in_jinja_range(8));
5559        assert!(!ctx.is_in_jinja_range(11));
5560
5561        // Second Jinja expression
5562        assert!(ctx.is_in_jinja_range(12));
5563        assert!(ctx.is_in_jinja_range(15));
5564        assert!(ctx.is_in_jinja_range(18));
5565    }
5566
5567    #[test]
5568    fn test_is_in_jinja_range_no_jinja() {
5569        let content = "Plain text with single braces but not Jinja";
5570        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5571
5572        // No position should be in Jinja
5573        for i in 0..content.len() {
5574            assert!(!ctx.is_in_jinja_range(i), "Position {i} should not be in Jinja");
5575        }
5576    }
5577
5578    // =========================================================================
5579    // Tests for is_in_link_title method
5580    // =========================================================================
5581
5582    #[test]
5583    fn test_is_in_link_title_with_title() {
5584        let content = r#"[ref]: https://example.com "Title text"
5585
5586Some content."#;
5587        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5588
5589        // Verify we have a reference def with title
5590        assert_eq!(ctx.reference_defs.len(), 1);
5591        let def = &ctx.reference_defs[0];
5592        assert!(def.title_byte_start.is_some());
5593        assert!(def.title_byte_end.is_some());
5594
5595        let title_start = def.title_byte_start.unwrap();
5596        let title_end = def.title_byte_end.unwrap();
5597
5598        // Before title (in URL)
5599        assert!(!ctx.is_in_link_title(10), "URL should not be in title");
5600
5601        // Inside title
5602        assert!(ctx.is_in_link_title(title_start), "Title start should be in title");
5603        assert!(
5604            ctx.is_in_link_title(title_start + 5),
5605            "Middle of title should be in title"
5606        );
5607        assert!(ctx.is_in_link_title(title_end - 1), "End of title should be in title");
5608
5609        // After title
5610        assert!(
5611            !ctx.is_in_link_title(title_end),
5612            "After title end should not be in title"
5613        );
5614    }
5615
5616    #[test]
5617    fn test_is_in_link_title_without_title() {
5618        let content = "[ref]: https://example.com\n\nSome content.";
5619        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5620
5621        // Reference def without title
5622        assert_eq!(ctx.reference_defs.len(), 1);
5623        let def = &ctx.reference_defs[0];
5624        assert!(def.title_byte_start.is_none());
5625        assert!(def.title_byte_end.is_none());
5626
5627        // No position should be in a title
5628        for i in 0..content.len() {
5629            assert!(!ctx.is_in_link_title(i), "Position {i} should not be in title");
5630        }
5631    }
5632
5633    #[test]
5634    fn test_is_in_link_title_multiple_refs() {
5635        let content = r#"[ref1]: /url1 "Title One"
5636[ref2]: /url2
5637[ref3]: /url3 "Title Three"
5638"#;
5639        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5640
5641        // Should have 3 reference defs
5642        assert_eq!(ctx.reference_defs.len(), 3);
5643
5644        // ref1 has title
5645        let ref1 = ctx.reference_defs.iter().find(|r| r.id == "ref1").unwrap();
5646        assert!(ref1.title_byte_start.is_some());
5647
5648        // ref2 has no title
5649        let ref2 = ctx.reference_defs.iter().find(|r| r.id == "ref2").unwrap();
5650        assert!(ref2.title_byte_start.is_none());
5651
5652        // ref3 has title
5653        let ref3 = ctx.reference_defs.iter().find(|r| r.id == "ref3").unwrap();
5654        assert!(ref3.title_byte_start.is_some());
5655
5656        // Check positions in ref1's title
5657        if let (Some(start), Some(end)) = (ref1.title_byte_start, ref1.title_byte_end) {
5658            assert!(ctx.is_in_link_title(start + 1));
5659            assert!(!ctx.is_in_link_title(end + 5));
5660        }
5661
5662        // Check positions in ref3's title
5663        if let (Some(start), Some(_end)) = (ref3.title_byte_start, ref3.title_byte_end) {
5664            assert!(ctx.is_in_link_title(start + 1));
5665        }
5666    }
5667
5668    #[test]
5669    fn test_is_in_link_title_single_quotes() {
5670        let content = "[ref]: /url 'Single quoted title'\n";
5671        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5672
5673        assert_eq!(ctx.reference_defs.len(), 1);
5674        let def = &ctx.reference_defs[0];
5675
5676        if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5677            assert!(ctx.is_in_link_title(start));
5678            assert!(ctx.is_in_link_title(start + 5));
5679            assert!(!ctx.is_in_link_title(end));
5680        }
5681    }
5682
5683    #[test]
5684    fn test_is_in_link_title_parentheses() {
5685        // Note: The reference def parser may not support parenthesized titles
5686        // This test verifies the is_in_link_title method works when titles exist
5687        let content = "[ref]: /url (Parenthesized title)\n";
5688        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5689
5690        // Parser behavior: may or may not parse parenthesized titles
5691        // We test that is_in_link_title correctly reflects whatever was parsed
5692        if ctx.reference_defs.is_empty() {
5693            // Parser didn't recognize this as a reference def
5694            for i in 0..content.len() {
5695                assert!(!ctx.is_in_link_title(i));
5696            }
5697        } else {
5698            let def = &ctx.reference_defs[0];
5699            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
5700                assert!(ctx.is_in_link_title(start));
5701                assert!(ctx.is_in_link_title(start + 5));
5702                assert!(!ctx.is_in_link_title(end));
5703            } else {
5704                // Title wasn't parsed, so no position should be in title
5705                for i in 0..content.len() {
5706                    assert!(!ctx.is_in_link_title(i));
5707                }
5708            }
5709        }
5710    }
5711
5712    #[test]
5713    fn test_is_in_link_title_no_refs() {
5714        let content = "Just plain text without any reference definitions.";
5715        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5716
5717        assert!(ctx.reference_defs.is_empty());
5718
5719        for i in 0..content.len() {
5720            assert!(!ctx.is_in_link_title(i));
5721        }
5722    }
5723
5724    // =========================================================================
5725    // Math span tests (Issue #289)
5726    // =========================================================================
5727
5728    #[test]
5729    fn test_math_spans_inline() {
5730        let content = "Text with inline math $[f](x)$ in it.";
5731        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5732
5733        let math_spans = ctx.math_spans();
5734        assert_eq!(math_spans.len(), 1, "Should detect one inline math span");
5735
5736        let span = &math_spans[0];
5737        assert!(!span.is_display, "Should be inline math, not display");
5738        assert_eq!(span.content, "[f](x)", "Content should be extracted correctly");
5739    }
5740
5741    #[test]
5742    fn test_math_spans_display_single_line() {
5743        let content = "$$X(\\zeta) = \\mathcal Z [x](\\zeta)$$";
5744        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5745
5746        let math_spans = ctx.math_spans();
5747        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5748
5749        let span = &math_spans[0];
5750        assert!(span.is_display, "Should be display math");
5751        assert!(
5752            span.content.contains("[x](\\zeta)"),
5753            "Content should contain the link-like pattern"
5754        );
5755    }
5756
5757    #[test]
5758    fn test_math_spans_display_multiline() {
5759        let content = "Before\n\n$$\n[x](\\zeta) = \\sum_k x(k)\n$$\n\nAfter";
5760        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5761
5762        let math_spans = ctx.math_spans();
5763        assert_eq!(math_spans.len(), 1, "Should detect one display math span");
5764
5765        let span = &math_spans[0];
5766        assert!(span.is_display, "Should be display math");
5767    }
5768
5769    #[test]
5770    fn test_is_in_math_span() {
5771        let content = "Text $[f](x)$ more text";
5772        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5773
5774        // Position inside the math span
5775        let math_start = content.find('$').unwrap();
5776        let math_end = content.rfind('$').unwrap() + 1;
5777
5778        assert!(
5779            ctx.is_in_math_span(math_start + 1),
5780            "Position inside math span should return true"
5781        );
5782        assert!(
5783            ctx.is_in_math_span(math_start + 3),
5784            "Position inside math span should return true"
5785        );
5786
5787        // Position outside the math span
5788        assert!(!ctx.is_in_math_span(0), "Position before math span should return false");
5789        assert!(
5790            !ctx.is_in_math_span(math_end + 1),
5791            "Position after math span should return false"
5792        );
5793    }
5794
5795    #[test]
5796    fn test_math_spans_mixed_with_code() {
5797        let content = "Math $[f](x)$ and code `[g](y)` mixed";
5798        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5799
5800        let math_spans = ctx.math_spans();
5801        let code_spans = ctx.code_spans();
5802
5803        assert_eq!(math_spans.len(), 1, "Should have one math span");
5804        assert_eq!(code_spans.len(), 1, "Should have one code span");
5805
5806        // Verify math span content
5807        assert_eq!(math_spans[0].content, "[f](x)");
5808        // Verify code span content
5809        assert_eq!(code_spans[0].content, "[g](y)");
5810    }
5811
5812    #[test]
5813    fn test_math_spans_no_math() {
5814        let content = "Regular text without any math at all.";
5815        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5816
5817        let math_spans = ctx.math_spans();
5818        assert!(math_spans.is_empty(), "Should have no math spans");
5819    }
5820
5821    #[test]
5822    fn test_math_spans_multiple() {
5823        let content = "First $a$ and second $b$ and display $$c$$";
5824        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5825
5826        let math_spans = ctx.math_spans();
5827        assert_eq!(math_spans.len(), 3, "Should detect three math spans");
5828
5829        // Two inline, one display
5830        let inline_count = math_spans.iter().filter(|s| !s.is_display).count();
5831        let display_count = math_spans.iter().filter(|s| s.is_display).count();
5832
5833        assert_eq!(inline_count, 2, "Should have two inline math spans");
5834        assert_eq!(display_count, 1, "Should have one display math span");
5835    }
5836
5837    #[test]
5838    fn test_is_in_math_span_boundary_positions() {
5839        // Test exact boundary positions: $[f](x)$
5840        // Byte positions:                0123456789
5841        let content = "$[f](x)$";
5842        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5843
5844        let math_spans = ctx.math_spans();
5845        assert_eq!(math_spans.len(), 1, "Should have one math span");
5846
5847        let span = &math_spans[0];
5848
5849        // Position at opening $ should be in span (byte 0)
5850        assert!(
5851            ctx.is_in_math_span(span.byte_offset),
5852            "Start position should be in span"
5853        );
5854
5855        // Position just inside should be in span
5856        assert!(
5857            ctx.is_in_math_span(span.byte_offset + 1),
5858            "Position after start should be in span"
5859        );
5860
5861        // Position at closing $ should be in span (exclusive end means we check byte_end - 1)
5862        assert!(
5863            ctx.is_in_math_span(span.byte_end - 1),
5864            "Position at end-1 should be in span"
5865        );
5866
5867        // Position at byte_end should NOT be in span (exclusive end)
5868        assert!(
5869            !ctx.is_in_math_span(span.byte_end),
5870            "Position at byte_end should NOT be in span (exclusive)"
5871        );
5872    }
5873
5874    #[test]
5875    fn test_math_spans_at_document_start() {
5876        let content = "$x$ text";
5877        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5878
5879        let math_spans = ctx.math_spans();
5880        assert_eq!(math_spans.len(), 1);
5881        assert_eq!(math_spans[0].byte_offset, 0, "Math should start at byte 0");
5882    }
5883
5884    #[test]
5885    fn test_math_spans_at_document_end() {
5886        let content = "text $x$";
5887        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5888
5889        let math_spans = ctx.math_spans();
5890        assert_eq!(math_spans.len(), 1);
5891        assert_eq!(math_spans[0].byte_end, content.len(), "Math should end at document end");
5892    }
5893
5894    #[test]
5895    fn test_math_spans_consecutive() {
5896        let content = "$a$$b$";
5897        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5898
5899        let math_spans = ctx.math_spans();
5900        // pulldown-cmark should parse these as separate spans
5901        assert!(!math_spans.is_empty(), "Should detect at least one math span");
5902
5903        // All positions should be in some math span
5904        for i in 0..content.len() {
5905            assert!(ctx.is_in_math_span(i), "Position {i} should be in a math span");
5906        }
5907    }
5908
5909    #[test]
5910    fn test_math_spans_currency_not_math() {
5911        // Unbalanced $ should not create math spans
5912        let content = "Price is $100";
5913        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5914
5915        let math_spans = ctx.math_spans();
5916        // pulldown-cmark requires balanced delimiters for math
5917        // $100 alone is not math
5918        assert!(
5919            math_spans.is_empty() || !math_spans.iter().any(|s| s.content.contains("100")),
5920            "Unbalanced $ should not create math span containing 100"
5921        );
5922    }
5923
5924    // =========================================================================
5925    // Tests for O(1) reference definition lookups via HashMap
5926    // =========================================================================
5927
5928    #[test]
5929    fn test_reference_lookup_o1_basic() {
5930        let content = r#"[ref1]: /url1
5931[REF2]: /url2 "Title"
5932[Ref3]: /url3
5933
5934Use [link][ref1] and [link][REF2]."#;
5935        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5936
5937        // Verify we have 3 reference defs
5938        assert_eq!(ctx.reference_defs.len(), 3);
5939
5940        // Test get_reference_url with various cases
5941        assert_eq!(ctx.get_reference_url("ref1"), Some("/url1"));
5942        assert_eq!(ctx.get_reference_url("REF1"), Some("/url1")); // case insensitive
5943        assert_eq!(ctx.get_reference_url("Ref1"), Some("/url1")); // case insensitive
5944        assert_eq!(ctx.get_reference_url("ref2"), Some("/url2"));
5945        assert_eq!(ctx.get_reference_url("REF2"), Some("/url2"));
5946        assert_eq!(ctx.get_reference_url("ref3"), Some("/url3"));
5947        assert_eq!(ctx.get_reference_url("nonexistent"), None);
5948    }
5949
5950    #[test]
5951    fn test_reference_lookup_o1_get_reference_def() {
5952        let content = r#"[myref]: https://example.com "My Title"
5953"#;
5954        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5955
5956        // Test get_reference_def
5957        let def = ctx.get_reference_def("myref").expect("Should find myref");
5958        assert_eq!(def.url, "https://example.com");
5959        assert_eq!(def.title.as_deref(), Some("My Title"));
5960
5961        // Case insensitive
5962        let def2 = ctx.get_reference_def("MYREF").expect("Should find MYREF");
5963        assert_eq!(def2.url, "https://example.com");
5964
5965        // Non-existent
5966        assert!(ctx.get_reference_def("nonexistent").is_none());
5967    }
5968
5969    #[test]
5970    fn test_reference_lookup_o1_has_reference_def() {
5971        let content = r#"[foo]: /foo
5972[BAR]: /bar
5973"#;
5974        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5975
5976        // Test has_reference_def
5977        assert!(ctx.has_reference_def("foo"));
5978        assert!(ctx.has_reference_def("FOO")); // case insensitive
5979        assert!(ctx.has_reference_def("bar"));
5980        assert!(ctx.has_reference_def("Bar")); // case insensitive
5981        assert!(!ctx.has_reference_def("baz")); // doesn't exist
5982    }
5983
5984    #[test]
5985    fn test_reference_lookup_o1_empty_content() {
5986        let content = "No references here.";
5987        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
5988
5989        assert!(ctx.reference_defs.is_empty());
5990        assert_eq!(ctx.get_reference_url("anything"), None);
5991        assert!(ctx.get_reference_def("anything").is_none());
5992        assert!(!ctx.has_reference_def("anything"));
5993    }
5994
5995    #[test]
5996    fn test_reference_lookup_o1_special_characters_in_id() {
5997        let content = r#"[ref-with-dash]: /url1
5998[ref_with_underscore]: /url2
5999[ref.with.dots]: /url3
6000"#;
6001        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
6002
6003        assert_eq!(ctx.get_reference_url("ref-with-dash"), Some("/url1"));
6004        assert_eq!(ctx.get_reference_url("ref_with_underscore"), Some("/url2"));
6005        assert_eq!(ctx.get_reference_url("ref.with.dots"), Some("/url3"));
6006    }
6007
6008    #[test]
6009    fn test_reference_lookup_o1_unicode_id() {
6010        let content = r#"[日本語]: /japanese
6011[émoji]: /emoji
6012"#;
6013        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
6014
6015        assert_eq!(ctx.get_reference_url("日本語"), Some("/japanese"));
6016        assert_eq!(ctx.get_reference_url("émoji"), Some("/emoji"));
6017        assert_eq!(ctx.get_reference_url("ÉMOJI"), Some("/emoji")); // uppercase
6018    }
6019}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs