rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Runs `$code` and yields its value, optionally reporting how long it took.
///
/// Non-WASM variant: the section is always timed with `std::time::Instant`, and when
/// `$profile` evaluates to `true` a `[PROFILE] <name>: <elapsed>` line is written to stderr.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let started_at = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        output
    }};
}

/// WASM variant: no timing and no output, the section expands to just `$code`.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
30// Comprehensive link pattern that captures both inline and reference links
31// Use (?s) flag to make . match newlines
32static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
33    Regex::new(
34        r#"(?sx)
35        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
36        (?:
37            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
38            |
39            \[([^\]]*)\]      # Reference ID in group 6
40        )"#
41    ).unwrap()
42});
43
44// Image pattern (similar to links but with ! prefix)
45// Use (?s) flag to make . match newlines
46static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(
48        r#"(?sx)
49        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
50        (?:
51            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
52            |
53            \[([^\]]*)\]      # Reference ID in group 6
54        )"#
55    ).unwrap()
56});
57
58// Reference definition pattern
59static REF_DEF_PATTERN: LazyLock<Regex> =
60    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs: no pattern is defined in this file; they rely on the centralized URL pattern
// from regex_cache (presumably `URL_SIMPLE_REGEX`, imported above).
63
64// Pattern for email addresses
65static BARE_EMAIL_PATTERN: LazyLock<Regex> =
66    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
68// Pattern for blockquote prefix in parse_list_blocks
69static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
71/// Pre-computed information about a line
72#[derive(Debug, Clone)]
73pub struct LineInfo {
74    /// Byte offset where this line starts in the document
75    pub byte_offset: usize,
76    /// Length of the line in bytes (without newline)
77    pub byte_len: usize,
78    /// Number of bytes of leading whitespace (for substring extraction)
79    pub indent: usize,
80    /// Visual column width of leading whitespace (with proper tab expansion)
81    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
82    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
83    pub visual_indent: usize,
84    /// Whether the line is blank (empty or only whitespace)
85    pub is_blank: bool,
86    /// Whether this line is inside a code block
87    pub in_code_block: bool,
88    /// Whether this line is inside front matter
89    pub in_front_matter: bool,
90    /// Whether this line is inside an HTML block
91    pub in_html_block: bool,
92    /// Whether this line is inside an HTML comment
93    pub in_html_comment: bool,
94    /// List item information if this line starts a list item
95    pub list_item: Option<ListItemInfo>,
96    /// Heading information if this line is a heading
97    pub heading: Option<HeadingInfo>,
98    /// Blockquote information if this line is a blockquote
99    pub blockquote: Option<BlockquoteInfo>,
100    /// Whether this line is inside a mkdocstrings autodoc block
101    pub in_mkdocstrings: bool,
102    /// Whether this line is part of an ESM import/export block (MDX only)
103    pub in_esm_block: bool,
104    /// Whether this line is a continuation of a multi-line code span from a previous line
105    pub in_code_span_continuation: bool,
106    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
107    /// Pre-computed for consistent detection across all rules
108    pub is_horizontal_rule: bool,
109}
110
111impl LineInfo {
112    /// Get the line content as a string slice from the source document
113    pub fn content<'a>(&self, source: &'a str) -> &'a str {
114        &source[self.byte_offset..self.byte_offset + self.byte_len]
115    }
116}
117
/// Details of the list item that begins on a line.
#[derive(Clone, Debug)]
pub struct ListItemInfo {
    /// Marker text: `*`, `-`, `+`, or a number followed by `.` or `)`.
    pub marker: String,
    /// `true` for an ordered item, `false` for an unordered one.
    pub is_ordered: bool,
    /// Item number, for ordered lists.
    pub number: Option<usize>,
    /// 0-based column at which the marker begins.
    pub marker_column: usize,
    /// Column at which the content following the marker begins.
    pub content_column: usize,
}
132
/// Syntactic form a heading was written in.
///
/// Derives `Eq` alongside `PartialEq`: the enum has no fields, so equality is total, and
/// `Eq` lets the type satisfy `T: Eq` bounds.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading underlined with `=`
    Setext1,
    /// Setext style heading underlined with `-`
    Setext2,
}
143
144/// Parsed link information
145#[derive(Debug, Clone)]
146pub struct ParsedLink<'a> {
147    /// Line number (1-indexed)
148    pub line: usize,
149    /// Start column (0-indexed) in the line
150    pub start_col: usize,
151    /// End column (0-indexed) in the line
152    pub end_col: usize,
153    /// Byte offset in document
154    pub byte_offset: usize,
155    /// End byte offset in document
156    pub byte_end: usize,
157    /// Link text
158    pub text: Cow<'a, str>,
159    /// Link URL or reference
160    pub url: Cow<'a, str>,
161    /// Whether this is a reference link [text][ref] vs inline [text](url)
162    pub is_reference: bool,
163    /// Reference ID for reference links
164    pub reference_id: Option<Cow<'a, str>>,
165    /// Link type from pulldown-cmark
166    pub link_type: LinkType,
167}
168
/// A broken link as reported by pulldown-cmark.
#[derive(Clone, Debug)]
pub struct BrokenLinkInfo {
    /// Reference text that could not be resolved.
    pub reference: String,
    /// Byte range of the link within the source document.
    pub span: std::ops::Range<usize>,
}
177
/// A footnote reference such as `[^1]` or `[^note]`.
#[derive(Clone, Debug)]
pub struct FootnoteRef {
    /// Footnote ID, with the `^` prefix removed.
    pub id: String,
    /// 1-indexed line number.
    pub line: usize,
    /// Byte offset in the document at which the reference starts.
    pub byte_offset: usize,
    /// Byte offset in the document at which the reference ends.
    pub byte_end: usize,
}
190
191/// Parsed image information
192#[derive(Debug, Clone)]
193pub struct ParsedImage<'a> {
194    /// Line number (1-indexed)
195    pub line: usize,
196    /// Start column (0-indexed) in the line
197    pub start_col: usize,
198    /// End column (0-indexed) in the line
199    pub end_col: usize,
200    /// Byte offset in document
201    pub byte_offset: usize,
202    /// End byte offset in document
203    pub byte_end: usize,
204    /// Alt text
205    pub alt_text: Cow<'a, str>,
206    /// Image URL or reference
207    pub url: Cow<'a, str>,
208    /// Whether this is a reference image ![alt][ref] vs inline ![alt](url)
209    pub is_reference: bool,
210    /// Reference ID for reference images
211    pub reference_id: Option<Cow<'a, str>>,
212    /// Link type from pulldown-cmark
213    pub link_type: LinkType,
214}
215
/// A reference definition of the form `[ref]: url "title"`.
#[derive(Clone, Debug)]
pub struct ReferenceDef {
    /// 1-indexed line number.
    pub line: usize,
    /// Reference ID, normalized to lowercase.
    pub id: String,
    /// Target URL.
    pub url: String,
    /// Title, when one is given.
    pub title: Option<String>,
    /// Byte offset at which the definition starts.
    pub byte_offset: usize,
    /// Byte offset at which the definition ends.
    pub byte_end: usize,
    /// Byte offset at which the title starts, quote included (only when a title is present).
    pub title_byte_start: Option<usize>,
    /// Byte offset at which the title ends, quote included (only when a title is present).
    pub title_byte_end: Option<usize>,
}
236
/// An inline code span, which may extend over several lines.
#[derive(Clone, Debug)]
pub struct CodeSpan {
    /// 1-indexed line on which the span starts.
    pub line: usize,
    /// 1-indexed line on which the span ends.
    pub end_line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the span in the document.
    pub byte_offset: usize,
    /// Byte offset in the document at which the span ends.
    pub byte_end: usize,
    /// How many backticks delimit the span (1, 2, 3, etc.).
    pub backtick_count: usize,
    /// Text inside the span, backticks excluded.
    pub content: String,
}
257
258/// Information about a heading
259#[derive(Debug, Clone)]
260pub struct HeadingInfo {
261    /// Heading level (1-6 for ATX, 1-2 for Setext)
262    pub level: u8,
263    /// Style of heading
264    pub style: HeadingStyle,
265    /// The heading marker (# characters or underline)
266    pub marker: String,
267    /// Column where the marker starts (0-based)
268    pub marker_column: usize,
269    /// Column where heading text starts
270    pub content_column: usize,
271    /// The heading text (without markers and without custom ID syntax)
272    pub text: String,
273    /// Custom header ID if present (e.g., from {#custom-id} syntax)
274    pub custom_id: Option<String>,
275    /// Original heading text including custom ID syntax
276    pub raw_text: String,
277    /// Whether it has a closing sequence (for ATX)
278    pub has_closing_sequence: bool,
279    /// The closing sequence if present
280    pub closing_sequence: String,
281    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
282    /// False for malformed headings like `#NoSpace` that MD018 should flag
283    pub is_valid: bool,
284}
285
286/// A valid heading from a filtered iteration
287///
288/// Only includes headings that are CommonMark-compliant (have space after #).
289/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
290#[derive(Debug, Clone)]
291pub struct ValidHeading<'a> {
292    /// The 1-indexed line number in the document
293    pub line_num: usize,
294    /// Reference to the heading information
295    pub heading: &'a HeadingInfo,
296    /// Reference to the full line info (for rules that need additional context)
297    pub line_info: &'a LineInfo,
298}
299
300/// Iterator over valid CommonMark headings in a document
301///
302/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
303/// but should not be processed by other heading rules.
304pub struct ValidHeadingsIter<'a> {
305    lines: &'a [LineInfo],
306    current_index: usize,
307}
308
309impl<'a> ValidHeadingsIter<'a> {
310    fn new(lines: &'a [LineInfo]) -> Self {
311        Self {
312            lines,
313            current_index: 0,
314        }
315    }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319    type Item = ValidHeading<'a>;
320
321    fn next(&mut self) -> Option<Self::Item> {
322        while self.current_index < self.lines.len() {
323            let idx = self.current_index;
324            self.current_index += 1;
325
326            let line_info = &self.lines[idx];
327            if let Some(heading) = &line_info.heading
328                && heading.is_valid
329            {
330                return Some(ValidHeading {
331                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
332                    heading,
333                    line_info,
334                });
335            }
336        }
337        None
338    }
339}
340
/// Everything recorded about a blockquote line.
#[derive(Clone, Debug)]
pub struct BlockquoteInfo {
    /// Nesting depth: 1 for `>`, 2 for `>>`, and so on.
    pub nesting_level: usize,
    /// Indentation preceding the blockquote marker.
    pub indent: String,
    /// 0-based column at which the first `>` begins.
    pub marker_column: usize,
    /// The blockquote prefix, e.g. "> " or ">> ".
    pub prefix: String,
    /// Text following the blockquote marker(s).
    pub content: String,
    /// True when no space follows the marker.
    pub has_no_space_after_marker: bool,
    /// True when more than one space follows the marker.
    pub has_multiple_spaces_after_marker: bool,
    /// True when this is an empty blockquote line that needs the MD028 fix.
    pub needs_md028_fix: bool,
}
361
/// A contiguous list in the document and the item lines it contains.
#[derive(Clone, Debug)]
pub struct ListBlock {
    /// 1-indexed line on which the list starts.
    pub start_line: usize,
    /// 1-indexed line on which the list ends.
    pub end_line: usize,
    /// `true` for an ordered list, `false` for an unordered one.
    pub is_ordered: bool,
    /// The consistent marker of an unordered list, if there is one.
    pub marker: Option<String>,
    /// Blockquote prefix of the list; empty when the list is not inside a blockquote.
    pub blockquote_prefix: String,
    /// Lines within this block that are list items.
    pub item_lines: Vec<usize>,
    /// Nesting level; 0 means a top-level list.
    pub nesting_level: usize,
    /// Widest marker seen in this block (e.g. 3 for "1. ", 4 for "10. ").
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Counts of Markdown-significant characters, for fast content analysis.
#[derive(Clone, Debug, Default)]
pub struct CharFrequency {
    /// Number of `#` characters (headings).
    pub hash_count: usize,
    /// Number of `*` characters (emphasis, lists, horizontal rules).
    pub asterisk_count: usize,
    /// Number of `_` characters (emphasis, horizontal rules).
    pub underscore_count: usize,
    /// Number of `-` characters (lists, horizontal rules, setext headings).
    pub hyphen_count: usize,
    /// Number of `+` characters (lists).
    pub plus_count: usize,
    /// Number of `>` characters (blockquotes).
    pub gt_count: usize,
    /// Number of `|` characters (tables).
    pub pipe_count: usize,
    /// Number of `[` characters (links, images).
    pub bracket_count: usize,
    /// Number of backtick characters (code spans, code blocks).
    pub backtick_count: usize,
    /// Number of `<` characters (HTML tags, autolinks).
    pub lt_count: usize,
    /// Number of `!` characters (images).
    pub exclamation_count: usize,
    /// Number of newline characters.
    pub newline_count: usize,
}
413
/// An HTML tag located in the document.
#[derive(Clone, Debug)]
pub struct HtmlTag {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the tag in the document.
    pub byte_offset: usize,
    /// Byte offset in the document at which the tag ends.
    pub byte_end: usize,
    /// Name of the tag, e.g. "div", "img", "br".
    pub tag_name: String,
    /// True for a closing tag (`</tag>`).
    pub is_closing: bool,
    /// True for a self-closing tag (`<tag />`).
    pub is_self_closing: bool,
    /// The raw tag content.
    pub raw_content: String,
}
436
/// An emphasis span located in the document.
#[derive(Clone, Debug)]
pub struct EmphasisSpan {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the span in the document.
    pub byte_offset: usize,
    /// Byte offset in the document at which the span ends.
    pub byte_end: usize,
    /// Emphasis character, `'*'` or `'_'`.
    pub marker: char,
    /// How many markers are used: 1 for italic, 2 for bold, 3+ for bold+italic.
    pub marker_count: usize,
    /// Text inside the emphasis.
    pub content: String,
}
457
/// A table row located in the document.
#[derive(Clone, Debug)]
pub struct TableRow {
    /// 1-indexed line number.
    pub line: usize,
    /// True for a separator row (only `|`, `-`, `:`, and spaces).
    pub is_separator: bool,
    /// How many columns (pipe-separated cells) the row has.
    pub column_count: usize,
    /// Alignments taken from the separator row: "left", "center", "right", or "none".
    pub column_alignments: Vec<String>,
}
470
/// A URL written directly in the text rather than inside a link.
#[derive(Clone, Debug)]
pub struct BareUrl {
    /// 1-indexed line number.
    pub line: usize,
    /// 0-indexed start column within the line.
    pub start_col: usize,
    /// 0-indexed end column within the line.
    pub end_col: usize,
    /// Byte offset of the URL in the document.
    pub byte_offset: usize,
    /// Byte offset in the document at which the URL ends.
    pub byte_end: usize,
    /// The URL text.
    pub url: String,
    /// Kind of URL: "http", "https", "ftp", or "email".
    pub url_type: String,
}
489
490pub struct LintContext<'a> {
491    pub content: &'a str,
492    pub line_offsets: Vec<usize>,
493    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
494    pub lines: Vec<LineInfo>,             // Pre-computed line information
495    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
496    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
497    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
498    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
499    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
500    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
501    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
502    pub char_frequency: CharFrequency,    // Character frequency analysis
503    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
504    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
505    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
506    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
507    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
508    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
509    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
510    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
511    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
512    pub flavor: MarkdownFlavor,           // Markdown flavor being used
513    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
514}
515
/// The four consecutive pieces of a blockquote line, each borrowed from the input.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Splits a blockquote line into indent, `>` markers, trailing blanks and content.
///
/// The line is consumed left to right: a run of spaces/tabs (`indent`), a run of one or
/// more contiguous `>` characters (`markers`), another run of spaces/tabs (`spaces_after`),
/// and whatever remains (`content`). Returns `None` when no `>` follows the indent.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    // Only ASCII space and tab count as blanks here, not general Unicode whitespace.
    const BLANKS: [char; 2] = [' ', '\t'];

    let after_indent = line.trim_start_matches(BLANKS);
    let after_markers = after_indent.trim_start_matches('>');

    // Nothing stripped means the first non-blank character is not a '>' marker.
    if after_markers.len() == after_indent.len() {
        return None;
    }

    let content = after_markers.trim_start_matches(BLANKS);

    // Each piece is the prefix that the corresponding trim step removed.
    let indent = &line[..line.len() - after_indent.len()];
    let markers = &after_indent[..after_indent.len() - after_markers.len()];
    let spaces_after = &after_markers[..after_markers.len() - content.len()];

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
562    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
563        #[cfg(not(target_arch = "wasm32"))]
564        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
565        #[cfg(target_arch = "wasm32")]
566        let profile = false;
567
568        let line_offsets = profile_section!("Line offsets", profile, {
569            let mut offsets = vec![0];
570            for (i, c) in content.char_indices() {
571                if c == '\n' {
572                    offsets.push(i + 1);
573                }
574            }
575            offsets
576        });
577
578        // Detect code blocks once and cache them
579        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));
580
581        // Pre-compute HTML comment ranges ONCE for all operations
582        let html_comment_ranges = profile_section!(
583            "HTML comment ranges",
584            profile,
585            crate::utils::skip_context::compute_html_comment_ranges(content)
586        );
587
588        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
589        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
590            if flavor == MarkdownFlavor::MkDocs {
591                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
592            } else {
593                Vec::new()
594            }
595        });
596
597        // Pre-compute line information (without headings/blockquotes yet)
598        let mut lines = profile_section!(
599            "Basic line info",
600            profile,
601            Self::compute_basic_line_info(
602                content,
603                &line_offsets,
604                &code_blocks,
605                flavor,
606                &html_comment_ranges,
607                &autodoc_ranges,
608            )
609        );
610
611        // Detect HTML blocks BEFORE heading detection
612        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));
613
614        // Detect ESM import/export blocks in MDX files BEFORE heading detection
615        profile_section!(
616            "ESM blocks",
617            profile,
618            Self::detect_esm_blocks(content, &mut lines, flavor)
619        );
620
621        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
622        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));
623
624        // Now detect headings and blockquotes
625        profile_section!(
626            "Headings & blockquotes",
627            profile,
628            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
629        );
630
631        // Parse code spans early so we can exclude them from link/image parsing
632        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));
633
634        // Mark lines that are continuations of multi-line code spans
635        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
636        for span in &code_spans {
637            if span.end_line > span.line {
638                // Mark lines after the first line as continuations
639                for line_num in (span.line + 1)..=span.end_line {
640                    if let Some(line_info) = lines.get_mut(line_num - 1) {
641                        line_info.in_code_span_continuation = true;
642                    }
643                }
644            }
645        }
646
647        // Parse links, images, references, and list blocks
648        let (links, broken_links, footnote_refs) = profile_section!(
649            "Links",
650            profile,
651            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
652        );
653
654        let images = profile_section!(
655            "Images",
656            profile,
657            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
658        );
659
660        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));
661
662        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));
663
664        // Compute character frequency for fast content analysis
665        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));
666
667        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
668        let table_blocks = profile_section!(
669            "Table blocks",
670            profile,
671            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
672                content,
673                &code_blocks,
674                &code_spans,
675                &html_comment_ranges,
676            )
677        );
678
679        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
680        let line_index = profile_section!(
681            "Line index",
682            profile,
683            crate::utils::range_utils::LineIndex::new(content)
684        );
685
686        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
687        let jinja_ranges = profile_section!(
688            "Jinja ranges",
689            profile,
690            crate::utils::jinja_utils::find_jinja_ranges(content)
691        );
692
693        Self {
694            content,
695            line_offsets,
696            code_blocks,
697            lines,
698            links,
699            images,
700            broken_links,
701            footnote_refs,
702            reference_defs,
703            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
704            list_blocks,
705            char_frequency,
706            html_tags_cache: OnceLock::new(),
707            emphasis_spans_cache: OnceLock::new(),
708            table_rows_cache: OnceLock::new(),
709            bare_urls_cache: OnceLock::new(),
710            has_mixed_list_nesting_cache: OnceLock::new(),
711            html_comment_ranges,
712            table_blocks,
713            line_index,
714            jinja_ranges,
715            flavor,
716            source_file,
717        }
718    }
719
720    /// Get code spans - computed lazily on first access
721    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722        Arc::clone(
723            self.code_spans_cache
724                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725        )
726    }
727
728    /// Get HTML comment ranges - pre-computed during LintContext construction
729    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730        &self.html_comment_ranges
731    }
732
733    /// Get HTML tags - computed lazily on first access
734    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735        Arc::clone(self.html_tags_cache.get_or_init(|| {
736            Arc::new(Self::parse_html_tags(
737                self.content,
738                &self.lines,
739                &self.code_blocks,
740                self.flavor,
741            ))
742        }))
743    }
744
745    /// Get emphasis spans - computed lazily on first access
746    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747        Arc::clone(
748            self.emphasis_spans_cache
749                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750        )
751    }
752
753    /// Get table rows - computed lazily on first access
754    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755        Arc::clone(
756            self.table_rows_cache
757                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758        )
759    }
760
761    /// Get bare URLs - computed lazily on first access
762    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763        Arc::clone(
764            self.bare_urls_cache
765                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766        )
767    }
768
769    /// Check if document has mixed ordered/unordered list nesting.
770    /// Result is cached after first computation (document-level invariant).
771    /// This is used by MD007 for smart style auto-detection.
772    pub fn has_mixed_list_nesting(&self) -> bool {
773        *self
774            .has_mixed_list_nesting_cache
775            .get_or_init(|| self.compute_mixed_list_nesting())
776    }
777
778    /// Internal computation for mixed list nesting (only called once per LintContext).
779    fn compute_mixed_list_nesting(&self) -> bool {
780        // Track parent list items by their marker position and type
781        // Using marker_column instead of indent because it works correctly
782        // for blockquoted content where indent doesn't account for the prefix
783        // Stack stores: (marker_column, is_ordered)
784        let mut stack: Vec<(usize, bool)> = Vec::new();
785        let mut last_was_blank = false;
786
787        for line_info in &self.lines {
788            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
789            if line_info.in_code_block
790                || line_info.in_front_matter
791                || line_info.in_mkdocstrings
792                || line_info.in_html_comment
793                || line_info.in_esm_block
794            {
795                continue;
796            }
797
798            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
799            if line_info.is_blank {
800                last_was_blank = true;
801                continue;
802            }
803
804            if let Some(list_item) = &line_info.list_item {
805                // Normalize column 1 to column 0 (consistent with MD007 check function)
806                let current_pos = if list_item.marker_column == 1 {
807                    0
808                } else {
809                    list_item.marker_column
810                };
811
812                // If there was a blank line and this item is at root level, reset stack
813                if last_was_blank && current_pos == 0 {
814                    stack.clear();
815                }
816                last_was_blank = false;
817
818                // Pop items at same or greater position (they're siblings or deeper, not parents)
819                while let Some(&(pos, _)) = stack.last() {
820                    if pos >= current_pos {
821                        stack.pop();
822                    } else {
823                        break;
824                    }
825                }
826
827                // Check if immediate parent has different type - this is mixed nesting
828                if let Some(&(_, parent_is_ordered)) = stack.last()
829                    && parent_is_ordered != list_item.is_ordered
830                {
831                    return true; // Found mixed nesting - early exit
832                }
833
834                stack.push((current_pos, list_item.is_ordered));
835            } else {
836                // Non-list line (but not blank) - could be paragraph or other content
837                last_was_blank = false;
838            }
839        }
840
841        false
842    }
843
844    /// Map a byte offset to (line, column)
845    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846        match self.line_offsets.binary_search(&offset) {
847            Ok(line) => (line + 1, 1),
848            Err(line) => {
849                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850                (line, offset - line_start + 1)
851            }
852        }
853    }
854
855    /// Check if a position is within a code block or code span
856    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857        // Check code blocks first
858        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859            return true;
860        }
861
862        // Check inline code spans (lazy load if needed)
863        self.code_spans()
864            .iter()
865            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866    }
867
868    /// Get line information by line number (1-indexed)
869    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870        if line_num > 0 {
871            self.lines.get(line_num - 1)
872        } else {
873            None
874        }
875    }
876
877    /// Get byte offset for a line number (1-indexed)
878    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879        self.line_info(line_num).map(|info| info.byte_offset)
880    }
881
882    /// Get URL for a reference link/image by its ID
883    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884        let normalized_id = ref_id.to_lowercase();
885        self.reference_defs
886            .iter()
887            .find(|def| def.id == normalized_id)
888            .map(|def| def.url.as_str())
889    }
890
891    /// Check if a line is part of a list block
892    pub fn is_in_list_block(&self, line_num: usize) -> bool {
893        self.list_blocks
894            .iter()
895            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896    }
897
898    /// Get the list block containing a specific line
899    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900        self.list_blocks
901            .iter()
902            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903    }
904
905    // Compatibility methods for DocumentStructure migration
906
907    /// Check if a line is within a code block
908    pub fn is_in_code_block(&self, line_num: usize) -> bool {
909        if line_num == 0 || line_num > self.lines.len() {
910            return false;
911        }
912        self.lines[line_num - 1].in_code_block
913    }
914
915    /// Check if a line is within front matter
916    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917        if line_num == 0 || line_num > self.lines.len() {
918            return false;
919        }
920        self.lines[line_num - 1].in_front_matter
921    }
922
923    /// Check if a line is within an HTML block
924    pub fn is_in_html_block(&self, line_num: usize) -> bool {
925        if line_num == 0 || line_num > self.lines.len() {
926            return false;
927        }
928        self.lines[line_num - 1].in_html_block
929    }
930
931    /// Check if a line and column is within a code span
932    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933        if line_num == 0 || line_num > self.lines.len() {
934            return false;
935        }
936
937        // Use the code spans cache to check
938        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
939        // Convert col to 0-indexed for comparison
940        let col_0indexed = if col > 0 { col - 1 } else { 0 };
941        let code_spans = self.code_spans();
942        code_spans.iter().any(|span| {
943            // Check if line is within the span's line range
944            if line_num < span.line || line_num > span.end_line {
945                return false;
946            }
947
948            if span.line == span.end_line {
949                // Single-line span: check column bounds
950                col_0indexed >= span.start_col && col_0indexed < span.end_col
951            } else if line_num == span.line {
952                // First line of multi-line span: anything after start_col is in span
953                col_0indexed >= span.start_col
954            } else if line_num == span.end_line {
955                // Last line of multi-line span: anything before end_col is in span
956                col_0indexed < span.end_col
957            } else {
958                // Middle line of multi-line span: entire line is in span
959                true
960            }
961        })
962    }
963
964    /// Check if a byte offset is within a code span
965    #[inline]
966    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967        let code_spans = self.code_spans();
968        code_spans
969            .iter()
970            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971    }
972
973    /// Check if a byte position is within a reference definition
974    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
975    #[inline]
976    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977        self.reference_defs
978            .iter()
979            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980    }
981
982    /// Check if a byte position is within an HTML comment
983    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
984    /// where k is the number of HTML comments (typically very small)
985    #[inline]
986    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987        self.html_comment_ranges
988            .iter()
989            .any(|range| byte_pos >= range.start && byte_pos < range.end)
990    }
991
992    /// Check if a byte position is within an HTML tag (including multiline tags)
993    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
994    #[inline]
995    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996        self.html_tags()
997            .iter()
998            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999    }
1000
1001    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1002    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003        self.jinja_ranges
1004            .iter()
1005            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006    }
1007
1008    /// Check if a byte position is within a link reference definition title
1009    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010        self.reference_defs.iter().any(|def| {
1011            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012                byte_pos >= start && byte_pos < end
1013            } else {
1014                false
1015            }
1016        })
1017    }
1018
1019    /// Check if content has any instances of a specific character (fast)
1020    pub fn has_char(&self, ch: char) -> bool {
1021        match ch {
1022            '#' => self.char_frequency.hash_count > 0,
1023            '*' => self.char_frequency.asterisk_count > 0,
1024            '_' => self.char_frequency.underscore_count > 0,
1025            '-' => self.char_frequency.hyphen_count > 0,
1026            '+' => self.char_frequency.plus_count > 0,
1027            '>' => self.char_frequency.gt_count > 0,
1028            '|' => self.char_frequency.pipe_count > 0,
1029            '[' => self.char_frequency.bracket_count > 0,
1030            '`' => self.char_frequency.backtick_count > 0,
1031            '<' => self.char_frequency.lt_count > 0,
1032            '!' => self.char_frequency.exclamation_count > 0,
1033            '\n' => self.char_frequency.newline_count > 0,
1034            _ => self.content.contains(ch), // Fallback for other characters
1035        }
1036    }
1037
1038    /// Get count of a specific character (fast)
1039    pub fn char_count(&self, ch: char) -> usize {
1040        match ch {
1041            '#' => self.char_frequency.hash_count,
1042            '*' => self.char_frequency.asterisk_count,
1043            '_' => self.char_frequency.underscore_count,
1044            '-' => self.char_frequency.hyphen_count,
1045            '+' => self.char_frequency.plus_count,
1046            '>' => self.char_frequency.gt_count,
1047            '|' => self.char_frequency.pipe_count,
1048            '[' => self.char_frequency.bracket_count,
1049            '`' => self.char_frequency.backtick_count,
1050            '<' => self.char_frequency.lt_count,
1051            '!' => self.char_frequency.exclamation_count,
1052            '\n' => self.char_frequency.newline_count,
1053            _ => self.content.matches(ch).count(), // Fallback for other characters
1054        }
1055    }
1056
1057    /// Check if content likely contains headings (fast)
1058    pub fn likely_has_headings(&self) -> bool {
1059        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1060    }
1061
1062    /// Check if content likely contains lists (fast)
1063    pub fn likely_has_lists(&self) -> bool {
1064        self.char_frequency.asterisk_count > 0
1065            || self.char_frequency.hyphen_count > 0
1066            || self.char_frequency.plus_count > 0
1067    }
1068
1069    /// Check if content likely contains emphasis (fast)
1070    pub fn likely_has_emphasis(&self) -> bool {
1071        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072    }
1073
1074    /// Check if content likely contains tables (fast)
1075    pub fn likely_has_tables(&self) -> bool {
1076        self.char_frequency.pipe_count > 2
1077    }
1078
1079    /// Check if content likely contains blockquotes (fast)
1080    pub fn likely_has_blockquotes(&self) -> bool {
1081        self.char_frequency.gt_count > 0
1082    }
1083
1084    /// Check if content likely contains code (fast)
1085    pub fn likely_has_code(&self) -> bool {
1086        self.char_frequency.backtick_count > 0
1087    }
1088
1089    /// Check if content likely contains links or images (fast)
1090    pub fn likely_has_links_or_images(&self) -> bool {
1091        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092    }
1093
1094    /// Check if content likely contains HTML (fast)
1095    pub fn likely_has_html(&self) -> bool {
1096        self.char_frequency.lt_count > 0
1097    }
1098
1099    /// Get HTML tags on a specific line
1100    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101        self.html_tags()
1102            .iter()
1103            .filter(|tag| tag.line == line_num)
1104            .cloned()
1105            .collect()
1106    }
1107
1108    /// Get emphasis spans on a specific line
1109    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110        self.emphasis_spans()
1111            .iter()
1112            .filter(|span| span.line == line_num)
1113            .cloned()
1114            .collect()
1115    }
1116
1117    /// Get table rows on a specific line
1118    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119        self.table_rows()
1120            .iter()
1121            .filter(|row| row.line == line_num)
1122            .cloned()
1123            .collect()
1124    }
1125
1126    /// Get bare URLs on a specific line
1127    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128        self.bare_urls()
1129            .iter()
1130            .filter(|url| url.line == line_num)
1131            .cloned()
1132            .collect()
1133    }
1134
1135    /// Find the line index for a given byte offset using binary search.
1136    /// Returns (line_index, line_number, column) where:
1137    /// - line_index is the 0-based index in the lines array
1138    /// - line_number is the 1-based line number
1139    /// - column is the byte offset within that line
1140    #[inline]
1141    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142        // Binary search to find the line containing this byte offset
1143        let idx = match lines.binary_search_by(|line| {
1144            if byte_offset < line.byte_offset {
1145                std::cmp::Ordering::Greater
1146            } else if byte_offset > line.byte_offset + line.byte_len {
1147                std::cmp::Ordering::Less
1148            } else {
1149                std::cmp::Ordering::Equal
1150            }
1151        }) {
1152            Ok(idx) => idx,
1153            Err(idx) => idx.saturating_sub(1),
1154        };
1155
1156        let line = &lines[idx];
1157        let line_num = idx + 1;
1158        let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160        (idx, line_num, col)
1161    }
1162
1163    /// Check if a byte offset is within a code span using binary search
1164    #[inline]
1165    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166        // Since spans are sorted by byte_offset, use partition_point for binary search
1167        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169        // Check the span that starts at or before our offset
1170        if idx > 0 {
1171            let span = &code_spans[idx - 1];
1172            if offset >= span.byte_offset && offset < span.byte_end {
1173                return true;
1174            }
1175        }
1176
1177        false
1178    }
1179
1180    /// Collect byte ranges of all links using pulldown-cmark
1181    /// This is used to skip heading detection for lines that fall within link syntax
1182    /// (e.g., multiline links like `[text](url\n#fragment)`)
1183    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186        let mut link_ranges = Vec::new();
1187        let mut options = Options::empty();
1188        options.insert(Options::ENABLE_WIKILINKS);
1189        options.insert(Options::ENABLE_FOOTNOTES);
1190
1191        let parser = Parser::new_ext(content, options).into_offset_iter();
1192        let mut link_stack: Vec<usize> = Vec::new();
1193
1194        for (event, range) in parser {
1195            match event {
1196                Event::Start(Tag::Link { .. }) => {
1197                    link_stack.push(range.start);
1198                }
1199                Event::End(TagEnd::Link) => {
1200                    if let Some(start_pos) = link_stack.pop() {
1201                        link_ranges.push((start_pos, range.end));
1202                    }
1203                }
1204                _ => {}
1205            }
1206        }
1207
1208        link_ranges
1209    }
1210
1211    /// Parse all links in the content
1212    fn parse_links(
1213        content: &'a str,
1214        lines: &[LineInfo],
1215        code_blocks: &[(usize, usize)],
1216        code_spans: &[CodeSpan],
1217        flavor: MarkdownFlavor,
1218        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221        use std::collections::HashSet;
1222
1223        let mut links = Vec::with_capacity(content.len() / 500);
1224        let mut broken_links = Vec::new();
1225        let mut footnote_refs = Vec::new();
1226
1227        // Track byte positions of links found by pulldown-cmark
1228        let mut found_positions = HashSet::new();
1229
1230        // Use pulldown-cmark's streaming parser with BrokenLink callback
1231        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1232        // This automatically handles:
1233        // - Escaped links (won't generate events)
1234        // - Links in code blocks/spans (won't generate Link events)
1235        // - Images (generates Tag::Image instead)
1236        // - Reference resolution (dest_url is already resolved!)
1237        // - Broken references (callback is invoked)
1238        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1239        let mut options = Options::empty();
1240        options.insert(Options::ENABLE_WIKILINKS);
1241        options.insert(Options::ENABLE_FOOTNOTES);
1242
1243        let parser = Parser::new_with_broken_link_callback(
1244            content,
1245            options,
1246            Some(|link: BrokenLink<'_>| {
1247                broken_links.push(BrokenLinkInfo {
1248                    reference: link.reference.to_string(),
1249                    span: link.span.clone(),
1250                });
1251                None
1252            }),
1253        )
1254        .into_offset_iter();
1255
1256        let mut link_stack: Vec<(
1257            usize,
1258            usize,
1259            pulldown_cmark::CowStr<'a>,
1260            LinkType,
1261            pulldown_cmark::CowStr<'a>,
1262        )> = Vec::new();
1263        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1264
1265        for (event, range) in parser {
1266            match event {
1267                Event::Start(Tag::Link {
1268                    link_type,
1269                    dest_url,
1270                    id,
1271                    ..
1272                }) => {
1273                    // Link start - record position, URL, and reference ID
1274                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1275                    text_chunks.clear();
1276                }
1277                Event::Text(text) if !link_stack.is_empty() => {
1278                    // Track text content with its byte range
1279                    text_chunks.push((text.to_string(), range.start, range.end));
1280                }
1281                Event::Code(code) if !link_stack.is_empty() => {
1282                    // Include inline code in link text (with backticks)
1283                    let code_text = format!("`{code}`");
1284                    text_chunks.push((code_text, range.start, range.end));
1285                }
1286                Event::End(TagEnd::Link) => {
1287                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288                        // Skip if in HTML comment
1289                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290                            text_chunks.clear();
1291                            continue;
1292                        }
1293
1294                        // Find line and column information
1295                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297                        // Skip if this link is on a MkDocs snippet line
1298                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299                            text_chunks.clear();
1300                            continue;
1301                        }
1302
1303                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305                        let is_reference = matches!(
1306                            link_type,
1307                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308                        );
1309
1310                        // Extract link text directly from source bytes to preserve escaping
1311                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1312                        let link_text = if start_pos < content.len() {
1313                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315                            // Find MATCHING ] by tracking bracket depth for nested brackets
1316                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1317                            // Brackets inside code spans (between backticks) should be ignored
1318                            let mut close_pos = None;
1319                            let mut depth = 0;
1320                            let mut in_code_span = false;
1321
1322                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323                                // Count preceding backslashes
1324                                let mut backslash_count = 0;
1325                                let mut j = i;
1326                                while j > 0 && link_bytes[j - 1] == b'\\' {
1327                                    backslash_count += 1;
1328                                    j -= 1;
1329                                }
1330                                let is_escaped = backslash_count % 2 != 0;
1331
1332                                // Track code spans - backticks toggle in/out of code
1333                                if byte == b'`' && !is_escaped {
1334                                    in_code_span = !in_code_span;
1335                                }
1336
1337                                // Only count brackets when NOT in a code span
1338                                if !is_escaped && !in_code_span {
1339                                    if byte == b'[' {
1340                                        depth += 1;
1341                                    } else if byte == b']' {
1342                                        if depth == 0 {
1343                                            // Found the matching closing bracket
1344                                            close_pos = Some(i);
1345                                            break;
1346                                        } else {
1347                                            depth -= 1;
1348                                        }
1349                                    }
1350                                }
1351                            }
1352
1353                            if let Some(pos) = close_pos {
1354                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355                            } else {
1356                                Cow::Borrowed("")
1357                            }
1358                        } else {
1359                            Cow::Borrowed("")
1360                        };
1361
1362                        // For reference links, use the actual reference ID from pulldown-cmark
1363                        let reference_id = if is_reference && !ref_id.is_empty() {
1364                            Some(Cow::Owned(ref_id.to_lowercase()))
1365                        } else if is_reference {
1366                            // For collapsed/shortcut references without explicit ID, use the link text
1367                            Some(Cow::Owned(link_text.to_lowercase()))
1368                        } else {
1369                            None
1370                        };
1371
1372                        // Track this position as found
1373                        found_positions.insert(start_pos);
1374
1375                        links.push(ParsedLink {
1376                            line: line_num,
1377                            start_col: col_start,
1378                            end_col: col_end,
1379                            byte_offset: start_pos,
1380                            byte_end: range.end,
1381                            text: link_text,
1382                            url: Cow::Owned(url.to_string()),
1383                            is_reference,
1384                            reference_id,
1385                            link_type,
1386                        });
1387
1388                        text_chunks.clear();
1389                    }
1390                }
1391                Event::FootnoteReference(footnote_id) => {
1392                    // Capture footnote references like [^1], [^note]
1393                    // Skip if in HTML comment
1394                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395                        continue;
1396                    }
1397
1398                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399                    footnote_refs.push(FootnoteRef {
1400                        id: footnote_id.to_string(),
1401                        line: line_num,
1402                        byte_offset: range.start,
1403                        byte_end: range.end,
1404                    });
1405                }
1406                _ => {}
1407            }
1408        }
1409
1410        // Also find undefined references using regex
1411        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1412        // because the reference is undefined
1413        for cap in LINK_PATTERN.captures_iter(content) {
1414            let full_match = cap.get(0).unwrap();
1415            let match_start = full_match.start();
1416            let match_end = full_match.end();
1417
1418            // Skip if this was already found by pulldown-cmark (it's a valid link)
1419            if found_positions.contains(&match_start) {
1420                continue;
1421            }
1422
1423            // Skip if escaped
1424            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425                continue;
1426            }
1427
1428            // Skip if it's an image
1429            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430                continue;
1431            }
1432
1433            // Skip if in code block
1434            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435                continue;
1436            }
1437
1438            // Skip if in code span
1439            if Self::is_offset_in_code_span(code_spans, match_start) {
1440                continue;
1441            }
1442
1443            // Skip if in HTML comment
1444            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445                continue;
1446            }
1447
1448            // Find line and column information
1449            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451            // Skip if this link is on a MkDocs snippet line
1452            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453                continue;
1454            }
1455
1456            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458            let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460            // Only process reference links (group 6)
1461            if let Some(ref_id) = cap.get(6) {
1462                let ref_id_str = ref_id.as_str();
1463                let normalized_ref = if ref_id_str.is_empty() {
1464                    Cow::Owned(text.to_lowercase()) // Implicit reference
1465                } else {
1466                    Cow::Owned(ref_id_str.to_lowercase())
1467                };
1468
1469                // This is an undefined reference (pulldown-cmark didn't parse it)
1470                links.push(ParsedLink {
1471                    line: line_num,
1472                    start_col: col_start,
1473                    end_col: col_end,
1474                    byte_offset: match_start,
1475                    byte_end: match_end,
1476                    text: Cow::Borrowed(text),
1477                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1478                    is_reference: true,
1479                    reference_id: Some(normalized_ref),
1480                    link_type: LinkType::Reference, // Undefined references are reference-style
1481                });
1482            }
1483        }
1484
1485        (links, broken_links, footnote_refs)
1486    }
1487
1488    /// Parse all images in the content
1489    fn parse_images(
1490        content: &'a str,
1491        lines: &[LineInfo],
1492        code_blocks: &[(usize, usize)],
1493        code_spans: &[CodeSpan],
1494        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495    ) -> Vec<ParsedImage<'a>> {
1496        use crate::utils::skip_context::is_in_html_comment_ranges;
1497        use std::collections::HashSet;
1498
1499        // Pre-size based on a heuristic: images are less common than links
1500        let mut images = Vec::with_capacity(content.len() / 1000);
1501        let mut found_positions = HashSet::new();
1502
1503        // Use pulldown-cmark for parsing - more accurate and faster
1504        let parser = Parser::new(content).into_offset_iter();
1505        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506            Vec::new();
1507        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1508
1509        for (event, range) in parser {
1510            match event {
1511                Event::Start(Tag::Image {
1512                    link_type,
1513                    dest_url,
1514                    id,
1515                    ..
1516                }) => {
1517                    image_stack.push((range.start, dest_url, link_type, id));
1518                    text_chunks.clear();
1519                }
1520                Event::Text(text) if !image_stack.is_empty() => {
1521                    text_chunks.push((text.to_string(), range.start, range.end));
1522                }
1523                Event::Code(code) if !image_stack.is_empty() => {
1524                    let code_text = format!("`{code}`");
1525                    text_chunks.push((code_text, range.start, range.end));
1526                }
1527                Event::End(TagEnd::Image) => {
1528                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529                        // Skip if in code block
1530                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531                            continue;
1532                        }
1533
1534                        // Skip if in code span
1535                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1536                            continue;
1537                        }
1538
1539                        // Skip if in HTML comment
1540                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541                            continue;
1542                        }
1543
1544                        // Find line and column using binary search
1545                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548                        let is_reference = matches!(
1549                            link_type,
1550                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551                        );
1552
1553                        // Extract alt text directly from source bytes to preserve escaping
1554                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1555                        let alt_text = if start_pos < content.len() {
1556                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558                            // Find MATCHING ] by tracking bracket depth for nested brackets
1559                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1560                            let mut close_pos = None;
1561                            let mut depth = 0;
1562
1563                            if image_bytes.len() > 2 {
1564                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565                                    // Count preceding backslashes
1566                                    let mut backslash_count = 0;
1567                                    let mut j = i;
1568                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1569                                        backslash_count += 1;
1570                                        j -= 1;
1571                                    }
1572                                    let is_escaped = backslash_count % 2 != 0;
1573
1574                                    if !is_escaped {
1575                                        if byte == b'[' {
1576                                            depth += 1;
1577                                        } else if byte == b']' {
1578                                            if depth == 0 {
1579                                                // Found the matching closing bracket
1580                                                close_pos = Some(i);
1581                                                break;
1582                                            } else {
1583                                                depth -= 1;
1584                                            }
1585                                        }
1586                                    }
1587                                }
1588                            }
1589
1590                            if let Some(pos) = close_pos {
1591                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592                            } else {
1593                                Cow::Borrowed("")
1594                            }
1595                        } else {
1596                            Cow::Borrowed("")
1597                        };
1598
1599                        let reference_id = if is_reference && !ref_id.is_empty() {
1600                            Some(Cow::Owned(ref_id.to_lowercase()))
1601                        } else if is_reference {
1602                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1603                        } else {
1604                            None
1605                        };
1606
1607                        found_positions.insert(start_pos);
1608                        images.push(ParsedImage {
1609                            line: line_num,
1610                            start_col: col_start,
1611                            end_col: col_end,
1612                            byte_offset: start_pos,
1613                            byte_end: range.end,
1614                            alt_text,
1615                            url: Cow::Owned(url.to_string()),
1616                            is_reference,
1617                            reference_id,
1618                            link_type,
1619                        });
1620                    }
1621                }
1622                _ => {}
1623            }
1624        }
1625
1626        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1627        for cap in IMAGE_PATTERN.captures_iter(content) {
1628            let full_match = cap.get(0).unwrap();
1629            let match_start = full_match.start();
1630            let match_end = full_match.end();
1631
1632            // Skip if already found by pulldown-cmark
1633            if found_positions.contains(&match_start) {
1634                continue;
1635            }
1636
1637            // Skip if the ! is escaped
1638            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639                continue;
1640            }
1641
1642            // Skip if in code block, code span, or HTML comment
1643            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644                || Self::is_offset_in_code_span(code_spans, match_start)
1645                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646            {
1647                continue;
1648            }
1649
1650            // Only process reference images (undefined references not found by pulldown-cmark)
1651            if let Some(ref_id) = cap.get(6) {
1652                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655                let ref_id_str = ref_id.as_str();
1656                let normalized_ref = if ref_id_str.is_empty() {
1657                    Cow::Owned(alt_text.to_lowercase())
1658                } else {
1659                    Cow::Owned(ref_id_str.to_lowercase())
1660                };
1661
1662                images.push(ParsedImage {
1663                    line: line_num,
1664                    start_col: col_start,
1665                    end_col: col_end,
1666                    byte_offset: match_start,
1667                    byte_end: match_end,
1668                    alt_text: Cow::Borrowed(alt_text),
1669                    url: Cow::Borrowed(""),
1670                    is_reference: true,
1671                    reference_id: Some(normalized_ref),
1672                    link_type: LinkType::Reference, // Undefined references are reference-style
1673                });
1674            }
1675        }
1676
1677        images
1678    }
1679
1680    /// Parse reference definitions
1681    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682        // Pre-size based on lines count as reference definitions are line-based
1683        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1684
1685        for (line_idx, line_info) in lines.iter().enumerate() {
1686            // Skip lines in code blocks
1687            if line_info.in_code_block {
1688                continue;
1689            }
1690
1691            let line = line_info.content(content);
1692            let line_num = line_idx + 1;
1693
1694            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695                let id = cap.get(1).unwrap().as_str().to_lowercase();
1696                let url = cap.get(2).unwrap().as_str().to_string();
1697                let title_match = cap.get(3).or_else(|| cap.get(4));
1698                let title = title_match.map(|m| m.as_str().to_string());
1699
1700                // Calculate byte positions
1701                // The match starts at the beginning of the line (0) and extends to the end
1702                let match_obj = cap.get(0).unwrap();
1703                let byte_offset = line_info.byte_offset + match_obj.start();
1704                let byte_end = line_info.byte_offset + match_obj.end();
1705
1706                // Calculate title byte positions (includes the quote character before content)
1707                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708                    // The match is the content inside quotes, so we include the quote before
1709                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1710                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1711                    (Some(start), Some(end))
1712                } else {
1713                    (None, None)
1714                };
1715
1716                refs.push(ReferenceDef {
1717                    line: line_num,
1718                    id,
1719                    url,
1720                    title,
1721                    byte_offset,
1722                    byte_end,
1723                    title_byte_start,
1724                    title_byte_end,
1725                });
1726            }
1727        }
1728
1729        refs
1730    }
1731
/// Hand-rolled blockquote prefix splitter (used instead of a regex for speed).
///
/// Consumes every leading `>` marker, together with the whitespace in front of it and at most
/// one space or tab directly after it, so nested quotes such as `> > > content` are handled.
///
/// Returns `Some((prefix, rest))` where `prefix` is everything consumed and `rest` is the
/// remainder of the line, or `None` when the line does not start (after whitespace) with `>`.
#[inline]
fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
    let mut rest = line;
    let mut consumed = 0;

    // Each iteration peels off one "<whitespace>>" group plus an optional single separator.
    while let Some(after_marker) = rest.trim_start().strip_prefix('>') {
        // Length difference = leading whitespace + the '>' itself.
        consumed += rest.len() - after_marker.len();

        rest = match after_marker.as_bytes().first() {
            Some(b' ' | b'\t') => {
                consumed += 1;
                &after_marker[1..]
            }
            _ => after_marker,
        };
    }

    // Nothing consumed means no marker was ever seen.
    if consumed == 0 {
        None
    } else {
        Some((&line[..consumed], rest))
    }
}
1772
1773    /// Detect list items using pulldown-cmark for CommonMark-compliant parsing.
1774    ///
1775    /// Returns a HashMap keyed by line byte offset, containing:
1776    /// `(is_ordered, marker, marker_column, content_column, number)`
1777    ///
1778    /// ## Why pulldown-cmark?
1779    /// Using pulldown-cmark instead of regex ensures we only detect actual list items,
1780    /// not lines that merely look like lists (e.g., continuation paragraphs, code blocks).
1781    /// This fixes issue #253 where continuation lines were falsely detected.
1782    ///
1783    /// ## Tab indentation quirk
1784    /// Pulldown-cmark reports nested list items at the newline character position
1785    /// when tab indentation is used. For example, in `"* Item\n\t- Nested"`,
1786    /// the nested item is reported at byte 7 (the `\n`), not byte 8 (the `\t`).
1787    /// We detect this and advance to the correct line.
1788    ///
1789    /// ## HashMap key strategy
1790    /// We use `entry().or_insert()` because pulldown-cmark may emit multiple events
1791    /// that resolve to the same line (after newline adjustment). The first event
1792    /// for each line is authoritative.
1793    fn detect_list_items_with_pulldown(
1794        content: &str,
1795        line_offsets: &[usize],
1796        flavor: MarkdownFlavor,
1797        front_matter_end: usize,
1798    ) -> std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)> {
1799        use std::collections::HashMap;
1800
1801        let mut list_items = HashMap::new();
1802
1803        let mut options = Options::empty();
1804        options.insert(Options::ENABLE_TABLES);
1805        options.insert(Options::ENABLE_FOOTNOTES);
1806        options.insert(Options::ENABLE_STRIKETHROUGH);
1807        options.insert(Options::ENABLE_TASKLISTS);
1808        // Always enable GFM features for consistency with existing behavior
1809        options.insert(Options::ENABLE_GFM);
1810
1811        // Suppress unused variable warning
1812        let _ = flavor;
1813
1814        let parser = Parser::new_ext(content, options).into_offset_iter();
1815        let mut list_depth: usize = 0;
1816        let mut list_stack: Vec<bool> = Vec::new();
1817
1818        for (event, range) in parser {
1819            match event {
1820                Event::Start(Tag::List(start_number)) => {
1821                    list_depth += 1;
1822                    list_stack.push(start_number.is_some());
1823                }
1824                Event::End(TagEnd::List(_)) => {
1825                    list_depth = list_depth.saturating_sub(1);
1826                    list_stack.pop();
1827                }
1828                Event::Start(Tag::Item) if list_depth > 0 => {
1829                    // Get the ordered state for the CURRENT (innermost) list
1830                    let current_list_is_ordered = list_stack.last().copied().unwrap_or(false);
1831                    // Find which line this byte offset corresponds to
1832                    let item_start = range.start;
1833
1834                    // Binary search to find the line number
1835                    let mut line_idx = match line_offsets.binary_search(&item_start) {
1836                        Ok(idx) => idx,
1837                        Err(idx) => idx.saturating_sub(1),
1838                    };
1839
1840                    // Pulldown-cmark reports nested list items at the newline before the item
1841                    // when using tab indentation (e.g., "* Item\n\t- Nested").
1842                    // Advance to the actual content line in this case.
1843                    if item_start < content.len() && content.as_bytes()[item_start] == b'\n' {
1844                        line_idx += 1;
1845                    }
1846
1847                    // Skip list items in frontmatter (they are YAML/TOML syntax, not Markdown)
1848                    if front_matter_end > 0 && line_idx < front_matter_end {
1849                        continue;
1850                    }
1851
1852                    if line_idx < line_offsets.len() {
1853                        let line_start_byte = line_offsets[line_idx];
1854                        let line_end = line_offsets.get(line_idx + 1).copied().unwrap_or(content.len());
1855                        let line = &content[line_start_byte..line_end.min(content.len())];
1856
1857                        // Strip trailing newline
1858                        let line = line
1859                            .strip_suffix('\n')
1860                            .or_else(|| line.strip_suffix("\r\n"))
1861                            .unwrap_or(line);
1862
1863                        // Strip blockquote prefix if present
1864                        let blockquote_parse = Self::parse_blockquote_prefix(line);
1865                        let (blockquote_prefix_len, line_to_parse) = if let Some((prefix, content)) = blockquote_parse {
1866                            (prefix.len(), content)
1867                        } else {
1868                            (0, line)
1869                        };
1870
1871                        // Parse the list marker from the actual line
1872                        if current_list_is_ordered {
1873                            if let Some((leading_spaces, number_str, delimiter, spacing, _content)) =
1874                                Self::parse_ordered_list(line_to_parse)
1875                            {
1876                                let marker = format!("{number_str}{delimiter}");
1877                                let marker_column = blockquote_prefix_len + leading_spaces.len();
1878                                let content_column = marker_column + marker.len() + spacing.len();
1879                                let number = number_str.parse().ok();
1880
1881                                list_items.entry(line_start_byte).or_insert((
1882                                    true,
1883                                    marker,
1884                                    marker_column,
1885                                    content_column,
1886                                    number,
1887                                ));
1888                            }
1889                        } else if let Some((leading_spaces, marker, spacing, _content)) =
1890                            Self::parse_unordered_list(line_to_parse)
1891                        {
1892                            let marker_column = blockquote_prefix_len + leading_spaces.len();
1893                            let content_column = marker_column + 1 + spacing.len();
1894
1895                            list_items.entry(line_start_byte).or_insert((
1896                                false,
1897                                marker.to_string(),
1898                                marker_column,
1899                                content_column,
1900                                None,
1901                            ));
1902                        }
1903                    }
1904                }
1905                _ => {}
1906            }
1907        }
1908
1909        list_items
1910    }
1911
/// Hand-rolled bullet list line splitter (used instead of a regex for speed).
///
/// Recognises lines shaped like `<spaces/tabs><-|*|+><spaces/tabs><content>`.
///
/// Returns `Some((leading_ws, marker, spacing, content))`, or `None` when the first
/// non-blank character is not one of `-`, `*`, `+` (including when the line is empty
/// or contains only spaces/tabs).
#[inline]
fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    // Split off the indentation.
    let body = line.trim_start_matches(is_blank);
    let leading_ws = &line[..line.len() - body.len()];

    // The bullet must be the very next character.
    let marker = body.chars().next().filter(|c| matches!(c, '-' | '*' | '+'))?;

    // All accepted markers are single-byte ASCII, so slicing at 1 is safe.
    let after_marker = &body[1..];
    let content = after_marker.trim_start_matches(is_blank);
    let spacing = &after_marker[..after_marker.len() - content.len()];

    Some((leading_ws, marker, spacing, content))
}
1944
/// Hand-rolled ordered list line splitter (used instead of a regex for speed).
///
/// Recognises lines shaped like `<spaces/tabs><digits><.|)><spaces/tabs><content>`.
///
/// Returns `Some((leading_ws, number_str, delimiter, spacing, content))`, or `None` when
/// there are no digits after the indentation or the digits are not followed by `.` or `)`.
#[inline]
fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    // Split off the indentation.
    let after_indent = line.trim_start_matches(is_blank);
    let (leading_ws, _) = line.split_at(line.len() - after_indent.len());

    // At least one ASCII digit is required.
    let digit_count = after_indent.bytes().take_while(u8::is_ascii_digit).count();
    if digit_count == 0 {
        return None;
    }
    let (number_str, tail) = after_indent.split_at(digit_count);

    // The delimiter must directly follow the number.
    let delimiter = tail.chars().next().filter(|c| matches!(c, '.' | ')'))?;

    // Both delimiters are single-byte ASCII, so slicing at 1 is safe.
    let after_delimiter = &tail[1..];
    let content = after_delimiter.trim_start_matches(is_blank);
    let spacing = &after_delimiter[..after_delimiter.len() - content.len()];

    Some((leading_ws, number_str, delimiter, spacing, content))
}
1992
/// Build a per-line table telling whether each line lies inside a code block.
///
/// `line_offsets` must be sorted ascending (byte offset of each line start); the result has
/// exactly one flag per entry. For every `(start, end)` byte range in `code_blocks` the
/// first and last affected lines are located by binary search and the lines in between
/// are flagged.
///
/// The ranges are taken as-is: no re-validation of whether a range really is a code block
/// happens here.
fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
    let total_len = content.len();
    let mut flags = vec![false; line_offsets.len()];

    for &(block_start, block_end) in code_blocks {
        // Snap the start backwards onto a UTF-8 character boundary.
        let mut lo = block_start;
        while lo > 0 && !content.is_char_boundary(lo) {
            lo -= 1;
        }

        // Clamp the end to the content, then snap it forwards onto a character boundary.
        let mut hi = block_end.min(total_len);
        while hi < total_len && !content.is_char_boundary(hi) {
            hi += 1;
        }

        // Line containing `lo`: the last line whose start offset is <= lo.
        let first = line_offsets.partition_point(|&off| off <= lo).saturating_sub(1);
        // One past the last line that starts before `hi`.
        let past_last = line_offsets.partition_point(|&off| off < hi);

        // `get_mut` yields `None` for an inverted range, in which case nothing is marked.
        if let Some(covered) = flags.get_mut(first..past_last) {
            covered.fill(true);
        }
    }

    flags
}
2052
2053    /// Pre-compute basic line information (without headings/blockquotes)
2054    fn compute_basic_line_info(
2055        content: &str,
2056        line_offsets: &[usize],
2057        code_blocks: &[(usize, usize)],
2058        flavor: MarkdownFlavor,
2059        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2060        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
2061    ) -> Vec<LineInfo> {
2062        let content_lines: Vec<&str> = content.lines().collect();
2063        let mut lines = Vec::with_capacity(content_lines.len());
2064
2065        // Pre-compute which lines are in code blocks
2066        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
2067
2068        // Detect front matter boundaries FIRST, before any other parsing
2069        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
2070        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2071
2072        // Use pulldown-cmark to detect list items (context-aware, eliminates false positives)
2073        let list_item_map = Self::detect_list_items_with_pulldown(content, line_offsets, flavor, front_matter_end);
2074
2075        for (i, line) in content_lines.iter().enumerate() {
2076            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
2077            let indent = line.len() - line.trim_start().len();
2078            // Compute visual indent with proper CommonMark tab expansion
2079            let visual_indent = ElementCache::calculate_indentation_width_default(line);
2080
2081            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
2082            let blockquote_parse = Self::parse_blockquote_prefix(line);
2083
2084            // For blank detection, consider blockquote context
2085            let is_blank = if let Some((_, content)) = blockquote_parse {
2086                // In blockquote context, check if content after prefix is blank
2087                content.trim().is_empty()
2088            } else {
2089                line.trim().is_empty()
2090            };
2091
2092            // Use pre-computed map for O(1) lookup instead of O(m) iteration
2093            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
2094
2095            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
2096            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
2097                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
2098            // Check if the ENTIRE line is within an HTML comment (not just the line start)
2099            // This ensures content after `-->` on the same line is not incorrectly skipped
2100            let line_end_offset = byte_offset + line.len();
2101            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
2102                html_comment_ranges,
2103                byte_offset,
2104                line_end_offset,
2105            );
2106            // Use pulldown-cmark's list detection for context-aware parsing
2107            // This eliminates false positives on continuation lines (issue #253)
2108            let list_item =
2109                list_item_map
2110                    .get(&byte_offset)
2111                    .map(
2112                        |(is_ordered, marker, marker_column, content_column, number)| ListItemInfo {
2113                            marker: marker.clone(),
2114                            is_ordered: *is_ordered,
2115                            number: *number,
2116                            marker_column: *marker_column,
2117                            content_column: *content_column,
2118                        },
2119                    );
2120
2121            // Detect horizontal rules (only outside code blocks and frontmatter)
2122            // Uses CommonMark-compliant check including leading indentation validation
2123            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2124            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2125
2126            lines.push(LineInfo {
2127                byte_offset,
2128                byte_len: line.len(),
2129                indent,
2130                visual_indent,
2131                is_blank,
2132                in_code_block,
2133                in_front_matter,
2134                in_html_block: false, // Will be populated after line creation
2135                in_html_comment,
2136                list_item,
2137                heading: None,    // Will be populated in second pass for Setext headings
2138                blockquote: None, // Will be populated after line creation
2139                in_mkdocstrings,
2140                in_esm_block: false, // Will be populated after line creation for MDX files
2141                in_code_span_continuation: false, // Will be populated after code spans are parsed
2142                is_horizontal_rule: is_hr,
2143            });
2144        }
2145
2146        lines
2147    }
2148
2149    /// Detect headings and blockquotes (called after HTML block detection)
2150    fn detect_headings_and_blockquotes(
2151        content: &str,
2152        lines: &mut [LineInfo],
2153        flavor: MarkdownFlavor,
2154        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2155        link_byte_ranges: &[(usize, usize)],
2156    ) {
2157        // Regex for heading detection
2158        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2159            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2160        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2161            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2162
2163        let content_lines: Vec<&str> = content.lines().collect();
2164
2165        // Detect front matter boundaries to skip those lines
2166        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2167
2168        // Detect headings (including Setext which needs look-ahead) and blockquotes
2169        for i in 0..lines.len() {
2170            if lines[i].in_code_block {
2171                continue;
2172            }
2173
2174            // Skip lines in front matter
2175            if front_matter_end > 0 && i < front_matter_end {
2176                continue;
2177            }
2178
2179            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2180            if lines[i].in_html_block {
2181                continue;
2182            }
2183
2184            let line = content_lines[i];
2185
2186            // Check for blockquotes (even on blank lines within blockquotes)
2187            if let Some(bq) = parse_blockquote_detailed(line) {
2188                let nesting_level = bq.markers.len(); // Each '>' is one level
2189                let marker_column = bq.indent.len();
2190
2191                // Build the prefix (indentation + markers + space)
2192                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2193
2194                // Check for various blockquote issues
2195                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2196                // Only flag multiple literal spaces, not tabs
2197                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2198                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2199
2200                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2201                // MD028 flags empty blockquote lines that don't have a single space after the marker
2202                // Lines like "> " or ">> " are already correct and don't need fixing
2203                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2204
2205                lines[i].blockquote = Some(BlockquoteInfo {
2206                    nesting_level,
2207                    indent: bq.indent.to_string(),
2208                    marker_column,
2209                    prefix,
2210                    content: bq.content.to_string(),
2211                    has_no_space_after_marker: has_no_space,
2212                    has_multiple_spaces_after_marker: has_multiple_spaces,
2213                    needs_md028_fix,
2214                });
2215            }
2216
2217            // Skip heading detection for blank lines
2218            if lines[i].is_blank {
2219                continue;
2220            }
2221
2222            // Check for ATX headings (but skip MkDocs snippet lines)
2223            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2224            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2225                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2226                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2227            } else {
2228                false
2229            };
2230
2231            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2232                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2233                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2234                    continue;
2235                }
2236                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2237                // This prevents false positives where `#fragment` is detected as a heading
2238                let line_offset = lines[i].byte_offset;
2239                if link_byte_ranges
2240                    .iter()
2241                    .any(|&(start, end)| line_offset > start && line_offset < end)
2242                {
2243                    continue;
2244                }
2245                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2246                let hashes = caps.get(2).map_or("", |m| m.as_str());
2247                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2248                let rest = caps.get(4).map_or("", |m| m.as_str());
2249
2250                let level = hashes.len() as u8;
2251                let marker_column = leading_spaces.len();
2252
2253                // Check for closing sequence, but handle custom IDs that might come after
2254                let (text, has_closing, closing_seq) = {
2255                    // First check if there's a custom ID at the end
2256                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2257                        // Check if this looks like a valid custom ID (ends with })
2258                        if rest[id_start..].trim_end().ends_with('}') {
2259                            // Split off the custom ID
2260                            (&rest[..id_start], &rest[id_start..])
2261                        } else {
2262                            (rest, "")
2263                        }
2264                    } else {
2265                        (rest, "")
2266                    };
2267
2268                    // Now look for closing hashes in the part before the custom ID
2269                    let trimmed_rest = rest_without_id.trim_end();
2270                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2271                        // Find the start of the hash sequence by walking backwards
2272                        // Use char_indices to get byte positions at char boundaries
2273                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2274
2275                        // Find which char index corresponds to last_hash_byte_pos
2276                        let last_hash_char_idx = char_positions
2277                            .iter()
2278                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2279
2280                        if let Some(mut char_idx) = last_hash_char_idx {
2281                            // Walk backwards to find start of hash sequence
2282                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2283                                char_idx -= 1;
2284                            }
2285
2286                            // Get the byte position of the start of hashes
2287                            let start_of_hashes = char_positions[char_idx].0;
2288
2289                            // Check if there's at least one space before the closing hashes
2290                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2291
2292                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2293                            let potential_closing = &trimmed_rest[start_of_hashes..];
2294                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2295
2296                            if is_all_hashes && has_space_before {
2297                                // This is a closing sequence
2298                                let closing_hashes = potential_closing.to_string();
2299                                // The text is everything before the closing hashes
2300                                // Don't include the custom ID here - it will be extracted later
2301                                let text_part = if !custom_id_part.is_empty() {
2302                                    // If we have a custom ID, append it back to get the full rest
2303                                    // This allows the extract_header_id function to handle it properly
2304                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2305                                } else {
2306                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2307                                };
2308                                (text_part, true, closing_hashes)
2309                            } else {
2310                                // Not a valid closing sequence, return the full content
2311                                (rest.to_string(), false, String::new())
2312                            }
2313                        } else {
2314                            // Couldn't find char boundary, return the full content
2315                            (rest.to_string(), false, String::new())
2316                        }
2317                    } else {
2318                        // No hashes found, return the full content
2319                        (rest.to_string(), false, String::new())
2320                    }
2321                };
2322
2323                let content_column = marker_column + hashes.len() + spaces_after.len();
2324
2325                // Extract custom header ID if present
2326                let raw_text = text.trim().to_string();
2327                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2328
2329                // If no custom ID was found on the header line, check the next line for standalone attr-list
2330                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2331                    let next_line = content_lines[i + 1];
2332                    if !lines[i + 1].in_code_block
2333                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2334                        && let Some(next_line_id) =
2335                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2336                    {
2337                        custom_id = Some(next_line_id);
2338                    }
2339                }
2340
2341                // ATX heading is "valid" for processing by heading rules if:
2342                // 1. Has space after # (CommonMark compliant): `# Heading`
2343                // 2. Is empty (just hashes): `#`
2344                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2345                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2346                //
2347                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2348                // - `#tag` - single # with lowercase (social hashtag)
2349                // - `#123` - single # with number (GitHub issue ref)
2350                let is_valid = !spaces_after.is_empty()
2351                    || rest.is_empty()
2352                    || level > 1
2353                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2354
2355                lines[i].heading = Some(HeadingInfo {
2356                    level,
2357                    style: HeadingStyle::ATX,
2358                    marker: hashes.to_string(),
2359                    marker_column,
2360                    content_column,
2361                    text: clean_text,
2362                    custom_id,
2363                    raw_text,
2364                    has_closing_sequence: has_closing,
2365                    closing_sequence: closing_seq,
2366                    is_valid,
2367                });
2368            }
2369            // Check for Setext headings (need to look at next line)
2370            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2371                let next_line = content_lines[i + 1];
2372                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2373                    // Skip if next line is front matter delimiter
2374                    if front_matter_end > 0 && i < front_matter_end {
2375                        continue;
2376                    }
2377
2378                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2379                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2380                    {
2381                        continue;
2382                    }
2383
2384                    // Per CommonMark spec 4.3, setext heading content cannot be interpretable as:
2385                    // list item, ATX heading, block quote, thematic break, code fence, or HTML block
2386                    let content_line = line.trim();
2387
2388                    // Skip list items (-, *, +) and thematic breaks (---, ***, etc.)
2389                    if content_line.starts_with('-') || content_line.starts_with('*') || content_line.starts_with('+') {
2390                        continue;
2391                    }
2392
2393                    // Skip underscore thematic breaks (___)
2394                    if content_line.starts_with('_') {
2395                        let non_ws: String = content_line.chars().filter(|c| !c.is_whitespace()).collect();
2396                        if non_ws.len() >= 3 && non_ws.chars().all(|c| c == '_') {
2397                            continue;
2398                        }
2399                    }
2400
2401                    // Skip numbered lists (1. Item, 2. Item, etc.)
2402                    if let Some(first_char) = content_line.chars().next()
2403                        && first_char.is_ascii_digit()
2404                    {
2405                        let num_end = content_line.chars().take_while(|c| c.is_ascii_digit()).count();
2406                        if num_end < content_line.len() {
2407                            let next = content_line.chars().nth(num_end);
2408                            if next == Some('.') || next == Some(')') {
2409                                continue;
2410                            }
2411                        }
2412                    }
2413
2414                    // Skip ATX headings
2415                    if ATX_HEADING_REGEX.is_match(line) {
2416                        continue;
2417                    }
2418
2419                    // Skip blockquotes
2420                    if content_line.starts_with('>') {
2421                        continue;
2422                    }
2423
2424                    // Skip code fences
2425                    let trimmed_start = line.trim_start();
2426                    if trimmed_start.len() >= 3 {
2427                        let first_three: String = trimmed_start.chars().take(3).collect();
2428                        if first_three == "```" || first_three == "~~~" {
2429                            continue;
2430                        }
2431                    }
2432
2433                    // Skip HTML blocks
2434                    if content_line.starts_with('<') {
2435                        continue;
2436                    }
2437
2438                    let underline = next_line.trim();
2439
2440                    let level = if underline.starts_with('=') { 1 } else { 2 };
2441                    let style = if level == 1 {
2442                        HeadingStyle::Setext1
2443                    } else {
2444                        HeadingStyle::Setext2
2445                    };
2446
2447                    // Extract custom header ID if present
2448                    let raw_text = line.trim().to_string();
2449                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2450
2451                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2452                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2453                        let attr_line = content_lines[i + 2];
2454                        if !lines[i + 2].in_code_block
2455                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2456                            && let Some(attr_line_id) =
2457                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2458                        {
2459                            custom_id = Some(attr_line_id);
2460                        }
2461                    }
2462
2463                    lines[i].heading = Some(HeadingInfo {
2464                        level,
2465                        style,
2466                        marker: underline.to_string(),
2467                        marker_column: next_line.len() - next_line.trim_start().len(),
2468                        content_column: lines[i].indent,
2469                        text: clean_text,
2470                        custom_id,
2471                        raw_text,
2472                        has_closing_sequence: false,
2473                        closing_sequence: String::new(),
2474                        is_valid: true, // Setext headings are always valid
2475                    });
2476                }
2477            }
2478        }
2479    }
2480
2481    /// Detect HTML blocks in the content
2482    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2483        // HTML block elements that trigger block context
2484        // Includes HTML5 media, embedded content, and interactive elements
2485        const BLOCK_ELEMENTS: &[&str] = &[
2486            "address",
2487            "article",
2488            "aside",
2489            "audio",
2490            "blockquote",
2491            "canvas",
2492            "details",
2493            "dialog",
2494            "dd",
2495            "div",
2496            "dl",
2497            "dt",
2498            "embed",
2499            "fieldset",
2500            "figcaption",
2501            "figure",
2502            "footer",
2503            "form",
2504            "h1",
2505            "h2",
2506            "h3",
2507            "h4",
2508            "h5",
2509            "h6",
2510            "header",
2511            "hr",
2512            "iframe",
2513            "li",
2514            "main",
2515            "menu",
2516            "nav",
2517            "noscript",
2518            "object",
2519            "ol",
2520            "p",
2521            "picture",
2522            "pre",
2523            "script",
2524            "search",
2525            "section",
2526            "source",
2527            "style",
2528            "summary",
2529            "svg",
2530            "table",
2531            "tbody",
2532            "td",
2533            "template",
2534            "textarea",
2535            "tfoot",
2536            "th",
2537            "thead",
2538            "tr",
2539            "track",
2540            "ul",
2541            "video",
2542        ];
2543
2544        let mut i = 0;
2545        while i < lines.len() {
2546            // Skip if already in code block or front matter
2547            if lines[i].in_code_block || lines[i].in_front_matter {
2548                i += 1;
2549                continue;
2550            }
2551
2552            let trimmed = lines[i].content(content).trim_start();
2553
2554            // Check if line starts with an HTML tag
2555            if trimmed.starts_with('<') && trimmed.len() > 1 {
2556                // Extract tag name safely
2557                let after_bracket = &trimmed[1..];
2558                let is_closing = after_bracket.starts_with('/');
2559                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2560
2561                // Extract tag name (stop at space, >, /, or end of string)
2562                let tag_name = tag_start
2563                    .chars()
2564                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2565                    .collect::<String>()
2566                    .to_lowercase();
2567
2568                // Check if it's a block element
2569                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2570                    // Mark this line as in HTML block
2571                    lines[i].in_html_block = true;
2572
2573                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2574                    // This avoids complex nesting logic that might cause infinite loops
2575                    if !is_closing {
2576                        let closing_tag = format!("</{tag_name}>");
2577                        // style and script tags can contain blank lines (CSS/JS formatting)
2578                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2579                        let mut j = i + 1;
2580                        let mut found_closing_tag = false;
2581                        while j < lines.len() && j < i + 100 {
2582                            // Limit search to 100 lines
2583                            // Stop at blank lines (except for style/script tags)
2584                            if !allow_blank_lines && lines[j].is_blank {
2585                                break;
2586                            }
2587
2588                            lines[j].in_html_block = true;
2589
2590                            // Check if this line contains the closing tag
2591                            if lines[j].content(content).contains(&closing_tag) {
2592                                found_closing_tag = true;
2593                            }
2594
2595                            // After finding closing tag, continue marking lines as
2596                            // in_html_block until blank line (per CommonMark spec)
2597                            if found_closing_tag {
2598                                j += 1;
2599                                // Continue marking subsequent lines until blank
2600                                while j < lines.len() && j < i + 100 {
2601                                    if lines[j].is_blank {
2602                                        break;
2603                                    }
2604                                    lines[j].in_html_block = true;
2605                                    j += 1;
2606                                }
2607                                break;
2608                            }
2609                            j += 1;
2610                        }
2611                    }
2612                }
2613            }
2614
2615            i += 1;
2616        }
2617    }
2618
2619    /// Detect ESM import/export blocks in MDX files
2620    /// ESM blocks consist of contiguous import/export statements at the top of the file
2621    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2622        // Only process MDX files
2623        if !flavor.supports_esm_blocks() {
2624            return;
2625        }
2626
2627        let mut in_multiline_comment = false;
2628
2629        for line in lines.iter_mut() {
2630            // Skip blank lines and HTML comments
2631            if line.is_blank || line.in_html_comment {
2632                continue;
2633            }
2634
2635            let trimmed = line.content(content).trim_start();
2636
2637            // Handle continuation of multi-line JS comments
2638            if in_multiline_comment {
2639                if trimmed.contains("*/") {
2640                    in_multiline_comment = false;
2641                }
2642                continue;
2643            }
2644
2645            // Skip single-line JS comments (// and ///)
2646            if trimmed.starts_with("//") {
2647                continue;
2648            }
2649
2650            // Handle start of multi-line JS comment
2651            if trimmed.starts_with("/*") {
2652                if !trimmed.contains("*/") {
2653                    in_multiline_comment = true;
2654                }
2655                continue;
2656            }
2657
2658            // Check if line starts with import or export
2659            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2660                line.in_esm_block = true;
2661            } else {
2662                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2663                break;
2664            }
2665        }
2666    }
2667
2668    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2669    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2670        let mut code_spans = Vec::new();
2671
2672        // Quick check - if no backticks, no code spans
2673        if !content.contains('`') {
2674            return code_spans;
2675        }
2676
2677        // Use pulldown-cmark's streaming parser with byte offsets
2678        let parser = Parser::new(content).into_offset_iter();
2679
2680        for (event, range) in parser {
2681            if let Event::Code(_) = event {
2682                let start_pos = range.start;
2683                let end_pos = range.end;
2684
2685                // The range includes the backticks, extract the actual content
2686                let full_span = &content[start_pos..end_pos];
2687                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2688
2689                // Extract content between backticks, preserving spaces
2690                let content_start = start_pos + backtick_count;
2691                let content_end = end_pos - backtick_count;
2692                let span_content = if content_start < content_end {
2693                    content[content_start..content_end].to_string()
2694                } else {
2695                    String::new()
2696                };
2697
2698                // Use binary search to find line number - O(log n) instead of O(n)
2699                // Find the rightmost line whose byte_offset <= start_pos
2700                let line_idx = lines
2701                    .partition_point(|line| line.byte_offset <= start_pos)
2702                    .saturating_sub(1);
2703                let line_num = line_idx + 1;
2704                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2705
2706                // Find end column using binary search
2707                let end_line_idx = lines
2708                    .partition_point(|line| line.byte_offset <= end_pos)
2709                    .saturating_sub(1);
2710                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2711
2712                // Convert byte offsets to character positions for correct Unicode handling
2713                // This ensures consistency with warning.column which uses character positions
2714                let line_content = lines[line_idx].content(content);
2715                let col_start = if byte_col_start <= line_content.len() {
2716                    line_content[..byte_col_start].chars().count()
2717                } else {
2718                    line_content.chars().count()
2719                };
2720
2721                let end_line_content = lines[end_line_idx].content(content);
2722                let col_end = if byte_col_end <= end_line_content.len() {
2723                    end_line_content[..byte_col_end].chars().count()
2724                } else {
2725                    end_line_content.chars().count()
2726                };
2727
2728                code_spans.push(CodeSpan {
2729                    line: line_num,
2730                    end_line: end_line_idx + 1,
2731                    start_col: col_start,
2732                    end_col: col_end,
2733                    byte_offset: start_pos,
2734                    byte_end: end_pos,
2735                    backtick_count,
2736                    content: span_content,
2737                });
2738            }
2739        }
2740
2741        // Sort by position to ensure consistent ordering
2742        code_spans.sort_by_key(|span| span.byte_offset);
2743
2744        code_spans
2745    }
2746
2747    /// Parse all list blocks in the content (legacy line-by-line approach)
2748    ///
2749    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2750    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2751    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2752    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2753    ///   treated as list continuation (based on the list marker width)
2754    ///
2755    /// When a new list item is encountered, we check if list-breaking content was seen
2756    /// since the last item. If so, we start a new list block.
2757    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2758        // Minimum indentation for unordered list continuation per CommonMark spec
2759        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2760
2761        /// Initialize or reset the forward-scanning tracking state.
2762        /// This helper eliminates code duplication across three initialization sites.
2763        #[inline]
2764        fn reset_tracking_state(
2765            list_item: &ListItemInfo,
2766            has_list_breaking_content: &mut bool,
2767            min_continuation: &mut usize,
2768        ) {
2769            *has_list_breaking_content = false;
2770            let marker_width = if list_item.is_ordered {
2771                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2772            } else {
2773                list_item.marker.len()
2774            };
2775            *min_continuation = if list_item.is_ordered {
2776                marker_width
2777            } else {
2778                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2779            };
2780        }
2781
2782        // Pre-size based on lines that could be list items
2783        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2784        let mut current_block: Option<ListBlock> = None;
2785        let mut last_list_item_line = 0;
2786        let mut current_indent_level = 0;
2787        let mut last_marker_width = 0;
2788
2789        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2790        let mut has_list_breaking_content_since_last_item = false;
2791        let mut min_continuation_for_tracking = 0;
2792
2793        for (line_idx, line_info) in lines.iter().enumerate() {
2794            let line_num = line_idx + 1;
2795
2796            // Enhanced code block handling using Design #3's context analysis
2797            if line_info.in_code_block {
2798                if let Some(ref mut block) = current_block {
2799                    // Calculate minimum indentation for list continuation
2800                    let min_continuation_indent =
2801                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2802
2803                    // Analyze code block context using the three-tier classification
2804                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2805
2806                    match context {
2807                        CodeBlockContext::Indented => {
2808                            // Code block is properly indented - continues the list
2809                            block.end_line = line_num;
2810                            continue;
2811                        }
2812                        CodeBlockContext::Standalone => {
2813                            // Code block separates lists - end current block
2814                            let completed_block = current_block.take().unwrap();
2815                            list_blocks.push(completed_block);
2816                            continue;
2817                        }
2818                        CodeBlockContext::Adjacent => {
2819                            // Edge case - use conservative behavior (continue list)
2820                            block.end_line = line_num;
2821                            continue;
2822                        }
2823                    }
2824                } else {
2825                    // No current list block - skip code block lines
2826                    continue;
2827                }
2828            }
2829
2830            // Extract blockquote prefix if any
2831            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2832                caps.get(0).unwrap().as_str().to_string()
2833            } else {
2834                String::new()
2835            };
2836
2837            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2838            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2839            if current_block.is_some()
2840                && line_info.list_item.is_none()
2841                && !line_info.is_blank
2842                && !line_info.in_code_span_continuation
2843            {
2844                let line_content = line_info.content(content).trim();
2845
2846                // Check for structural separators that break lists
2847                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2848                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2849                // as they indicate improper indentation rather than lazy continuation.
2850                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2851                let breaks_list = line_info.heading.is_some()
2852                    || line_content.starts_with("---")
2853                    || line_content.starts_with("***")
2854                    || line_content.starts_with("___")
2855                    || crate::utils::skip_context::is_table_line(line_content)
2856                    || line_content.starts_with(">")
2857                    || (line_info.indent > 0
2858                        && line_info.indent < min_continuation_for_tracking
2859                        && !is_lazy_continuation);
2860
2861                if breaks_list {
2862                    has_list_breaking_content_since_last_item = true;
2863                }
2864            }
2865
2866            // If this line is a code span continuation within an active list block,
2867            // extend the block's end_line to include this line (maintains list continuity)
2868            if line_info.in_code_span_continuation
2869                && line_info.list_item.is_none()
2870                && let Some(ref mut block) = current_block
2871            {
2872                block.end_line = line_num;
2873            }
2874
2875            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2876            // properly indented lines within the list). This ensures the workaround at line 2448
2877            // works correctly when there are multiple continuation lines before a nested list item.
2878            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2879            let is_valid_continuation =
2880                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2881            if !line_info.in_code_span_continuation
2882                && line_info.list_item.is_none()
2883                && !line_info.is_blank
2884                && !line_info.in_code_block
2885                && is_valid_continuation
2886                && let Some(ref mut block) = current_block
2887            {
2888                block.end_line = line_num;
2889            }
2890
2891            // Check if this line is a list item
2892            if let Some(list_item) = &line_info.list_item {
2893                // Calculate nesting level based on indentation
2894                let item_indent = list_item.marker_column;
2895                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2896
2897                if let Some(ref mut block) = current_block {
2898                    // Check if this continues the current block
2899                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2900                    // or a continuation at the same or lower level
2901                    let is_nested = nesting > block.nesting_level;
2902                    let same_type =
2903                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2904                    let same_context = block.blockquote_prefix == blockquote_prefix;
2905                    // Allow one blank line after last item, or lines immediately after block content
2906                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2907
2908                    // For unordered lists, also check marker consistency
2909                    let marker_compatible =
2910                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2911
2912                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2913                    // This eliminates the quadratic bottleneck from issue #148
2914                    let has_non_list_content = has_list_breaking_content_since_last_item;
2915
2916                    // A list continues if:
2917                    // 1. It's a nested item (indented more than the parent), OR
2918                    // 2. It's the same type at the same level with reasonable distance
2919                    let mut continues_list = if is_nested {
2920                        // Nested items always continue the list if they're in the same context
2921                        same_context && reasonable_distance && !has_non_list_content
2922                    } else {
2923                        // Same-level items need to match type and markers
2924                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2925                    };
2926
2927                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2928                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2929                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2930                        // Check if the previous line was a list item or a continuation of a list item
2931                        // (including lazy continuation lines)
2932                        if block.item_lines.contains(&(line_num - 1)) {
2933                            // They're consecutive list items - force them to be in the same list
2934                            continues_list = true;
2935                        } else {
2936                            // Previous line is a continuation line within this block
2937                            // (e.g., lazy continuation with indent=0)
2938                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2939                            continues_list = true;
2940                        }
2941                    }
2942
2943                    if continues_list {
2944                        // Extend current block
2945                        block.end_line = line_num;
2946                        block.item_lines.push(line_num);
2947
2948                        // Update max marker width
2949                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2950                            list_item.marker.len() + 1
2951                        } else {
2952                            list_item.marker.len()
2953                        });
2954
2955                        // Update marker consistency for unordered lists
2956                        if !block.is_ordered
2957                            && block.marker.is_some()
2958                            && block.marker.as_ref() != Some(&list_item.marker)
2959                        {
2960                            // Mixed markers, clear the marker field
2961                            block.marker = None;
2962                        }
2963
2964                        // Reset tracked state for issue #148 optimization
2965                        reset_tracking_state(
2966                            list_item,
2967                            &mut has_list_breaking_content_since_last_item,
2968                            &mut min_continuation_for_tracking,
2969                        );
2970                    } else {
2971                        // End current block and start a new one
2972
2973                        list_blocks.push(block.clone());
2974
2975                        *block = ListBlock {
2976                            start_line: line_num,
2977                            end_line: line_num,
2978                            is_ordered: list_item.is_ordered,
2979                            marker: if list_item.is_ordered {
2980                                None
2981                            } else {
2982                                Some(list_item.marker.clone())
2983                            },
2984                            blockquote_prefix: blockquote_prefix.clone(),
2985                            item_lines: vec![line_num],
2986                            nesting_level: nesting,
2987                            max_marker_width: if list_item.is_ordered {
2988                                list_item.marker.len() + 1
2989                            } else {
2990                                list_item.marker.len()
2991                            },
2992                        };
2993
2994                        // Initialize tracked state for new block (issue #148 optimization)
2995                        reset_tracking_state(
2996                            list_item,
2997                            &mut has_list_breaking_content_since_last_item,
2998                            &mut min_continuation_for_tracking,
2999                        );
3000                    }
3001                } else {
3002                    // Start a new block
3003                    current_block = Some(ListBlock {
3004                        start_line: line_num,
3005                        end_line: line_num,
3006                        is_ordered: list_item.is_ordered,
3007                        marker: if list_item.is_ordered {
3008                            None
3009                        } else {
3010                            Some(list_item.marker.clone())
3011                        },
3012                        blockquote_prefix,
3013                        item_lines: vec![line_num],
3014                        nesting_level: nesting,
3015                        max_marker_width: list_item.marker.len(),
3016                    });
3017
3018                    // Initialize tracked state for new block (issue #148 optimization)
3019                    reset_tracking_state(
3020                        list_item,
3021                        &mut has_list_breaking_content_since_last_item,
3022                        &mut min_continuation_for_tracking,
3023                    );
3024                }
3025
3026                last_list_item_line = line_num;
3027                current_indent_level = item_indent;
3028                last_marker_width = if list_item.is_ordered {
3029                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
3030                } else {
3031                    list_item.marker.len()
3032                };
3033            } else if let Some(ref mut block) = current_block {
3034                // Not a list item - check if it continues the current block
3035
3036                // For MD032 compatibility, we use a simple approach:
3037                // - Indented lines continue the list
3038                // - Blank lines followed by indented content continue the list
3039                // - Everything else ends the list
3040
3041                // Check if the last line in the list block ended with a backslash (hard line break)
3042                // This handles cases where list items use backslash for hard line breaks
3043                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
3044                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
3045                } else {
3046                    false
3047                };
3048
3049                // Calculate minimum indentation for list continuation
3050                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
3051                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
3052                let min_continuation_indent = if block.is_ordered {
3053                    current_indent_level + last_marker_width
3054                } else {
3055                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
3056                };
3057
3058                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
3059                    // Indented line or backslash continuation continues the list
3060                    block.end_line = line_num;
3061                } else if line_info.is_blank {
3062                    // Blank line - check if it's internal to the list or ending it
3063                    // We only include blank lines that are followed by more list content
3064                    let mut check_idx = line_idx + 1;
3065                    let mut found_continuation = false;
3066
3067                    // Skip additional blank lines
3068                    while check_idx < lines.len() && lines[check_idx].is_blank {
3069                        check_idx += 1;
3070                    }
3071
3072                    if check_idx < lines.len() {
3073                        let next_line = &lines[check_idx];
3074                        // Check if followed by indented content (list continuation)
3075                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
3076                            found_continuation = true;
3077                        }
3078                        // Check if followed by another list item at the same level
3079                        else if !next_line.in_code_block
3080                            && next_line.list_item.is_some()
3081                            && let Some(item) = &next_line.list_item
3082                        {
3083                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
3084                                .find(next_line.content(content))
3085                                .map_or(String::new(), |m| m.as_str().to_string());
3086                            if item.marker_column == current_indent_level
3087                                && item.is_ordered == block.is_ordered
3088                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
3089                            {
3090                                // Check if there was meaningful content between the list items (unused now)
3091                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
3092                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
3093                                    if let Some(between_line) = lines.get(idx) {
3094                                        let between_content = between_line.content(content);
3095                                        let trimmed = between_content.trim();
3096                                        // Skip empty lines
3097                                        if trimmed.is_empty() {
3098                                            return false;
3099                                        }
3100                                        // Check for meaningful content
3101                                        let line_indent = between_content.len() - between_content.trim_start().len();
3102
3103                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
3104                                        if trimmed.starts_with("```")
3105                                            || trimmed.starts_with("~~~")
3106                                            || trimmed.starts_with("---")
3107                                            || trimmed.starts_with("***")
3108                                            || trimmed.starts_with("___")
3109                                            || trimmed.starts_with(">")
3110                                            || crate::utils::skip_context::is_table_line(trimmed)
3111                                            || between_line.heading.is_some()
3112                                        {
3113                                            return true; // These are structural separators - meaningful content that breaks lists
3114                                        }
3115
3116                                        // Only properly indented content continues the list
3117                                        line_indent >= min_continuation_indent
3118                                    } else {
3119                                        false
3120                                    }
3121                                });
3122
3123                                if block.is_ordered {
3124                                    // For ordered lists: don't continue if there are structural separators
3125                                    // Check if there are structural separators between the list items
3126                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3127                                        if let Some(between_line) = lines.get(idx) {
3128                                            let trimmed = between_line.content(content).trim();
3129                                            if trimmed.is_empty() {
3130                                                return false;
3131                                            }
3132                                            // Check for structural separators that break lists
3133                                            trimmed.starts_with("```")
3134                                                || trimmed.starts_with("~~~")
3135                                                || trimmed.starts_with("---")
3136                                                || trimmed.starts_with("***")
3137                                                || trimmed.starts_with("___")
3138                                                || trimmed.starts_with(">")
3139                                                || crate::utils::skip_context::is_table_line(trimmed)
3140                                                || between_line.heading.is_some()
3141                                        } else {
3142                                            false
3143                                        }
3144                                    });
3145                                    found_continuation = !has_structural_separators;
3146                                } else {
3147                                    // For unordered lists: also check for structural separators
3148                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3149                                        if let Some(between_line) = lines.get(idx) {
3150                                            let trimmed = between_line.content(content).trim();
3151                                            if trimmed.is_empty() {
3152                                                return false;
3153                                            }
3154                                            // Check for structural separators that break lists
3155                                            trimmed.starts_with("```")
3156                                                || trimmed.starts_with("~~~")
3157                                                || trimmed.starts_with("---")
3158                                                || trimmed.starts_with("***")
3159                                                || trimmed.starts_with("___")
3160                                                || trimmed.starts_with(">")
3161                                                || crate::utils::skip_context::is_table_line(trimmed)
3162                                                || between_line.heading.is_some()
3163                                        } else {
3164                                            false
3165                                        }
3166                                    });
3167                                    found_continuation = !has_structural_separators;
3168                                }
3169                            }
3170                        }
3171                    }
3172
3173                    if found_continuation {
3174                        // Include the blank line in the block
3175                        block.end_line = line_num;
3176                    } else {
3177                        // Blank line ends the list - don't include it
3178                        list_blocks.push(block.clone());
3179                        current_block = None;
3180                    }
3181                } else {
3182                    // Check for lazy continuation - non-indented line immediately after a list item
3183                    // But only if the line has sufficient indentation for the list type
3184                    let min_required_indent = if block.is_ordered {
3185                        current_indent_level + last_marker_width
3186                    } else {
3187                        current_indent_level + 2
3188                    };
3189
3190                    // For lazy continuation to apply, the line must either:
3191                    // 1. Have no indentation (true lazy continuation)
3192                    // 2. Have sufficient indentation for the list type
3193                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3194                    let line_content = line_info.content(content).trim();
3195
3196                    // Check for table-like patterns
3197                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3198
3199                    // Check if blockquote level changed (not just if line starts with ">")
3200                    // Lines within the same blockquote level are NOT structural separators
3201                    let block_bq_level = block.blockquote_prefix.chars().filter(|&c| c == '>').count();
3202                    let current_bq_level = blockquote_prefix.chars().filter(|&c| c == '>').count();
3203                    let blockquote_level_changed = line_content.starts_with(">") && current_bq_level != block_bq_level;
3204
3205                    let is_structural_separator = line_info.heading.is_some()
3206                        || line_content.starts_with("```")
3207                        || line_content.starts_with("~~~")
3208                        || line_content.starts_with("---")
3209                        || line_content.starts_with("***")
3210                        || line_content.starts_with("___")
3211                        || blockquote_level_changed
3212                        || looks_like_table;
3213
3214                    // Allow lazy continuation if we're still within the same list block
3215                    // (not just immediately after a list item)
3216                    let is_lazy_continuation = !is_structural_separator
3217                        && !line_info.is_blank
3218                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3219
3220                    if is_lazy_continuation {
3221                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3222                        // it's probably not a continuation
3223                        let content_to_check = if !blockquote_prefix.is_empty() {
3224                            // Strip blockquote prefix to check the actual content
3225                            line_info
3226                                .content(content)
3227                                .strip_prefix(&blockquote_prefix)
3228                                .unwrap_or(line_info.content(content))
3229                                .trim()
3230                        } else {
3231                            line_info.content(content).trim()
3232                        };
3233
3234                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3235
3236                        // If it starts with uppercase and the previous line ended with punctuation,
3237                        // it's likely a new paragraph, not a continuation
3238                        if starts_with_uppercase && last_list_item_line > 0 {
3239                            // This looks like a new paragraph
3240                            list_blocks.push(block.clone());
3241                            current_block = None;
3242                        } else {
3243                            // This is a lazy continuation line
3244                            block.end_line = line_num;
3245                        }
3246                    } else {
3247                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3248                        list_blocks.push(block.clone());
3249                        current_block = None;
3250                    }
3251                }
3252            }
3253        }
3254
3255        // Don't forget the last block
3256        if let Some(block) = current_block {
3257            list_blocks.push(block);
3258        }
3259
3260        // Merge adjacent blocks that should be one
3261        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3262
3263        list_blocks
3264    }
3265
3266    /// Compute character frequency for fast content analysis
3267    fn compute_char_frequency(content: &str) -> CharFrequency {
3268        let mut frequency = CharFrequency::default();
3269
3270        for ch in content.chars() {
3271            match ch {
3272                '#' => frequency.hash_count += 1,
3273                '*' => frequency.asterisk_count += 1,
3274                '_' => frequency.underscore_count += 1,
3275                '-' => frequency.hyphen_count += 1,
3276                '+' => frequency.plus_count += 1,
3277                '>' => frequency.gt_count += 1,
3278                '|' => frequency.pipe_count += 1,
3279                '[' => frequency.bracket_count += 1,
3280                '`' => frequency.backtick_count += 1,
3281                '<' => frequency.lt_count += 1,
3282                '!' => frequency.exclamation_count += 1,
3283                '\n' => frequency.newline_count += 1,
3284                _ => {}
3285            }
3286        }
3287
3288        frequency
3289    }
3290
3291    /// Parse HTML tags in the content
3292    fn parse_html_tags(
3293        content: &str,
3294        lines: &[LineInfo],
3295        code_blocks: &[(usize, usize)],
3296        flavor: MarkdownFlavor,
3297    ) -> Vec<HtmlTag> {
3298        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3299            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3300
3301        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3302
3303        for cap in HTML_TAG_REGEX.captures_iter(content) {
3304            let full_match = cap.get(0).unwrap();
3305            let match_start = full_match.start();
3306            let match_end = full_match.end();
3307
3308            // Skip if in code block
3309            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3310                continue;
3311            }
3312
3313            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3314            let tag_name_original = cap.get(2).unwrap().as_str();
3315            let tag_name = tag_name_original.to_lowercase();
3316            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3317
3318            // Skip JSX components in MDX files (tags starting with uppercase letter)
3319            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3320            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3321                continue;
3322            }
3323
3324            // Find which line this tag is on
3325            let mut line_num = 1;
3326            let mut col_start = match_start;
3327            let mut col_end = match_end;
3328            for (idx, line_info) in lines.iter().enumerate() {
3329                if match_start >= line_info.byte_offset {
3330                    line_num = idx + 1;
3331                    col_start = match_start - line_info.byte_offset;
3332                    col_end = match_end - line_info.byte_offset;
3333                } else {
3334                    break;
3335                }
3336            }
3337
3338            html_tags.push(HtmlTag {
3339                line: line_num,
3340                start_col: col_start,
3341                end_col: col_end,
3342                byte_offset: match_start,
3343                byte_end: match_end,
3344                tag_name,
3345                is_closing,
3346                is_self_closing,
3347                raw_content: full_match.as_str().to_string(),
3348            });
3349        }
3350
3351        html_tags
3352    }
3353
3354    /// Parse emphasis spans in the content
3355    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3356        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3357            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3358
3359        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3360
3361        for cap in EMPHASIS_REGEX.captures_iter(content) {
3362            let full_match = cap.get(0).unwrap();
3363            let match_start = full_match.start();
3364            let match_end = full_match.end();
3365
3366            // Skip if in code block
3367            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3368                continue;
3369            }
3370
3371            let opening_markers = cap.get(1).unwrap().as_str();
3372            let content_part = cap.get(2).unwrap().as_str();
3373            let closing_markers = cap.get(3).unwrap().as_str();
3374
3375            // Validate matching markers
3376            if opening_markers.chars().next() != closing_markers.chars().next()
3377                || opening_markers.len() != closing_markers.len()
3378            {
3379                continue;
3380            }
3381
3382            let marker = opening_markers.chars().next().unwrap();
3383            let marker_count = opening_markers.len();
3384
3385            // Find which line this emphasis is on
3386            let mut line_num = 1;
3387            let mut col_start = match_start;
3388            let mut col_end = match_end;
3389            for (idx, line_info) in lines.iter().enumerate() {
3390                if match_start >= line_info.byte_offset {
3391                    line_num = idx + 1;
3392                    col_start = match_start - line_info.byte_offset;
3393                    col_end = match_end - line_info.byte_offset;
3394                } else {
3395                    break;
3396                }
3397            }
3398
3399            emphasis_spans.push(EmphasisSpan {
3400                line: line_num,
3401                start_col: col_start,
3402                end_col: col_end,
3403                byte_offset: match_start,
3404                byte_end: match_end,
3405                marker,
3406                marker_count,
3407                content: content_part.to_string(),
3408            });
3409        }
3410
3411        emphasis_spans
3412    }
3413
3414    /// Parse table rows in the content
3415    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3416        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3417
3418        for (line_idx, line_info) in lines.iter().enumerate() {
3419            // Skip lines in code blocks or blank lines
3420            if line_info.in_code_block || line_info.is_blank {
3421                continue;
3422            }
3423
3424            let line = line_info.content(content);
3425            let line_num = line_idx + 1;
3426
3427            // Check if this line contains pipes (potential table row)
3428            if !line.contains('|') {
3429                continue;
3430            }
3431
3432            // Count columns by splitting on pipes
3433            let parts: Vec<&str> = line.split('|').collect();
3434            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3435
3436            // Check if this is a separator row
3437            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3438            let mut column_alignments = Vec::new();
3439
3440            if is_separator {
3441                for part in &parts[1..parts.len() - 1] {
3442                    // Skip first and last empty parts
3443                    let trimmed = part.trim();
3444                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3445                        "center".to_string()
3446                    } else if trimmed.ends_with(':') {
3447                        "right".to_string()
3448                    } else if trimmed.starts_with(':') {
3449                        "left".to_string()
3450                    } else {
3451                        "none".to_string()
3452                    };
3453                    column_alignments.push(alignment);
3454                }
3455            }
3456
3457            table_rows.push(TableRow {
3458                line: line_num,
3459                is_separator,
3460                column_count,
3461                column_alignments,
3462            });
3463        }
3464
3465        table_rows
3466    }
3467
3468    /// Parse bare URLs and emails in the content
3469    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3470        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3471
3472        // Check for bare URLs (not in angle brackets or markdown links)
3473        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3474            let full_match = cap.get(0).unwrap();
3475            let match_start = full_match.start();
3476            let match_end = full_match.end();
3477
3478            // Skip if in code block
3479            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3480                continue;
3481            }
3482
3483            // Skip if already in angle brackets or markdown links
3484            let preceding_char = if match_start > 0 {
3485                content.chars().nth(match_start - 1)
3486            } else {
3487                None
3488            };
3489            let following_char = content.chars().nth(match_end);
3490
3491            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3492                continue;
3493            }
3494            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3495                continue;
3496            }
3497
3498            let url = full_match.as_str();
3499            let url_type = if url.starts_with("https://") {
3500                "https"
3501            } else if url.starts_with("http://") {
3502                "http"
3503            } else if url.starts_with("ftp://") {
3504                "ftp"
3505            } else {
3506                "other"
3507            };
3508
3509            // Find which line this URL is on
3510            let mut line_num = 1;
3511            let mut col_start = match_start;
3512            let mut col_end = match_end;
3513            for (idx, line_info) in lines.iter().enumerate() {
3514                if match_start >= line_info.byte_offset {
3515                    line_num = idx + 1;
3516                    col_start = match_start - line_info.byte_offset;
3517                    col_end = match_end - line_info.byte_offset;
3518                } else {
3519                    break;
3520                }
3521            }
3522
3523            bare_urls.push(BareUrl {
3524                line: line_num,
3525                start_col: col_start,
3526                end_col: col_end,
3527                byte_offset: match_start,
3528                byte_end: match_end,
3529                url: url.to_string(),
3530                url_type: url_type.to_string(),
3531            });
3532        }
3533
3534        // Check for bare email addresses
3535        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3536            let full_match = cap.get(0).unwrap();
3537            let match_start = full_match.start();
3538            let match_end = full_match.end();
3539
3540            // Skip if in code block
3541            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3542                continue;
3543            }
3544
3545            // Skip if already in angle brackets or markdown links
3546            let preceding_char = if match_start > 0 {
3547                content.chars().nth(match_start - 1)
3548            } else {
3549                None
3550            };
3551            let following_char = content.chars().nth(match_end);
3552
3553            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3554                continue;
3555            }
3556            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3557                continue;
3558            }
3559
3560            let email = full_match.as_str();
3561
3562            // Find which line this email is on
3563            let mut line_num = 1;
3564            let mut col_start = match_start;
3565            let mut col_end = match_end;
3566            for (idx, line_info) in lines.iter().enumerate() {
3567                if match_start >= line_info.byte_offset {
3568                    line_num = idx + 1;
3569                    col_start = match_start - line_info.byte_offset;
3570                    col_end = match_end - line_info.byte_offset;
3571                } else {
3572                    break;
3573                }
3574            }
3575
3576            bare_urls.push(BareUrl {
3577                line: line_num,
3578                start_col: col_start,
3579                end_col: col_end,
3580                byte_offset: match_start,
3581                byte_end: match_end,
3582                url: email.to_string(),
3583                url_type: "email".to_string(),
3584            });
3585        }
3586
3587        bare_urls
3588    }
3589
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The returned [`ValidHeadingsIter`] borrows this context's per-line data, so it
    /// cannot outlive the `LintContext` it was created from.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
3613
3614    /// Check if the document contains any valid CommonMark headings
3615    ///
3616    /// Returns `true` if there is at least one heading with proper space after `#`.
3617    #[must_use]
3618    pub fn has_valid_headings(&self) -> bool {
3619        self.lines
3620            .iter()
3621            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3622    }
3623}
3624
3625/// Merge adjacent list blocks that should be treated as one
3626fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3627    if list_blocks.len() < 2 {
3628        return;
3629    }
3630
3631    let mut merger = ListBlockMerger::new(content, lines);
3632    *list_blocks = merger.merge(list_blocks);
3633}
3634
/// Helper struct to manage the complex logic of merging list blocks
///
/// It only borrows the document data it needs to inspect the lines that lie
/// between two candidate blocks; it owns no state of its own.
struct ListBlockMerger<'a> {
    // Full document text; passed to `LineInfo::content` to obtain a line's text
    content: &'a str,
    // Per-line metadata; looked up with 0-based indices (line number - 1)
    lines: &'a [LineInfo],
}
3640
3641impl<'a> ListBlockMerger<'a> {
3642    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3643        Self { content, lines }
3644    }
3645
3646    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3647        let mut merged = Vec::with_capacity(list_blocks.len());
3648        let mut current = list_blocks[0].clone();
3649
3650        for next in list_blocks.iter().skip(1) {
3651            if self.should_merge_blocks(&current, next) {
3652                current = self.merge_two_blocks(current, next);
3653            } else {
3654                merged.push(current);
3655                current = next.clone();
3656            }
3657        }
3658
3659        merged.push(current);
3660        merged
3661    }
3662
3663    /// Determine if two adjacent list blocks should be merged
3664    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3665        // Basic compatibility checks
3666        if !self.blocks_are_compatible(current, next) {
3667            return false;
3668        }
3669
3670        // Check spacing and content between blocks
3671        let spacing = self.analyze_spacing_between(current, next);
3672        match spacing {
3673            BlockSpacing::Consecutive => true,
3674            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3675            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3676                self.can_merge_with_content_between(current, next)
3677            }
3678        }
3679    }
3680
3681    /// Check if blocks have compatible structure for merging
3682    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3683        current.is_ordered == next.is_ordered
3684            && current.blockquote_prefix == next.blockquote_prefix
3685            && current.nesting_level == next.nesting_level
3686    }
3687
3688    /// Analyze the spacing between two list blocks
3689    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3690        let gap = next.start_line - current.end_line;
3691
3692        match gap {
3693            1 => BlockSpacing::Consecutive,
3694            2 => BlockSpacing::SingleBlank,
3695            _ if gap > 2 => {
3696                if self.has_only_blank_lines_between(current, next) {
3697                    BlockSpacing::MultipleBlanks
3698                } else {
3699                    BlockSpacing::ContentBetween
3700                }
3701            }
3702            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3703        }
3704    }
3705
3706    /// Check if unordered lists can be merged with a single blank line between
3707    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3708        // Check if there are structural separators between the blocks
3709        // If has_meaningful_content_between returns true, it means there are structural separators
3710        if has_meaningful_content_between(self.content, current, next, self.lines) {
3711            return false; // Structural separators prevent merging
3712        }
3713
3714        // Only merge unordered lists with same marker across single blank
3715        !current.is_ordered && current.marker == next.marker
3716    }
3717
3718    /// Check if ordered lists can be merged when there's content between them
3719    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3720        // Do not merge lists if there are structural separators between them
3721        if has_meaningful_content_between(self.content, current, next, self.lines) {
3722            return false; // Structural separators prevent merging
3723        }
3724
3725        // Only consider merging ordered lists if there's no structural content between
3726        current.is_ordered && next.is_ordered
3727    }
3728
3729    /// Check if there are only blank lines between blocks
3730    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3731        for line_num in (current.end_line + 1)..next.start_line {
3732            if let Some(line_info) = self.lines.get(line_num - 1)
3733                && !line_info.content(self.content).trim().is_empty()
3734            {
3735                return false;
3736            }
3737        }
3738        true
3739    }
3740
3741    /// Merge two compatible list blocks into one
3742    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3743        current.end_line = next.end_line;
3744        current.item_lines.extend_from_slice(&next.item_lines);
3745
3746        // Update max marker width
3747        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3748
3749        // Handle marker consistency for unordered lists
3750        if !current.is_ordered && self.markers_differ(&current, next) {
3751            current.marker = None; // Mixed markers
3752        }
3753
3754        current
3755    }
3756
3757    /// Check if two blocks have different markers
3758    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3759        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3760    }
3761}
3762
/// Types of spacing between list blocks
///
/// Produced by `ListBlockMerger::analyze_spacing_between` from the distance
/// between the end line of one block and the start line of the next.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    Consecutive,    // No gap between blocks (next block starts on the following line)
    SingleBlank,    // One blank line between blocks
    MultipleBlanks, // Multiple blank lines but no content
    ContentBetween, // Content exists between blocks
}
3771
3772/// Check if there's meaningful content (not just blank lines) between two list blocks
3773fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3774    // Check lines between current.end_line and next.start_line
3775    for line_num in (current.end_line + 1)..next.start_line {
3776        if let Some(line_info) = lines.get(line_num - 1) {
3777            // Convert to 0-indexed
3778            let trimmed = line_info.content(content).trim();
3779
3780            // Skip empty lines
3781            if trimmed.is_empty() {
3782                continue;
3783            }
3784
3785            // Check for structural separators that should separate lists (CommonMark compliant)
3786
3787            // Headings separate lists
3788            if line_info.heading.is_some() {
3789                return true; // Has meaningful content - headings separate lists
3790            }
3791
3792            // Horizontal rules separate lists (---, ***, ___)
3793            if is_horizontal_rule(trimmed) {
3794                return true; // Has meaningful content - horizontal rules separate lists
3795            }
3796
3797            // Tables separate lists
3798            if crate::utils::skip_context::is_table_line(trimmed) {
3799                return true; // Has meaningful content - tables separate lists
3800            }
3801
3802            // Blockquotes separate lists
3803            if trimmed.starts_with('>') {
3804                return true; // Has meaningful content - blockquotes separate lists
3805            }
3806
3807            // Code block fences separate lists (unless properly indented as list content)
3808            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3809                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3810
3811                // Check if this code block is properly indented as list continuation
3812                let min_continuation_indent = if current.is_ordered {
3813                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3814                } else {
3815                    current.nesting_level + 2
3816                };
3817
3818                if line_indent < min_continuation_indent {
3819                    // This is a standalone code block that separates lists
3820                    return true; // Has meaningful content - standalone code blocks separate lists
3821                }
3822            }
3823
3824            // Check if this line has proper indentation for list continuation
3825            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3826
3827            // Calculate minimum indentation needed to be list continuation
3828            let min_indent = if current.is_ordered {
3829                current.nesting_level + current.max_marker_width
3830            } else {
3831                current.nesting_level + 2
3832            };
3833
3834            // If the line is not indented enough to be list continuation, it's meaningful content
3835            if line_indent < min_indent {
3836                return true; // Has meaningful content - content not indented as list continuation
3837            }
3838
3839            // If we reach here, the line is properly indented as list continuation
3840            // Continue checking other lines
3841        }
3842    }
3843
3844    // Only blank lines or properly indented list continuation content between blocks
3845    false
3846}
3847
3848/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
3849/// CommonMark rules for thematic breaks (horizontal rules):
3850/// - May have 0-3 spaces of leading indentation (but NOT tabs)
3851/// - Must have 3+ of the same character (-, *, or _)
3852/// - May have spaces between characters
3853/// - No other characters allowed
3854pub fn is_horizontal_rule_line(line: &str) -> bool {
3855    // CommonMark: HRs can have 0-3 spaces of leading indentation, not tabs
3856    let leading_spaces = line.len() - line.trim_start_matches(' ').len();
3857    if leading_spaces > 3 || line.starts_with('\t') {
3858        return false;
3859    }
3860
3861    is_horizontal_rule_content(line.trim())
3862}
3863
/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// The input is expected to be already trimmed: it must start with `-`, `*` or
/// `_`, contain at least three occurrences of that same character, and may
/// otherwise contain only spaces and tabs.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    // Fewer than three bytes can never hold three marker characters
    if trimmed.len() < 3 {
        return false;
    }

    let mut chars = trimmed.chars();
    let Some(marker @ ('-' | '*' | '_')) = chars.next() else {
        return false;
    };

    // Count the marker characters; anything other than the marker or
    // whitespace between markers disqualifies the line
    let mut marker_count = 1usize;
    for ch in chars {
        if ch == marker {
            marker_count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false; // Non-matching, non-whitespace character
        }
    }

    marker_count >= 3
}
3888
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged; no indentation check is performed here.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3893
/// Unit tests for `LintContext`: offset/line mapping, per-line metadata, list item detection and MDX ESM blocks
#[cfg(test)]
mod tests {
    use super::*;

    // Empty input: a single line offset (0), no line entries, and offset 0 maps to (1, 1)
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    // Single line without trailing newline: line and column are both reported 1-based
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    // Multiple lines: `line_offsets` holds the starting byte offset of every line
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    // Per-line metadata: content, byte offset, indentation, blank flag, and the
    // 1-based lookup helpers `line_to_byte_offset` / `line_info`
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Test helper methods
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    // List items: marker text, ordered flag, marker/content columns and the
    // parsed number for ordered items; plain text is not a list item
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    // Offsets at and just past each line's content, including the end of the document
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }

    // MDX flavor: leading import/export lines are flagged `in_esm_block`,
    // the blank line, heading and prose that follow are not
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    // Same import/export lines under the Standard flavor must not be flagged
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs