rumdl_lib/
lint_context.rs

1use crate::config::MarkdownFlavor;
2use crate::rules::front_matter_utils::FrontMatterUtils;
3use crate::utils::code_block_utils::{CodeBlockContext, CodeBlockUtils};
4use crate::utils::element_cache::ElementCache;
5use crate::utils::regex_cache::URL_SIMPLE_REGEX;
6use pulldown_cmark::{BrokenLink, Event, LinkType, Options, Parser, Tag, TagEnd};
7use regex::Regex;
8use std::borrow::Cow;
9use std::path::PathBuf;
10use std::sync::LazyLock;
11
/// Times one section of work (non-WASM builds only).
///
/// `$code` is evaluated exactly once and its value is the value of the macro.
/// When `$profile` is true, the elapsed time is written to stderr as
/// `[PROFILE] <name>: <duration>`; otherwise nothing is printed.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // The clock starts before `$code` runs so the measurement covers it fully.
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}
24
/// WASM variant of `profile_section!`: no timing is performed.
/// The macro expands to `$code` alone; `$name` and `$profile` are accepted
/// for signature compatibility but are not evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
29
// Comprehensive link pattern that captures both inline links `[text](url "title")`
// and reference links `[text][ref]`.
// Flags: (?s) makes `.` match newlines; (?x) is verbose mode, so the layout
// whitespace and `#` comments inside the pattern are not part of the match.
// Capture groups: 1 = link text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static LINK_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        \[((?:[^\[\]\\]|\\.)*)\]          # Link text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
43
// Image pattern: same shape as the link pattern but with a leading `!`,
// covering inline images `![alt](url "title")` and reference images `![alt][ref]`.
// Flags: (?s) makes `.` match newlines; (?x) is verbose mode, so the layout
// whitespace and `#` comments inside the pattern are not part of the match.
// Capture groups: 1 = alt text, 2 = URL in angle brackets, 3 = bare URL,
// 4 = double-quoted title, 5 = single-quoted title, 6 = reference ID.
static IMAGE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r#"(?sx)
        !\[((?:[^\[\]\\]|\\.)*)\]         # Alt text in group 1 (optimized - no nested brackets to prevent catastrophic backtracking)
        (?:
            \((?:<([^<>\n]*)>|([^)"']*))(?:\s+(?:"([^"]*)"|'([^']*)'))?\)  # URL in group 2 (angle) or 3 (bare), title in 4/5
            |
            \[([^\]]*)\]      # Reference ID in group 6
        )"#
    ).unwrap()
});
57
// Reference definition pattern: `[id]: url "title"` on a line of its own,
// with at most three leading spaces. (?m) makes `^`/`$` match at line boundaries.
// Capture groups: 1 = reference ID, 2 = URL (a run of non-whitespace),
// 3 = double-quoted title, 4 = single-quoted title.
static REF_DEF_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"(?m)^[ ]{0,3}\[([^\]]+)\]:\s*([^\s]+)(?:\s+(?:"([^"]*)"|'([^']*)'))?$"#).unwrap());
61
// Bare URLs have no local pattern here: they use the centralized `URL_SIMPLE_REGEX` imported from regex_cache
63
// Pattern for email addresses: local part, `@`, domain, then a dot and a
// TLD of at least two ASCII letters. The pattern is unanchored, so it can
// match anywhere inside a longer string.
static BARE_EMAIL_PATTERN: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap());
67
// Pattern for blockquote prefix in parse_list_blocks.
// Group 1 is the whole prefix: optional leading whitespace, one or more `>`,
// then any trailing whitespace. Anchored to the start of the input.
static BLOCKQUOTE_PREFIX_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(\s*>+\s*)").unwrap());
70
71/// Pre-computed information about a line
72#[derive(Debug, Clone)]
73pub struct LineInfo {
74    /// Byte offset where this line starts in the document
75    pub byte_offset: usize,
76    /// Length of the line in bytes (without newline)
77    pub byte_len: usize,
78    /// Number of bytes of leading whitespace (for substring extraction)
79    pub indent: usize,
80    /// Visual column width of leading whitespace (with proper tab expansion)
81    /// Per CommonMark, tabs expand to the next column that is a multiple of 4.
82    /// Use this for numeric comparisons like checking for indented code blocks (>= 4).
83    pub visual_indent: usize,
84    /// Whether the line is blank (empty or only whitespace)
85    pub is_blank: bool,
86    /// Whether this line is inside a code block
87    pub in_code_block: bool,
88    /// Whether this line is inside front matter
89    pub in_front_matter: bool,
90    /// Whether this line is inside an HTML block
91    pub in_html_block: bool,
92    /// Whether this line is inside an HTML comment
93    pub in_html_comment: bool,
94    /// List item information if this line starts a list item
95    pub list_item: Option<ListItemInfo>,
96    /// Heading information if this line is a heading
97    pub heading: Option<HeadingInfo>,
98    /// Blockquote information if this line is a blockquote
99    pub blockquote: Option<BlockquoteInfo>,
100    /// Whether this line is inside a mkdocstrings autodoc block
101    pub in_mkdocstrings: bool,
102    /// Whether this line is part of an ESM import/export block (MDX only)
103    pub in_esm_block: bool,
104    /// Whether this line is a continuation of a multi-line code span from a previous line
105    pub in_code_span_continuation: bool,
106    /// Whether this line is a horizontal rule (---, ***, ___, etc.)
107    /// Pre-computed for consistent detection across all rules
108    pub is_horizontal_rule: bool,
109}
110
111impl LineInfo {
112    /// Get the line content as a string slice from the source document
113    pub fn content<'a>(&self, source: &'a str) -> &'a str {
114        &source[self.byte_offset..self.byte_offset + self.byte_len]
115    }
116}
117
/// Information about a list item.
///
/// Stored in `LineInfo::list_item` for lines that start a list item.
#[derive(Debug, Clone)]
pub struct ListItemInfo {
    /// The marker used (*, -, +, or number with . or ))
    pub marker: String,
    /// Whether it's ordered (true) or unordered (false)
    pub is_ordered: bool,
    /// The number for ordered lists
    // NOTE(review): presumably `None` for unordered items — confirm against the parser.
    pub number: Option<usize>,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where content after marker starts
    // NOTE(review): presumably 0-based like `marker_column` — confirm.
    pub content_column: usize,
}
132
/// Heading style type.
///
/// A fieldless enum, so besides `PartialEq` it also derives `Eq` and `Hash`,
/// allowing it to be used as a set/map key and with APIs that require `Eq`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum HeadingStyle {
    /// ATX style heading (`# Heading`)
    ATX,
    /// Setext style heading with `=` underline
    Setext1,
    /// Setext style heading with `-` underline
    Setext2,
}
143
/// Parsed link information.
///
/// Collected into `LintContext::links`. The string fields are `Cow<'a, str>`,
/// so they can hold either borrowed or owned text.
#[derive(Debug, Clone)]
pub struct ParsedLink<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the link starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Link text
    pub text: Cow<'a, str>,
    /// Link URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference link `[text][ref]` vs inline `[text](url)`
    pub is_reference: bool,
    /// Reference ID for reference links
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
168
/// Information about a broken link reported by pulldown-cmark.
///
/// Collected into `LintContext::broken_links`.
#[derive(Debug, Clone)]
pub struct BrokenLinkInfo {
    /// The reference text that couldn't be resolved
    pub reference: String,
    /// Byte span in the source document (half-open `start..end` range)
    pub span: std::ops::Range<usize>,
}
177
/// Parsed footnote reference (e.g., `[^1]`, `[^note]`).
///
/// Collected into `LintContext::footnote_refs`.
#[derive(Debug, Clone)]
pub struct FootnoteRef {
    /// The footnote ID (without the ^ prefix)
    pub id: String,
    /// Line number (1-indexed)
    pub line: usize,
    /// Start byte offset in document
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
}
190
/// Parsed image information.
///
/// Collected into `LintContext::images`. Mirrors `ParsedLink`, with
/// `alt_text` in place of the link text.
#[derive(Debug, Clone)]
pub struct ParsedImage<'a> {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the image starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Alt text
    pub alt_text: Cow<'a, str>,
    /// Image URL or reference
    pub url: Cow<'a, str>,
    /// Whether this is a reference image `![alt][ref]` vs inline `![alt](url)`
    pub is_reference: bool,
    /// Reference ID for reference images
    pub reference_id: Option<Cow<'a, str>>,
    /// Link type from pulldown-cmark
    pub link_type: LinkType,
}
215
/// Reference definition `[ref]: url "title"`.
///
/// Collected into `LintContext::reference_defs`.
#[derive(Debug, Clone)]
pub struct ReferenceDef {
    /// Line number (1-indexed)
    pub line: usize,
    /// Reference ID (normalized to lowercase)
    pub id: String,
    /// URL
    pub url: String,
    /// Optional title
    pub title: Option<String>,
    /// Byte offset where the reference definition starts
    pub byte_offset: usize,
    /// Byte offset where the reference definition ends
    pub byte_end: usize,
    /// Byte offset where the title starts (if present, includes quote)
    pub title_byte_start: Option<usize>,
    /// Byte offset where the title ends (if present, includes quote)
    pub title_byte_end: Option<usize>,
}
236
/// Parsed code span information.
///
/// A span may cover several lines, in which case `end_line > line`.
/// Obtained through `LintContext::code_spans()`.
#[derive(Debug, Clone)]
pub struct CodeSpan {
    /// Line number where the code span starts (1-indexed)
    pub line: usize,
    /// Line number where the code span ends (1-indexed)
    pub end_line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document (treated as exclusive by
    /// `LintContext::is_in_code_block_or_span`)
    pub byte_end: usize,
    /// Number of backticks used (1, 2, 3, etc.)
    pub backtick_count: usize,
    /// Content inside the code span (without backticks)
    pub content: String,
}
257
/// Information about a heading.
///
/// Stored in `LineInfo::heading` for lines that are headings. Malformed
/// headings are still recorded, with `is_valid` set to `false`.
#[derive(Debug, Clone)]
pub struct HeadingInfo {
    /// Heading level (1-6 for ATX, 1-2 for Setext)
    pub level: u8,
    /// Style of heading
    pub style: HeadingStyle,
    /// The heading marker (# characters or underline)
    pub marker: String,
    /// Column where the marker starts (0-based)
    pub marker_column: usize,
    /// Column where heading text starts
    pub content_column: usize,
    /// The heading text (without markers and without custom ID syntax)
    pub text: String,
    /// Custom header ID if present (e.g., from {#custom-id} syntax)
    pub custom_id: Option<String>,
    /// Original heading text including custom ID syntax
    pub raw_text: String,
    /// Whether it has a closing sequence (for ATX)
    pub has_closing_sequence: bool,
    /// The closing sequence if present
    pub closing_sequence: String,
    /// Whether this is a valid CommonMark heading (ATX headings require space after #)
    /// False for malformed headings like `#NoSpace` that MD018 should flag
    pub is_valid: bool,
}
285
/// A valid heading from a filtered iteration.
///
/// This is the item type yielded by `ValidHeadingsIter`.
/// Only includes headings that are CommonMark-compliant (have space after #).
/// Hashtag-like patterns (`#tag`, `#123`) are excluded.
#[derive(Debug, Clone)]
pub struct ValidHeading<'a> {
    /// The 1-indexed line number in the document
    pub line_num: usize,
    /// Reference to the heading information
    pub heading: &'a HeadingInfo,
    /// Reference to the full line info (for rules that need additional context)
    pub line_info: &'a LineInfo,
}
299
300/// Iterator over valid CommonMark headings in a document
301///
302/// Filters out malformed headings like `#NoSpace` that should be flagged by MD018
303/// but should not be processed by other heading rules.
304pub struct ValidHeadingsIter<'a> {
305    lines: &'a [LineInfo],
306    current_index: usize,
307}
308
309impl<'a> ValidHeadingsIter<'a> {
310    fn new(lines: &'a [LineInfo]) -> Self {
311        Self {
312            lines,
313            current_index: 0,
314        }
315    }
316}
317
318impl<'a> Iterator for ValidHeadingsIter<'a> {
319    type Item = ValidHeading<'a>;
320
321    fn next(&mut self) -> Option<Self::Item> {
322        while self.current_index < self.lines.len() {
323            let idx = self.current_index;
324            self.current_index += 1;
325
326            let line_info = &self.lines[idx];
327            if let Some(heading) = &line_info.heading
328                && heading.is_valid
329            {
330                return Some(ValidHeading {
331                    line_num: idx + 1, // Convert 0-indexed to 1-indexed
332                    heading,
333                    line_info,
334                });
335            }
336        }
337        None
338    }
339}
340
/// Information about a blockquote line.
///
/// Stored in `LineInfo::blockquote` for lines that are part of a blockquote.
#[derive(Debug, Clone)]
pub struct BlockquoteInfo {
    /// Nesting level (1 for >, 2 for >>, etc.)
    pub nesting_level: usize,
    /// The indentation before the blockquote marker
    pub indent: String,
    /// Column where the first > starts (0-based)
    pub marker_column: usize,
    /// The blockquote prefix (e.g., "> ", ">> ", etc.)
    pub prefix: String,
    /// Content after the blockquote marker(s)
    pub content: String,
    /// Whether the line has no space after the marker
    pub has_no_space_after_marker: bool,
    /// Whether the line has multiple spaces after the marker
    pub has_multiple_spaces_after_marker: bool,
    /// Whether this is an empty blockquote line needing MD028 fix
    pub needs_md028_fix: bool,
}
361
/// Information about a list block.
///
/// Collected into `LintContext::list_blocks`.
#[derive(Debug, Clone)]
pub struct ListBlock {
    /// Line number where the list starts (1-indexed)
    pub start_line: usize,
    /// Line number where the list ends (1-indexed)
    pub end_line: usize,
    /// Whether it's ordered or unordered
    pub is_ordered: bool,
    /// The consistent marker for unordered lists (if any)
    pub marker: Option<String>,
    /// Blockquote prefix for this list (empty if not in blockquote)
    pub blockquote_prefix: String,
    /// Lines that are list items within this block
    // NOTE(review): presumably 1-indexed line numbers like `start_line` — confirm.
    pub item_lines: Vec<usize>,
    /// Nesting level (0 for top-level lists)
    pub nesting_level: usize,
    /// Maximum marker width seen in this block (e.g., 3 for "1. ", 4 for "10. ")
    pub max_marker_width: usize,
}
382
383use std::sync::{Arc, OnceLock};
384
/// Character frequency data for fast content analysis.
///
/// Stored in `LintContext::char_frequency`. Each field counts occurrences of
/// one character; the parenthesised notes name the Markdown constructs that
/// character is associated with. The derived `Default` starts every counter at zero.
#[derive(Debug, Clone, Default)]
pub struct CharFrequency {
    /// Count of # characters (headings)
    pub hash_count: usize,
    /// Count of * characters (emphasis, lists, horizontal rules)
    pub asterisk_count: usize,
    /// Count of _ characters (emphasis, horizontal rules)
    pub underscore_count: usize,
    /// Count of - characters (lists, horizontal rules, setext headings)
    pub hyphen_count: usize,
    /// Count of + characters (lists)
    pub plus_count: usize,
    /// Count of > characters (blockquotes)
    pub gt_count: usize,
    /// Count of | characters (tables)
    pub pipe_count: usize,
    /// Count of [ characters (links, images)
    pub bracket_count: usize,
    /// Count of ` characters (code spans, code blocks)
    pub backtick_count: usize,
    /// Count of < characters (HTML tags, autolinks)
    pub lt_count: usize,
    /// Count of ! characters (images)
    pub exclamation_count: usize,
    /// Count of newline characters
    pub newline_count: usize,
}
413
/// Pre-parsed HTML tag information.
///
/// Obtained through `LintContext::html_tags()`.
#[derive(Debug, Clone)]
pub struct HtmlTag {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the tag starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Tag name (e.g., "div", "img", "br")
    pub tag_name: String,
    /// Whether it's a closing tag (`</tag>`)
    pub is_closing: bool,
    /// Whether it's self-closing (`<tag />`)
    pub is_self_closing: bool,
    /// Raw tag content
    pub raw_content: String,
}
436
/// Pre-parsed emphasis span information.
///
/// Obtained through `LintContext::emphasis_spans()`.
#[derive(Debug, Clone)]
pub struct EmphasisSpan {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the span starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// Type of emphasis ('*' or '_')
    pub marker: char,
    /// Number of markers (1 for italic, 2 for bold, 3+ for bold+italic)
    pub marker_count: usize,
    /// Content inside the emphasis
    pub content: String,
}
457
/// Pre-parsed table row information.
///
/// Obtained through `LintContext::table_rows()`.
#[derive(Debug, Clone)]
pub struct TableRow {
    /// Line number (1-indexed)
    pub line: usize,
    /// Whether this is a separator row (contains only |, -, :, and spaces)
    pub is_separator: bool,
    /// Number of columns (pipe-separated cells)
    pub column_count: usize,
    /// Alignment info from separator row, one entry per column.
    /// Each entry is one of the strings "left", "center", "right" or "none".
    pub column_alignments: Vec<String>, // "left", "center", "right", "none"
}
470
/// Pre-parsed bare URL information (not in links).
///
/// Obtained through `LintContext::bare_urls()`.
#[derive(Debug, Clone)]
pub struct BareUrl {
    /// Line number (1-indexed)
    pub line: usize,
    /// Start column (0-indexed) in the line
    pub start_col: usize,
    /// End column (0-indexed) in the line
    pub end_col: usize,
    /// Byte offset in document where the URL starts
    pub byte_offset: usize,
    /// End byte offset in document
    pub byte_end: usize,
    /// The URL string
    pub url: String,
    /// Type of URL ("http", "https", "ftp", "email")
    pub url_type: String,
}
489
/// Pre-parsed view of one Markdown document.
///
/// `LintContext::new` fills the public fields eagerly. The private
/// `*_cache` fields are `OnceLock`s read through accessor methods:
/// HTML tags, emphasis spans, table rows, bare URLs and the mixed-list-nesting
/// flag are computed on first access, while the code span cache is already
/// populated by `new`.
pub struct LintContext<'a> {
    pub content: &'a str,
    pub line_offsets: Vec<usize>,
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (populated eagerly by `new`)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
}
515
/// The four consecutive pieces of a blockquote line, borrowed from the input:
/// leading indent, the run of `>` markers, the whitespace after them, and the
/// remaining content.
struct BlockquoteComponents<'a> {
    indent: &'a str,
    markers: &'a str,
    spaces_after: &'a str,
    content: &'a str,
}

/// Split `line` into its blockquote components.
///
/// Returns `None` unless the first character after any leading spaces/tabs
/// is `>`. Only a contiguous run of `>` counts as markers; a later `>`
/// separated by whitespace ends up in `content`.
#[inline]
fn parse_blockquote_detailed(line: &str) -> Option<BlockquoteComponents<'_>> {
    /// Byte length of the leading run of spaces and tabs in `s`.
    fn blank_run(s: &str) -> usize {
        s.bytes().take_while(|&b| b == b' ' || b == b'\t').count()
    }

    // Every split point follows an ASCII byte, so it is a valid char boundary.
    let (indent, after_indent) = line.split_at(blank_run(line));

    let marker_len = after_indent.bytes().take_while(|&b| b == b'>').count();
    if marker_len == 0 {
        // No '>' directly after the indent: not a blockquote line.
        return None;
    }
    let (markers, after_markers) = after_indent.split_at(marker_len);

    let (spaces_after, content) = after_markers.split_at(blank_run(after_markers));

    Some(BlockquoteComponents {
        indent,
        markers,
        spaces_after,
        content,
    })
}
560
561impl<'a> LintContext<'a> {
    /// Build a `LintContext` for `content`, running the pre-computation passes up front.
    ///
    /// The passes run in a fixed order because later passes consume the output of
    /// earlier ones: HTML and ESM block detection run before heading detection, and
    /// code spans are parsed before links, images and table blocks. HTML tags,
    /// emphasis spans, table rows, bare URLs and the mixed-list-nesting flag are
    /// NOT computed here; their caches start empty and are filled on first access.
    ///
    /// On non-WASM targets, when the `RUMDL_PROFILE_QUADRATIC` environment variable
    /// is readable, the duration of each pass is printed to stderr.
    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
        #[cfg(not(target_arch = "wasm32"))]
        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
        #[cfg(target_arch = "wasm32")]
        let profile = false;

        // Byte offset at which each line starts: 0 for the first line, then the
        // position right after every '\n'.
        let line_offsets = profile_section!("Line offsets", profile, {
            let mut offsets = vec![0];
            for (i, c) in content.char_indices() {
                if c == '\n' {
                    offsets.push(i + 1);
                }
            }
            offsets
        });

        // Detect code blocks once and cache them
        let code_blocks = profile_section!("Code blocks", profile, CodeBlockUtils::detect_code_blocks(content));

        // Pre-compute HTML comment ranges ONCE for all operations
        let html_comment_ranges = profile_section!(
            "HTML comment ranges",
            profile,
            crate::utils::skip_context::compute_html_comment_ranges(content)
        );

        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n²) scaling)
        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
            if flavor == MarkdownFlavor::MkDocs {
                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
            } else {
                Vec::new()
            }
        });

        // Pre-compute line information (without headings/blockquotes yet)
        let mut lines = profile_section!(
            "Basic line info",
            profile,
            Self::compute_basic_line_info(
                content,
                &line_offsets,
                &code_blocks,
                flavor,
                &html_comment_ranges,
                &autodoc_ranges,
            )
        );

        // Detect HTML blocks BEFORE heading detection
        profile_section!("HTML blocks", profile, Self::detect_html_blocks(content, &mut lines));

        // Detect ESM import/export blocks in MDX files BEFORE heading detection
        profile_section!(
            "ESM blocks",
            profile,
            Self::detect_esm_blocks(content, &mut lines, flavor)
        );

        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
        let link_byte_ranges = profile_section!("Link byte ranges", profile, Self::collect_link_byte_ranges(content));

        // Now detect headings and blockquotes
        profile_section!(
            "Headings & blockquotes",
            profile,
            Self::detect_headings_and_blockquotes(content, &mut lines, flavor, &html_comment_ranges, &link_byte_ranges)
        );

        // Parse code spans early so we can exclude them from link/image parsing
        let code_spans = profile_section!("Code spans", profile, Self::parse_code_spans(content, &lines));

        // Mark lines that are continuations of multi-line code spans
        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
        for span in &code_spans {
            if span.end_line > span.line {
                // Mark lines after the first line as continuations
                // (span line numbers are 1-indexed, `lines` is 0-indexed, hence `- 1`)
                for line_num in (span.line + 1)..=span.end_line {
                    if let Some(line_info) = lines.get_mut(line_num - 1) {
                        line_info.in_code_span_continuation = true;
                    }
                }
            }
        }

        // Parse links, images, references, and list blocks
        let (links, broken_links, footnote_refs) = profile_section!(
            "Links",
            profile,
            Self::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
        );

        let images = profile_section!(
            "Images",
            profile,
            Self::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
        );

        let reference_defs = profile_section!("Reference defs", profile, Self::parse_reference_defs(content, &lines));

        let list_blocks = profile_section!("List blocks", profile, Self::parse_list_blocks(content, &lines));

        // Compute character frequency for fast content analysis
        let char_frequency = profile_section!("Char frequency", profile, Self::compute_char_frequency(content));

        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
        let table_blocks = profile_section!(
            "Table blocks",
            profile,
            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
                content,
                &code_blocks,
                &code_spans,
                &html_comment_ranges,
            )
        );

        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
        let line_index = profile_section!(
            "Line index",
            profile,
            crate::utils::range_utils::LineIndex::new(content)
        );

        // Pre-compute Jinja template ranges once for all rules (eliminates O(n×m) in MD011)
        let jinja_ranges = profile_section!(
            "Jinja ranges",
            profile,
            crate::utils::jinja_utils::find_jinja_ranges(content)
        );

        Self {
            content,
            line_offsets,
            code_blocks,
            lines,
            links,
            images,
            broken_links,
            footnote_refs,
            reference_defs,
            // Code spans were already parsed above, so this cache starts out populated.
            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
            list_blocks,
            char_frequency,
            // The remaining caches start empty and are filled by their accessors.
            html_tags_cache: OnceLock::new(),
            emphasis_spans_cache: OnceLock::new(),
            table_rows_cache: OnceLock::new(),
            bare_urls_cache: OnceLock::new(),
            has_mixed_list_nesting_cache: OnceLock::new(),
            html_comment_ranges,
            table_blocks,
            line_index,
            jinja_ranges,
            flavor,
            source_file,
        }
    }
719
720    /// Get code spans - computed lazily on first access
721    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
722        Arc::clone(
723            self.code_spans_cache
724                .get_or_init(|| Arc::new(Self::parse_code_spans(self.content, &self.lines))),
725        )
726    }
727
728    /// Get HTML comment ranges - pre-computed during LintContext construction
729    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
730        &self.html_comment_ranges
731    }
732
733    /// Get HTML tags - computed lazily on first access
734    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
735        Arc::clone(self.html_tags_cache.get_or_init(|| {
736            Arc::new(Self::parse_html_tags(
737                self.content,
738                &self.lines,
739                &self.code_blocks,
740                self.flavor,
741            ))
742        }))
743    }
744
745    /// Get emphasis spans - computed lazily on first access
746    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
747        Arc::clone(
748            self.emphasis_spans_cache
749                .get_or_init(|| Arc::new(Self::parse_emphasis_spans(self.content, &self.lines, &self.code_blocks))),
750        )
751    }
752
753    /// Get table rows - computed lazily on first access
754    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
755        Arc::clone(
756            self.table_rows_cache
757                .get_or_init(|| Arc::new(Self::parse_table_rows(self.content, &self.lines))),
758        )
759    }
760
761    /// Get bare URLs - computed lazily on first access
762    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
763        Arc::clone(
764            self.bare_urls_cache
765                .get_or_init(|| Arc::new(Self::parse_bare_urls(self.content, &self.lines, &self.code_blocks))),
766        )
767    }
768
769    /// Check if document has mixed ordered/unordered list nesting.
770    /// Result is cached after first computation (document-level invariant).
771    /// This is used by MD007 for smart style auto-detection.
772    pub fn has_mixed_list_nesting(&self) -> bool {
773        *self
774            .has_mixed_list_nesting_cache
775            .get_or_init(|| self.compute_mixed_list_nesting())
776    }
777
778    /// Internal computation for mixed list nesting (only called once per LintContext).
779    fn compute_mixed_list_nesting(&self) -> bool {
780        // Track parent list items by their marker position and type
781        // Using marker_column instead of indent because it works correctly
782        // for blockquoted content where indent doesn't account for the prefix
783        // Stack stores: (marker_column, is_ordered)
784        let mut stack: Vec<(usize, bool)> = Vec::new();
785        let mut last_was_blank = false;
786
787        for line_info in &self.lines {
788            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
789            if line_info.in_code_block
790                || line_info.in_front_matter
791                || line_info.in_mkdocstrings
792                || line_info.in_html_comment
793                || line_info.in_esm_block
794            {
795                continue;
796            }
797
798            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
799            if line_info.is_blank {
800                last_was_blank = true;
801                continue;
802            }
803
804            if let Some(list_item) = &line_info.list_item {
805                // Normalize column 1 to column 0 (consistent with MD007 check function)
806                let current_pos = if list_item.marker_column == 1 {
807                    0
808                } else {
809                    list_item.marker_column
810                };
811
812                // If there was a blank line and this item is at root level, reset stack
813                if last_was_blank && current_pos == 0 {
814                    stack.clear();
815                }
816                last_was_blank = false;
817
818                // Pop items at same or greater position (they're siblings or deeper, not parents)
819                while let Some(&(pos, _)) = stack.last() {
820                    if pos >= current_pos {
821                        stack.pop();
822                    } else {
823                        break;
824                    }
825                }
826
827                // Check if immediate parent has different type - this is mixed nesting
828                if let Some(&(_, parent_is_ordered)) = stack.last()
829                    && parent_is_ordered != list_item.is_ordered
830                {
831                    return true; // Found mixed nesting - early exit
832                }
833
834                stack.push((current_pos, list_item.is_ordered));
835            } else {
836                // Non-list line (but not blank) - could be paragraph or other content
837                last_was_blank = false;
838            }
839        }
840
841        false
842    }
843
844    /// Map a byte offset to (line, column)
845    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
846        match self.line_offsets.binary_search(&offset) {
847            Ok(line) => (line + 1, 1),
848            Err(line) => {
849                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
850                (line, offset - line_start + 1)
851            }
852        }
853    }
854
855    /// Check if a position is within a code block or code span
856    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
857        // Check code blocks first
858        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
859            return true;
860        }
861
862        // Check inline code spans (lazy load if needed)
863        self.code_spans()
864            .iter()
865            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
866    }
867
868    /// Get line information by line number (1-indexed)
869    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
870        if line_num > 0 {
871            self.lines.get(line_num - 1)
872        } else {
873            None
874        }
875    }
876
877    /// Get byte offset for a line number (1-indexed)
878    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
879        self.line_info(line_num).map(|info| info.byte_offset)
880    }
881
882    /// Get URL for a reference link/image by its ID
883    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
884        let normalized_id = ref_id.to_lowercase();
885        self.reference_defs
886            .iter()
887            .find(|def| def.id == normalized_id)
888            .map(|def| def.url.as_str())
889    }
890
891    /// Check if a line is part of a list block
892    pub fn is_in_list_block(&self, line_num: usize) -> bool {
893        self.list_blocks
894            .iter()
895            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
896    }
897
898    /// Get the list block containing a specific line
899    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
900        self.list_blocks
901            .iter()
902            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
903    }
904
905    // Compatibility methods for DocumentStructure migration
906
907    /// Check if a line is within a code block
908    pub fn is_in_code_block(&self, line_num: usize) -> bool {
909        if line_num == 0 || line_num > self.lines.len() {
910            return false;
911        }
912        self.lines[line_num - 1].in_code_block
913    }
914
915    /// Check if a line is within front matter
916    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
917        if line_num == 0 || line_num > self.lines.len() {
918            return false;
919        }
920        self.lines[line_num - 1].in_front_matter
921    }
922
923    /// Check if a line is within an HTML block
924    pub fn is_in_html_block(&self, line_num: usize) -> bool {
925        if line_num == 0 || line_num > self.lines.len() {
926            return false;
927        }
928        self.lines[line_num - 1].in_html_block
929    }
930
931    /// Check if a line and column is within a code span
932    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
933        if line_num == 0 || line_num > self.lines.len() {
934            return false;
935        }
936
937        // Use the code spans cache to check
938        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
939        // Convert col to 0-indexed for comparison
940        let col_0indexed = if col > 0 { col - 1 } else { 0 };
941        let code_spans = self.code_spans();
942        code_spans.iter().any(|span| {
943            // Check if line is within the span's line range
944            if line_num < span.line || line_num > span.end_line {
945                return false;
946            }
947
948            if span.line == span.end_line {
949                // Single-line span: check column bounds
950                col_0indexed >= span.start_col && col_0indexed < span.end_col
951            } else if line_num == span.line {
952                // First line of multi-line span: anything after start_col is in span
953                col_0indexed >= span.start_col
954            } else if line_num == span.end_line {
955                // Last line of multi-line span: anything before end_col is in span
956                col_0indexed < span.end_col
957            } else {
958                // Middle line of multi-line span: entire line is in span
959                true
960            }
961        })
962    }
963
964    /// Check if a byte offset is within a code span
965    #[inline]
966    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
967        let code_spans = self.code_spans();
968        code_spans
969            .iter()
970            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
971    }
972
973    /// Check if a byte position is within a reference definition
974    /// This is much faster than scanning the content with regex for each check (O(1) vs O(n))
975    #[inline]
976    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
977        self.reference_defs
978            .iter()
979            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
980    }
981
982    /// Check if a byte position is within an HTML comment
983    /// This is much faster than scanning the content with regex for each check (O(k) vs O(n))
984    /// where k is the number of HTML comments (typically very small)
985    #[inline]
986    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
987        self.html_comment_ranges
988            .iter()
989            .any(|range| byte_pos >= range.start && byte_pos < range.end)
990    }
991
992    /// Check if a byte position is within an HTML tag (including multiline tags)
993    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
994    #[inline]
995    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
996        self.html_tags()
997            .iter()
998            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
999    }
1000
1001    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
1002    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1003        self.jinja_ranges
1004            .iter()
1005            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
1006    }
1007
1008    /// Check if a byte position is within a link reference definition title
1009    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1010        self.reference_defs.iter().any(|def| {
1011            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
1012                byte_pos >= start && byte_pos < end
1013            } else {
1014                false
1015            }
1016        })
1017    }
1018
1019    /// Check if content has any instances of a specific character (fast)
1020    pub fn has_char(&self, ch: char) -> bool {
1021        match ch {
1022            '#' => self.char_frequency.hash_count > 0,
1023            '*' => self.char_frequency.asterisk_count > 0,
1024            '_' => self.char_frequency.underscore_count > 0,
1025            '-' => self.char_frequency.hyphen_count > 0,
1026            '+' => self.char_frequency.plus_count > 0,
1027            '>' => self.char_frequency.gt_count > 0,
1028            '|' => self.char_frequency.pipe_count > 0,
1029            '[' => self.char_frequency.bracket_count > 0,
1030            '`' => self.char_frequency.backtick_count > 0,
1031            '<' => self.char_frequency.lt_count > 0,
1032            '!' => self.char_frequency.exclamation_count > 0,
1033            '\n' => self.char_frequency.newline_count > 0,
1034            _ => self.content.contains(ch), // Fallback for other characters
1035        }
1036    }
1037
1038    /// Get count of a specific character (fast)
1039    pub fn char_count(&self, ch: char) -> usize {
1040        match ch {
1041            '#' => self.char_frequency.hash_count,
1042            '*' => self.char_frequency.asterisk_count,
1043            '_' => self.char_frequency.underscore_count,
1044            '-' => self.char_frequency.hyphen_count,
1045            '+' => self.char_frequency.plus_count,
1046            '>' => self.char_frequency.gt_count,
1047            '|' => self.char_frequency.pipe_count,
1048            '[' => self.char_frequency.bracket_count,
1049            '`' => self.char_frequency.backtick_count,
1050            '<' => self.char_frequency.lt_count,
1051            '!' => self.char_frequency.exclamation_count,
1052            '\n' => self.char_frequency.newline_count,
1053            _ => self.content.matches(ch).count(), // Fallback for other characters
1054        }
1055    }
1056
1057    /// Check if content likely contains headings (fast)
1058    pub fn likely_has_headings(&self) -> bool {
1059        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1060    }
1061
1062    /// Check if content likely contains lists (fast)
1063    pub fn likely_has_lists(&self) -> bool {
1064        self.char_frequency.asterisk_count > 0
1065            || self.char_frequency.hyphen_count > 0
1066            || self.char_frequency.plus_count > 0
1067    }
1068
1069    /// Check if content likely contains emphasis (fast)
1070    pub fn likely_has_emphasis(&self) -> bool {
1071        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1072    }
1073
1074    /// Check if content likely contains tables (fast)
1075    pub fn likely_has_tables(&self) -> bool {
1076        self.char_frequency.pipe_count > 2
1077    }
1078
1079    /// Check if content likely contains blockquotes (fast)
1080    pub fn likely_has_blockquotes(&self) -> bool {
1081        self.char_frequency.gt_count > 0
1082    }
1083
1084    /// Check if content likely contains code (fast)
1085    pub fn likely_has_code(&self) -> bool {
1086        self.char_frequency.backtick_count > 0
1087    }
1088
1089    /// Check if content likely contains links or images (fast)
1090    pub fn likely_has_links_or_images(&self) -> bool {
1091        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1092    }
1093
1094    /// Check if content likely contains HTML (fast)
1095    pub fn likely_has_html(&self) -> bool {
1096        self.char_frequency.lt_count > 0
1097    }
1098
1099    /// Get HTML tags on a specific line
1100    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1101        self.html_tags()
1102            .iter()
1103            .filter(|tag| tag.line == line_num)
1104            .cloned()
1105            .collect()
1106    }
1107
1108    /// Get emphasis spans on a specific line
1109    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1110        self.emphasis_spans()
1111            .iter()
1112            .filter(|span| span.line == line_num)
1113            .cloned()
1114            .collect()
1115    }
1116
1117    /// Get table rows on a specific line
1118    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1119        self.table_rows()
1120            .iter()
1121            .filter(|row| row.line == line_num)
1122            .cloned()
1123            .collect()
1124    }
1125
1126    /// Get bare URLs on a specific line
1127    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1128        self.bare_urls()
1129            .iter()
1130            .filter(|url| url.line == line_num)
1131            .cloned()
1132            .collect()
1133    }
1134
1135    /// Find the line index for a given byte offset using binary search.
1136    /// Returns (line_index, line_number, column) where:
1137    /// - line_index is the 0-based index in the lines array
1138    /// - line_number is the 1-based line number
1139    /// - column is the byte offset within that line
1140    #[inline]
1141    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142        // Binary search to find the line containing this byte offset
1143        let idx = match lines.binary_search_by(|line| {
1144            if byte_offset < line.byte_offset {
1145                std::cmp::Ordering::Greater
1146            } else if byte_offset > line.byte_offset + line.byte_len {
1147                std::cmp::Ordering::Less
1148            } else {
1149                std::cmp::Ordering::Equal
1150            }
1151        }) {
1152            Ok(idx) => idx,
1153            Err(idx) => idx.saturating_sub(1),
1154        };
1155
1156        let line = &lines[idx];
1157        let line_num = idx + 1;
1158        let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160        (idx, line_num, col)
1161    }
1162
1163    /// Check if a byte offset is within a code span using binary search
1164    #[inline]
1165    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166        // Since spans are sorted by byte_offset, use partition_point for binary search
1167        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169        // Check the span that starts at or before our offset
1170        if idx > 0 {
1171            let span = &code_spans[idx - 1];
1172            if offset >= span.byte_offset && offset < span.byte_end {
1173                return true;
1174            }
1175        }
1176
1177        false
1178    }
1179
1180    /// Collect byte ranges of all links using pulldown-cmark
1181    /// This is used to skip heading detection for lines that fall within link syntax
1182    /// (e.g., multiline links like `[text](url\n#fragment)`)
1183    fn collect_link_byte_ranges(content: &str) -> Vec<(usize, usize)> {
1184        use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
1185
1186        let mut link_ranges = Vec::new();
1187        let mut options = Options::empty();
1188        options.insert(Options::ENABLE_WIKILINKS);
1189        options.insert(Options::ENABLE_FOOTNOTES);
1190
1191        let parser = Parser::new_ext(content, options).into_offset_iter();
1192        let mut link_stack: Vec<usize> = Vec::new();
1193
1194        for (event, range) in parser {
1195            match event {
1196                Event::Start(Tag::Link { .. }) => {
1197                    link_stack.push(range.start);
1198                }
1199                Event::End(TagEnd::Link) => {
1200                    if let Some(start_pos) = link_stack.pop() {
1201                        link_ranges.push((start_pos, range.end));
1202                    }
1203                }
1204                _ => {}
1205            }
1206        }
1207
1208        link_ranges
1209    }
1210
1211    /// Parse all links in the content
1212    fn parse_links(
1213        content: &'a str,
1214        lines: &[LineInfo],
1215        code_blocks: &[(usize, usize)],
1216        code_spans: &[CodeSpan],
1217        flavor: MarkdownFlavor,
1218        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1219    ) -> (Vec<ParsedLink<'a>>, Vec<BrokenLinkInfo>, Vec<FootnoteRef>) {
1220        use crate::utils::skip_context::{is_in_html_comment_ranges, is_mkdocs_snippet_line};
1221        use std::collections::HashSet;
1222
1223        let mut links = Vec::with_capacity(content.len() / 500);
1224        let mut broken_links = Vec::new();
1225        let mut footnote_refs = Vec::new();
1226
1227        // Track byte positions of links found by pulldown-cmark
1228        let mut found_positions = HashSet::new();
1229
1230        // Use pulldown-cmark's streaming parser with BrokenLink callback
1231        // The callback captures undefined references: [text][undefined], [shortcut], [text][]
1232        // This automatically handles:
1233        // - Escaped links (won't generate events)
1234        // - Links in code blocks/spans (won't generate Link events)
1235        // - Images (generates Tag::Image instead)
1236        // - Reference resolution (dest_url is already resolved!)
1237        // - Broken references (callback is invoked)
1238        // - Wiki-links (enabled via ENABLE_WIKILINKS)
1239        let mut options = Options::empty();
1240        options.insert(Options::ENABLE_WIKILINKS);
1241        options.insert(Options::ENABLE_FOOTNOTES);
1242
1243        let parser = Parser::new_with_broken_link_callback(
1244            content,
1245            options,
1246            Some(|link: BrokenLink<'_>| {
1247                broken_links.push(BrokenLinkInfo {
1248                    reference: link.reference.to_string(),
1249                    span: link.span.clone(),
1250                });
1251                None
1252            }),
1253        )
1254        .into_offset_iter();
1255
1256        let mut link_stack: Vec<(
1257            usize,
1258            usize,
1259            pulldown_cmark::CowStr<'a>,
1260            LinkType,
1261            pulldown_cmark::CowStr<'a>,
1262        )> = Vec::new();
1263        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1264
1265        for (event, range) in parser {
1266            match event {
1267                Event::Start(Tag::Link {
1268                    link_type,
1269                    dest_url,
1270                    id,
1271                    ..
1272                }) => {
1273                    // Link start - record position, URL, and reference ID
1274                    link_stack.push((range.start, range.end, dest_url, link_type, id));
1275                    text_chunks.clear();
1276                }
1277                Event::Text(text) if !link_stack.is_empty() => {
1278                    // Track text content with its byte range
1279                    text_chunks.push((text.to_string(), range.start, range.end));
1280                }
1281                Event::Code(code) if !link_stack.is_empty() => {
1282                    // Include inline code in link text (with backticks)
1283                    let code_text = format!("`{code}`");
1284                    text_chunks.push((code_text, range.start, range.end));
1285                }
1286                Event::End(TagEnd::Link) => {
1287                    if let Some((start_pos, _link_start_end, url, link_type, ref_id)) = link_stack.pop() {
1288                        // Skip if in HTML comment
1289                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1290                            text_chunks.clear();
1291                            continue;
1292                        }
1293
1294                        // Find line and column information
1295                        let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1296
1297                        // Skip if this link is on a MkDocs snippet line
1298                        if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1299                            text_chunks.clear();
1300                            continue;
1301                        }
1302
1303                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1304
1305                        let is_reference = matches!(
1306                            link_type,
1307                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1308                        );
1309
1310                        // Extract link text directly from source bytes to preserve escaping
1311                        // Text events from pulldown-cmark unescape \] → ], which breaks MD039
1312                        let link_text = if start_pos < content.len() {
1313                            let link_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1314
1315                            // Find MATCHING ] by tracking bracket depth for nested brackets
1316                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1317                            // Brackets inside code spans (between backticks) should be ignored
1318                            let mut close_pos = None;
1319                            let mut depth = 0;
1320                            let mut in_code_span = false;
1321
1322                            for (i, &byte) in link_bytes.iter().enumerate().skip(1) {
1323                                // Count preceding backslashes
1324                                let mut backslash_count = 0;
1325                                let mut j = i;
1326                                while j > 0 && link_bytes[j - 1] == b'\\' {
1327                                    backslash_count += 1;
1328                                    j -= 1;
1329                                }
1330                                let is_escaped = backslash_count % 2 != 0;
1331
1332                                // Track code spans - backticks toggle in/out of code
1333                                if byte == b'`' && !is_escaped {
1334                                    in_code_span = !in_code_span;
1335                                }
1336
1337                                // Only count brackets when NOT in a code span
1338                                if !is_escaped && !in_code_span {
1339                                    if byte == b'[' {
1340                                        depth += 1;
1341                                    } else if byte == b']' {
1342                                        if depth == 0 {
1343                                            // Found the matching closing bracket
1344                                            close_pos = Some(i);
1345                                            break;
1346                                        } else {
1347                                            depth -= 1;
1348                                        }
1349                                    }
1350                                }
1351                            }
1352
1353                            if let Some(pos) = close_pos {
1354                                Cow::Borrowed(std::str::from_utf8(&link_bytes[1..pos]).unwrap_or(""))
1355                            } else {
1356                                Cow::Borrowed("")
1357                            }
1358                        } else {
1359                            Cow::Borrowed("")
1360                        };
1361
1362                        // For reference links, use the actual reference ID from pulldown-cmark
1363                        let reference_id = if is_reference && !ref_id.is_empty() {
1364                            Some(Cow::Owned(ref_id.to_lowercase()))
1365                        } else if is_reference {
1366                            // For collapsed/shortcut references without explicit ID, use the link text
1367                            Some(Cow::Owned(link_text.to_lowercase()))
1368                        } else {
1369                            None
1370                        };
1371
1372                        // Track this position as found
1373                        found_positions.insert(start_pos);
1374
1375                        links.push(ParsedLink {
1376                            line: line_num,
1377                            start_col: col_start,
1378                            end_col: col_end,
1379                            byte_offset: start_pos,
1380                            byte_end: range.end,
1381                            text: link_text,
1382                            url: Cow::Owned(url.to_string()),
1383                            is_reference,
1384                            reference_id,
1385                            link_type,
1386                        });
1387
1388                        text_chunks.clear();
1389                    }
1390                }
1391                Event::FootnoteReference(footnote_id) => {
1392                    // Capture footnote references like [^1], [^note]
1393                    // Skip if in HTML comment
1394                    if is_in_html_comment_ranges(html_comment_ranges, range.start) {
1395                        continue;
1396                    }
1397
1398                    let (_, line_num, _) = Self::find_line_for_offset(lines, range.start);
1399                    footnote_refs.push(FootnoteRef {
1400                        id: footnote_id.to_string(),
1401                        line: line_num,
1402                        byte_offset: range.start,
1403                        byte_end: range.end,
1404                    });
1405                }
1406                _ => {}
1407            }
1408        }
1409
1410        // Also find undefined references using regex
1411        // These are patterns like [text][ref] that pulldown-cmark didn't parse as links
1412        // because the reference is undefined
1413        for cap in LINK_PATTERN.captures_iter(content) {
1414            let full_match = cap.get(0).unwrap();
1415            let match_start = full_match.start();
1416            let match_end = full_match.end();
1417
1418            // Skip if this was already found by pulldown-cmark (it's a valid link)
1419            if found_positions.contains(&match_start) {
1420                continue;
1421            }
1422
1423            // Skip if escaped
1424            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1425                continue;
1426            }
1427
1428            // Skip if it's an image
1429            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'!') {
1430                continue;
1431            }
1432
1433            // Skip if in code block
1434            if CodeBlockUtils::is_in_code_block(code_blocks, match_start) {
1435                continue;
1436            }
1437
1438            // Skip if in code span
1439            if Self::is_offset_in_code_span(code_spans, match_start) {
1440                continue;
1441            }
1442
1443            // Skip if in HTML comment
1444            if is_in_html_comment_ranges(html_comment_ranges, match_start) {
1445                continue;
1446            }
1447
1448            // Find line and column information
1449            let (line_idx, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1450
1451            // Skip if this link is on a MkDocs snippet line
1452            if is_mkdocs_snippet_line(lines[line_idx].content(content), flavor) {
1453                continue;
1454            }
1455
1456            let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1457
1458            let text = cap.get(1).map_or("", |m| m.as_str());
1459
1460            // Only process reference links (group 6)
1461            if let Some(ref_id) = cap.get(6) {
1462                let ref_id_str = ref_id.as_str();
1463                let normalized_ref = if ref_id_str.is_empty() {
1464                    Cow::Owned(text.to_lowercase()) // Implicit reference
1465                } else {
1466                    Cow::Owned(ref_id_str.to_lowercase())
1467                };
1468
1469                // This is an undefined reference (pulldown-cmark didn't parse it)
1470                links.push(ParsedLink {
1471                    line: line_num,
1472                    start_col: col_start,
1473                    end_col: col_end,
1474                    byte_offset: match_start,
1475                    byte_end: match_end,
1476                    text: Cow::Borrowed(text),
1477                    url: Cow::Borrowed(""), // Empty URL indicates undefined reference
1478                    is_reference: true,
1479                    reference_id: Some(normalized_ref),
1480                    link_type: LinkType::Reference, // Undefined references are reference-style
1481                });
1482            }
1483        }
1484
1485        (links, broken_links, footnote_refs)
1486    }
1487
1488    /// Parse all images in the content
1489    fn parse_images(
1490        content: &'a str,
1491        lines: &[LineInfo],
1492        code_blocks: &[(usize, usize)],
1493        code_spans: &[CodeSpan],
1494        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1495    ) -> Vec<ParsedImage<'a>> {
1496        use crate::utils::skip_context::is_in_html_comment_ranges;
1497        use std::collections::HashSet;
1498
1499        // Pre-size based on a heuristic: images are less common than links
1500        let mut images = Vec::with_capacity(content.len() / 1000);
1501        let mut found_positions = HashSet::new();
1502
1503        // Use pulldown-cmark for parsing - more accurate and faster
1504        let parser = Parser::new(content).into_offset_iter();
1505        let mut image_stack: Vec<(usize, pulldown_cmark::CowStr<'a>, LinkType, pulldown_cmark::CowStr<'a>)> =
1506            Vec::new();
1507        let mut text_chunks: Vec<(String, usize, usize)> = Vec::new(); // (text, start, end)
1508
1509        for (event, range) in parser {
1510            match event {
1511                Event::Start(Tag::Image {
1512                    link_type,
1513                    dest_url,
1514                    id,
1515                    ..
1516                }) => {
1517                    image_stack.push((range.start, dest_url, link_type, id));
1518                    text_chunks.clear();
1519                }
1520                Event::Text(text) if !image_stack.is_empty() => {
1521                    text_chunks.push((text.to_string(), range.start, range.end));
1522                }
1523                Event::Code(code) if !image_stack.is_empty() => {
1524                    let code_text = format!("`{code}`");
1525                    text_chunks.push((code_text, range.start, range.end));
1526                }
1527                Event::End(TagEnd::Image) => {
1528                    if let Some((start_pos, url, link_type, ref_id)) = image_stack.pop() {
1529                        // Skip if in code block
1530                        if CodeBlockUtils::is_in_code_block(code_blocks, start_pos) {
1531                            continue;
1532                        }
1533
1534                        // Skip if in code span
1535                        if Self::is_offset_in_code_span(code_spans, start_pos) {
1536                            continue;
1537                        }
1538
1539                        // Skip if in HTML comment
1540                        if is_in_html_comment_ranges(html_comment_ranges, start_pos) {
1541                            continue;
1542                        }
1543
1544                        // Find line and column using binary search
1545                        let (_, line_num, col_start) = Self::find_line_for_offset(lines, start_pos);
1546                        let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, range.end);
1547
1548                        let is_reference = matches!(
1549                            link_type,
1550                            LinkType::Reference | LinkType::Collapsed | LinkType::Shortcut
1551                        );
1552
1553                        // Extract alt text directly from source bytes to preserve escaping
1554                        // Text events from pulldown-cmark unescape \] → ], which breaks rules that need escaping
1555                        let alt_text = if start_pos < content.len() {
1556                            let image_bytes = &content.as_bytes()[start_pos..range.end.min(content.len())];
1557
1558                            // Find MATCHING ] by tracking bracket depth for nested brackets
1559                            // An unescaped bracket is one NOT preceded by an odd number of backslashes
1560                            let mut close_pos = None;
1561                            let mut depth = 0;
1562
1563                            if image_bytes.len() > 2 {
1564                                for (i, &byte) in image_bytes.iter().enumerate().skip(2) {
1565                                    // Count preceding backslashes
1566                                    let mut backslash_count = 0;
1567                                    let mut j = i;
1568                                    while j > 0 && image_bytes[j - 1] == b'\\' {
1569                                        backslash_count += 1;
1570                                        j -= 1;
1571                                    }
1572                                    let is_escaped = backslash_count % 2 != 0;
1573
1574                                    if !is_escaped {
1575                                        if byte == b'[' {
1576                                            depth += 1;
1577                                        } else if byte == b']' {
1578                                            if depth == 0 {
1579                                                // Found the matching closing bracket
1580                                                close_pos = Some(i);
1581                                                break;
1582                                            } else {
1583                                                depth -= 1;
1584                                            }
1585                                        }
1586                                    }
1587                                }
1588                            }
1589
1590                            if let Some(pos) = close_pos {
1591                                Cow::Borrowed(std::str::from_utf8(&image_bytes[2..pos]).unwrap_or(""))
1592                            } else {
1593                                Cow::Borrowed("")
1594                            }
1595                        } else {
1596                            Cow::Borrowed("")
1597                        };
1598
1599                        let reference_id = if is_reference && !ref_id.is_empty() {
1600                            Some(Cow::Owned(ref_id.to_lowercase()))
1601                        } else if is_reference {
1602                            Some(Cow::Owned(alt_text.to_lowercase())) // Collapsed/shortcut references
1603                        } else {
1604                            None
1605                        };
1606
1607                        found_positions.insert(start_pos);
1608                        images.push(ParsedImage {
1609                            line: line_num,
1610                            start_col: col_start,
1611                            end_col: col_end,
1612                            byte_offset: start_pos,
1613                            byte_end: range.end,
1614                            alt_text,
1615                            url: Cow::Owned(url.to_string()),
1616                            is_reference,
1617                            reference_id,
1618                            link_type,
1619                        });
1620                    }
1621                }
1622                _ => {}
1623            }
1624        }
1625
1626        // Regex fallback for undefined references that pulldown-cmark treats as plain text
1627        for cap in IMAGE_PATTERN.captures_iter(content) {
1628            let full_match = cap.get(0).unwrap();
1629            let match_start = full_match.start();
1630            let match_end = full_match.end();
1631
1632            // Skip if already found by pulldown-cmark
1633            if found_positions.contains(&match_start) {
1634                continue;
1635            }
1636
1637            // Skip if the ! is escaped
1638            if match_start > 0 && content.as_bytes().get(match_start - 1) == Some(&b'\\') {
1639                continue;
1640            }
1641
1642            // Skip if in code block, code span, or HTML comment
1643            if CodeBlockUtils::is_in_code_block(code_blocks, match_start)
1644                || Self::is_offset_in_code_span(code_spans, match_start)
1645                || is_in_html_comment_ranges(html_comment_ranges, match_start)
1646            {
1647                continue;
1648            }
1649
1650            // Only process reference images (undefined references not found by pulldown-cmark)
1651            if let Some(ref_id) = cap.get(6) {
1652                let (_, line_num, col_start) = Self::find_line_for_offset(lines, match_start);
1653                let (_, _end_line_num, col_end) = Self::find_line_for_offset(lines, match_end);
1654                let alt_text = cap.get(1).map_or("", |m| m.as_str());
1655                let ref_id_str = ref_id.as_str();
1656                let normalized_ref = if ref_id_str.is_empty() {
1657                    Cow::Owned(alt_text.to_lowercase())
1658                } else {
1659                    Cow::Owned(ref_id_str.to_lowercase())
1660                };
1661
1662                images.push(ParsedImage {
1663                    line: line_num,
1664                    start_col: col_start,
1665                    end_col: col_end,
1666                    byte_offset: match_start,
1667                    byte_end: match_end,
1668                    alt_text: Cow::Borrowed(alt_text),
1669                    url: Cow::Borrowed(""),
1670                    is_reference: true,
1671                    reference_id: Some(normalized_ref),
1672                    link_type: LinkType::Reference, // Undefined references are reference-style
1673                });
1674            }
1675        }
1676
1677        images
1678    }
1679
1680    /// Parse reference definitions
1681    fn parse_reference_defs(content: &str, lines: &[LineInfo]) -> Vec<ReferenceDef> {
1682        // Pre-size based on lines count as reference definitions are line-based
1683        let mut refs = Vec::with_capacity(lines.len() / 20); // ~1 ref per 20 lines
1684
1685        for (line_idx, line_info) in lines.iter().enumerate() {
1686            // Skip lines in code blocks
1687            if line_info.in_code_block {
1688                continue;
1689            }
1690
1691            let line = line_info.content(content);
1692            let line_num = line_idx + 1;
1693
1694            if let Some(cap) = REF_DEF_PATTERN.captures(line) {
1695                let id = cap.get(1).unwrap().as_str().to_lowercase();
1696                let url = cap.get(2).unwrap().as_str().to_string();
1697                let title_match = cap.get(3).or_else(|| cap.get(4));
1698                let title = title_match.map(|m| m.as_str().to_string());
1699
1700                // Calculate byte positions
1701                // The match starts at the beginning of the line (0) and extends to the end
1702                let match_obj = cap.get(0).unwrap();
1703                let byte_offset = line_info.byte_offset + match_obj.start();
1704                let byte_end = line_info.byte_offset + match_obj.end();
1705
1706                // Calculate title byte positions (includes the quote character before content)
1707                let (title_byte_start, title_byte_end) = if let Some(m) = title_match {
1708                    // The match is the content inside quotes, so we include the quote before
1709                    let start = line_info.byte_offset + m.start().saturating_sub(1);
1710                    let end = line_info.byte_offset + m.end() + 1; // Include closing quote
1711                    (Some(start), Some(end))
1712                } else {
1713                    (None, None)
1714                };
1715
1716                refs.push(ReferenceDef {
1717                    line: line_num,
1718                    id,
1719                    url,
1720                    title,
1721                    byte_offset,
1722                    byte_end,
1723                    title_byte_start,
1724                    title_byte_end,
1725                });
1726            }
1727        }
1728
1729        refs
1730    }
1731
1732    /// Fast blockquote prefix parser - replaces regex for 5-10x speedup
1733    /// Handles nested blockquotes like `> > > content`
1734    /// Returns: Some((prefix_with_ws, content_after_prefix)) or None
1735    #[inline]
1736    fn parse_blockquote_prefix(line: &str) -> Option<(&str, &str)> {
1737        let trimmed_start = line.trim_start();
1738        if !trimmed_start.starts_with('>') {
1739            return None;
1740        }
1741
1742        // Track total prefix length to handle nested blockquotes
1743        let mut remaining = line;
1744        let mut total_prefix_len = 0;
1745
1746        loop {
1747            let trimmed = remaining.trim_start();
1748            if !trimmed.starts_with('>') {
1749                break;
1750            }
1751
1752            // Add leading whitespace + '>' to prefix
1753            let leading_ws_len = remaining.len() - trimmed.len();
1754            total_prefix_len += leading_ws_len + 1;
1755
1756            let after_gt = &trimmed[1..];
1757
1758            // Handle optional whitespace after '>' (space or tab)
1759            if let Some(stripped) = after_gt.strip_prefix(' ') {
1760                total_prefix_len += 1;
1761                remaining = stripped;
1762            } else if let Some(stripped) = after_gt.strip_prefix('\t') {
1763                total_prefix_len += 1;
1764                remaining = stripped;
1765            } else {
1766                remaining = after_gt;
1767            }
1768        }
1769
1770        Some((&line[..total_prefix_len], remaining))
1771    }
1772
1773    /// Fast unordered list parser - replaces regex for 5-10x speedup
1774    /// Matches: ^(\s*)([-*+])([ \t]*)(.*)
1775    /// Returns: Some((leading_ws, marker, spacing, content)) or None
1776    #[inline]
1777    fn parse_unordered_list(line: &str) -> Option<(&str, char, &str, &str)> {
1778        let bytes = line.as_bytes();
1779        let mut i = 0;
1780
1781        // Skip leading whitespace
1782        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1783            i += 1;
1784        }
1785
1786        // Check for marker
1787        if i >= bytes.len() {
1788            return None;
1789        }
1790        let marker = bytes[i] as char;
1791        if marker != '-' && marker != '*' && marker != '+' {
1792            return None;
1793        }
1794        let marker_pos = i;
1795        i += 1;
1796
1797        // Collect spacing after marker (space or tab only)
1798        let spacing_start = i;
1799        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1800            i += 1;
1801        }
1802
1803        Some((&line[..marker_pos], marker, &line[spacing_start..i], &line[i..]))
1804    }
1805
1806    /// Fast ordered list parser - replaces regex for 5-10x speedup
1807    /// Matches: ^(\s*)(\d+)([.)])([ \t]*)(.*)
1808    /// Returns: Some((leading_ws, number_str, delimiter, spacing, content)) or None
1809    #[inline]
1810    fn parse_ordered_list(line: &str) -> Option<(&str, &str, char, &str, &str)> {
1811        let bytes = line.as_bytes();
1812        let mut i = 0;
1813
1814        // Skip leading whitespace
1815        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1816            i += 1;
1817        }
1818
1819        // Collect digits
1820        let number_start = i;
1821        while i < bytes.len() && bytes[i].is_ascii_digit() {
1822            i += 1;
1823        }
1824        if i == number_start {
1825            return None; // No digits found
1826        }
1827
1828        // Check for delimiter
1829        if i >= bytes.len() {
1830            return None;
1831        }
1832        let delimiter = bytes[i] as char;
1833        if delimiter != '.' && delimiter != ')' {
1834            return None;
1835        }
1836        let delimiter_pos = i;
1837        i += 1;
1838
1839        // Collect spacing after delimiter (space or tab only)
1840        let spacing_start = i;
1841        while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') {
1842            i += 1;
1843        }
1844
1845        Some((
1846            &line[..number_start],
1847            &line[number_start..delimiter_pos],
1848            delimiter,
1849            &line[spacing_start..i],
1850            &line[i..],
1851        ))
1852    }
1853
1854    /// Pre-compute which lines are in code blocks - O(m*n) where m=code_blocks, n=lines
1855    /// Returns a Vec<bool> where index i indicates if line i is in a code block
1856    fn compute_code_block_line_map(content: &str, line_offsets: &[usize], code_blocks: &[(usize, usize)]) -> Vec<bool> {
1857        let num_lines = line_offsets.len();
1858        let mut in_code_block = vec![false; num_lines];
1859
1860        // For each code block, mark all lines within it
1861        for &(start, end) in code_blocks {
1862            // Ensure we're at valid UTF-8 boundaries
1863            let safe_start = if start > 0 && !content.is_char_boundary(start) {
1864                let mut boundary = start;
1865                while boundary > 0 && !content.is_char_boundary(boundary) {
1866                    boundary -= 1;
1867                }
1868                boundary
1869            } else {
1870                start
1871            };
1872
1873            let safe_end = if end < content.len() && !content.is_char_boundary(end) {
1874                let mut boundary = end;
1875                while boundary < content.len() && !content.is_char_boundary(boundary) {
1876                    boundary += 1;
1877                }
1878                boundary
1879            } else {
1880                end.min(content.len())
1881            };
1882
1883            // Trust the code blocks detected by CodeBlockUtils::detect_code_blocks()
1884            // That function now has proper list context awareness (see code_block_utils.rs)
1885            // and correctly distinguishes between:
1886            // - Fenced code blocks (``` or ~~~)
1887            // - Indented code blocks at document level (4 spaces + blank line before)
1888            // - List continuation paragraphs (NOT code blocks, even with 4 spaces)
1889            //
1890            // We no longer need to re-validate here. The original validation logic
1891            // was causing false positives by marking list continuation paragraphs as
1892            // code blocks when they have 4 spaces of indentation.
1893
1894            // Use binary search to find the first and last line indices
1895            // line_offsets is sorted, so we can use partition_point for O(log n) lookup
1896            // Use safe_start/safe_end (UTF-8 boundaries) for consistent line mapping
1897            //
1898            // Find the line that CONTAINS safe_start: the line with the largest
1899            // start offset that is <= safe_start. partition_point gives us the
1900            // first line that starts AFTER safe_start, so we subtract 1.
1901            let first_line_after = line_offsets.partition_point(|&offset| offset <= safe_start);
1902            let first_line = first_line_after.saturating_sub(1);
1903            let last_line = line_offsets.partition_point(|&offset| offset < safe_end);
1904
1905            // Mark all lines in the range at once
1906            for flag in in_code_block.iter_mut().take(last_line).skip(first_line) {
1907                *flag = true;
1908            }
1909        }
1910
1911        in_code_block
1912    }
1913
1914    /// Pre-compute basic line information (without headings/blockquotes)
1915    fn compute_basic_line_info(
1916        content: &str,
1917        line_offsets: &[usize],
1918        code_blocks: &[(usize, usize)],
1919        flavor: MarkdownFlavor,
1920        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
1921        autodoc_ranges: &[crate::utils::skip_context::ByteRange],
1922    ) -> Vec<LineInfo> {
1923        let content_lines: Vec<&str> = content.lines().collect();
1924        let mut lines = Vec::with_capacity(content_lines.len());
1925
1926        // Pre-compute which lines are in code blocks
1927        let code_block_map = Self::compute_code_block_line_map(content, line_offsets, code_blocks);
1928
1929        // Detect front matter boundaries FIRST, before any other parsing
1930        // Use FrontMatterUtils to detect all types of front matter (YAML, TOML, JSON, malformed)
1931        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
1932
1933        for (i, line) in content_lines.iter().enumerate() {
1934            let byte_offset = line_offsets.get(i).copied().unwrap_or(0);
1935            let indent = line.len() - line.trim_start().len();
1936            // Compute visual indent with proper CommonMark tab expansion
1937            let visual_indent = ElementCache::calculate_indentation_width_default(line);
1938
1939            // Parse blockquote prefix once and reuse it (avoid redundant parsing)
1940            let blockquote_parse = Self::parse_blockquote_prefix(line);
1941
1942            // For blank detection, consider blockquote context
1943            let is_blank = if let Some((_, content)) = blockquote_parse {
1944                // In blockquote context, check if content after prefix is blank
1945                content.trim().is_empty()
1946            } else {
1947                line.trim().is_empty()
1948            };
1949
1950            // Use pre-computed map for O(1) lookup instead of O(m) iteration
1951            let in_code_block = code_block_map.get(i).copied().unwrap_or(false);
1952
1953            // Detect list items (skip if in frontmatter, in mkdocstrings block, or in HTML comment)
1954            let in_mkdocstrings = flavor == MarkdownFlavor::MkDocs
1955                && crate::utils::mkdocstrings_refs::is_within_autodoc_block_ranges(autodoc_ranges, byte_offset);
1956            // Check if the ENTIRE line is within an HTML comment (not just the line start)
1957            // This ensures content after `-->` on the same line is not incorrectly skipped
1958            let line_end_offset = byte_offset + line.len();
1959            let in_html_comment = crate::utils::skip_context::is_line_entirely_in_html_comment(
1960                html_comment_ranges,
1961                byte_offset,
1962                line_end_offset,
1963            );
1964            let list_item = if !(in_code_block
1965                || is_blank
1966                || in_mkdocstrings
1967                || in_html_comment
1968                || (front_matter_end > 0 && i < front_matter_end))
1969            {
1970                // Strip blockquote prefix if present for list detection (reuse cached result)
1971                let (line_for_list_check, blockquote_prefix_len) = if let Some((prefix, content)) = blockquote_parse {
1972                    (content, prefix.len())
1973                } else {
1974                    (&**line, 0)
1975                };
1976
1977                if let Some((leading_spaces, marker, spacing, _content)) =
1978                    Self::parse_unordered_list(line_for_list_check)
1979                {
1980                    let marker_column = blockquote_prefix_len + leading_spaces.len();
1981                    let content_column = marker_column + 1 + spacing.len();
1982
1983                    // According to CommonMark spec, unordered list items MUST have at least one space
1984                    // after the marker (-, *, or +). Without a space, it's not a list item.
1985                    // This also naturally handles cases like:
1986                    // - *emphasis* (not a list)
1987                    // - **bold** (not a list)
1988                    // - --- (horizontal rule, not a list)
1989                    if spacing.is_empty() {
1990                        None
1991                    } else {
1992                        Some(ListItemInfo {
1993                            marker: marker.to_string(),
1994                            is_ordered: false,
1995                            number: None,
1996                            marker_column,
1997                            content_column,
1998                        })
1999                    }
2000                } else if let Some((leading_spaces, number_str, delimiter, spacing, content)) =
2001                    Self::parse_ordered_list(line_for_list_check)
2002                {
2003                    let marker = format!("{number_str}{delimiter}");
2004                    let marker_column = blockquote_prefix_len + leading_spaces.len();
2005                    let content_column = marker_column + marker.len() + spacing.len();
2006
2007                    // CommonMark spec: If content follows the marker, a space is required.
2008                    // But if the line ends after the marker (empty content or whitespace-only),
2009                    // no space is needed. Examples:
2010                    // - "1." (valid - no content after marker)
2011                    // - "1. " (valid - space before empty content)
2012                    // - "1. text" (valid - space before content)
2013                    // - "1.text" (INVALID - content without space)
2014                    let content_after_spacing = content.trim();
2015                    if spacing.is_empty() && !content_after_spacing.is_empty() {
2016                        None
2017                    } else {
2018                        Some(ListItemInfo {
2019                            marker,
2020                            is_ordered: true,
2021                            number: number_str.parse().ok(),
2022                            marker_column,
2023                            content_column,
2024                        })
2025                    }
2026                } else {
2027                    None
2028                }
2029            } else {
2030                None
2031            };
2032
2033            // Detect horizontal rules (only outside code blocks and frontmatter)
2034            // Uses CommonMark-compliant check including leading indentation validation
2035            let in_front_matter = front_matter_end > 0 && i < front_matter_end;
2036            let is_hr = !in_code_block && !in_front_matter && is_horizontal_rule_line(line);
2037
2038            lines.push(LineInfo {
2039                byte_offset,
2040                byte_len: line.len(),
2041                indent,
2042                visual_indent,
2043                is_blank,
2044                in_code_block,
2045                in_front_matter,
2046                in_html_block: false, // Will be populated after line creation
2047                in_html_comment,
2048                list_item,
2049                heading: None,    // Will be populated in second pass for Setext headings
2050                blockquote: None, // Will be populated after line creation
2051                in_mkdocstrings,
2052                in_esm_block: false, // Will be populated after line creation for MDX files
2053                in_code_span_continuation: false, // Will be populated after code spans are parsed
2054                is_horizontal_rule: is_hr,
2055            });
2056        }
2057
2058        lines
2059    }
2060
2061    /// Detect headings and blockquotes (called after HTML block detection)
2062    fn detect_headings_and_blockquotes(
2063        content: &str,
2064        lines: &mut [LineInfo],
2065        flavor: MarkdownFlavor,
2066        html_comment_ranges: &[crate::utils::skip_context::ByteRange],
2067        link_byte_ranges: &[(usize, usize)],
2068    ) {
2069        // Regex for heading detection
2070        static ATX_HEADING_REGEX: LazyLock<regex::Regex> =
2071            LazyLock::new(|| regex::Regex::new(r"^(\s*)(#{1,6})(\s*)(.*)$").unwrap());
2072        static SETEXT_UNDERLINE_REGEX: LazyLock<regex::Regex> =
2073            LazyLock::new(|| regex::Regex::new(r"^(\s*)(=+|-+)\s*$").unwrap());
2074
2075        let content_lines: Vec<&str> = content.lines().collect();
2076
2077        // Detect front matter boundaries to skip those lines
2078        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
2079
2080        // Detect headings (including Setext which needs look-ahead) and blockquotes
2081        for i in 0..lines.len() {
2082            if lines[i].in_code_block {
2083                continue;
2084            }
2085
2086            // Skip lines in front matter
2087            if front_matter_end > 0 && i < front_matter_end {
2088                continue;
2089            }
2090
2091            // Skip lines in HTML blocks - HTML content should not be parsed as markdown
2092            if lines[i].in_html_block {
2093                continue;
2094            }
2095
2096            let line = content_lines[i];
2097
2098            // Check for blockquotes (even on blank lines within blockquotes)
2099            if let Some(bq) = parse_blockquote_detailed(line) {
2100                let nesting_level = bq.markers.len(); // Each '>' is one level
2101                let marker_column = bq.indent.len();
2102
2103                // Build the prefix (indentation + markers + space)
2104                let prefix = format!("{}{}{}", bq.indent, bq.markers, bq.spaces_after);
2105
2106                // Check for various blockquote issues
2107                let has_no_space = bq.spaces_after.is_empty() && !bq.content.is_empty();
2108                // Only flag multiple literal spaces, not tabs
2109                // Tabs are handled by MD010 (no-hard-tabs), matching markdownlint behavior
2110                let has_multiple_spaces = bq.spaces_after.chars().filter(|&c| c == ' ').count() > 1;
2111
2112                // Check if needs MD028 fix (empty blockquote line without proper spacing)
2113                // MD028 flags empty blockquote lines that don't have a single space after the marker
2114                // Lines like "> " or ">> " are already correct and don't need fixing
2115                let needs_md028_fix = bq.content.is_empty() && bq.spaces_after.is_empty();
2116
2117                lines[i].blockquote = Some(BlockquoteInfo {
2118                    nesting_level,
2119                    indent: bq.indent.to_string(),
2120                    marker_column,
2121                    prefix,
2122                    content: bq.content.to_string(),
2123                    has_no_space_after_marker: has_no_space,
2124                    has_multiple_spaces_after_marker: has_multiple_spaces,
2125                    needs_md028_fix,
2126                });
2127            }
2128
2129            // Skip heading detection for blank lines
2130            if lines[i].is_blank {
2131                continue;
2132            }
2133
2134            // Check for ATX headings (but skip MkDocs snippet lines)
2135            // In MkDocs flavor, lines like "# -8<- [start:name]" are snippet markers, not headings
2136            let is_snippet_line = if flavor == MarkdownFlavor::MkDocs {
2137                crate::utils::mkdocs_snippets::is_snippet_section_start(line)
2138                    || crate::utils::mkdocs_snippets::is_snippet_section_end(line)
2139            } else {
2140                false
2141            };
2142
2143            if !is_snippet_line && let Some(caps) = ATX_HEADING_REGEX.captures(line) {
2144                // Skip headings inside HTML comments (using pre-computed ranges for efficiency)
2145                if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset) {
2146                    continue;
2147                }
2148                // Skip lines that fall within link syntax (e.g., multiline links like `[text](url\n#fragment)`)
2149                // This prevents false positives where `#fragment` is detected as a heading
2150                let line_offset = lines[i].byte_offset;
2151                if link_byte_ranges
2152                    .iter()
2153                    .any(|&(start, end)| line_offset > start && line_offset < end)
2154                {
2155                    continue;
2156                }
2157                let leading_spaces = caps.get(1).map_or("", |m| m.as_str());
2158                let hashes = caps.get(2).map_or("", |m| m.as_str());
2159                let spaces_after = caps.get(3).map_or("", |m| m.as_str());
2160                let rest = caps.get(4).map_or("", |m| m.as_str());
2161
2162                let level = hashes.len() as u8;
2163                let marker_column = leading_spaces.len();
2164
2165                // Check for closing sequence, but handle custom IDs that might come after
2166                let (text, has_closing, closing_seq) = {
2167                    // First check if there's a custom ID at the end
2168                    let (rest_without_id, custom_id_part) = if let Some(id_start) = rest.rfind(" {#") {
2169                        // Check if this looks like a valid custom ID (ends with })
2170                        if rest[id_start..].trim_end().ends_with('}') {
2171                            // Split off the custom ID
2172                            (&rest[..id_start], &rest[id_start..])
2173                        } else {
2174                            (rest, "")
2175                        }
2176                    } else {
2177                        (rest, "")
2178                    };
2179
2180                    // Now look for closing hashes in the part before the custom ID
2181                    let trimmed_rest = rest_without_id.trim_end();
2182                    if let Some(last_hash_byte_pos) = trimmed_rest.rfind('#') {
2183                        // Find the start of the hash sequence by walking backwards
2184                        // Use char_indices to get byte positions at char boundaries
2185                        let char_positions: Vec<(usize, char)> = trimmed_rest.char_indices().collect();
2186
2187                        // Find which char index corresponds to last_hash_byte_pos
2188                        let last_hash_char_idx = char_positions
2189                            .iter()
2190                            .position(|(byte_pos, _)| *byte_pos == last_hash_byte_pos);
2191
2192                        if let Some(mut char_idx) = last_hash_char_idx {
2193                            // Walk backwards to find start of hash sequence
2194                            while char_idx > 0 && char_positions[char_idx - 1].1 == '#' {
2195                                char_idx -= 1;
2196                            }
2197
2198                            // Get the byte position of the start of hashes
2199                            let start_of_hashes = char_positions[char_idx].0;
2200
2201                            // Check if there's at least one space before the closing hashes
2202                            let has_space_before = char_idx == 0 || char_positions[char_idx - 1].1.is_whitespace();
2203
2204                            // Check if this is a valid closing sequence (all hashes to end of trimmed part)
2205                            let potential_closing = &trimmed_rest[start_of_hashes..];
2206                            let is_all_hashes = potential_closing.chars().all(|c| c == '#');
2207
2208                            if is_all_hashes && has_space_before {
2209                                // This is a closing sequence
2210                                let closing_hashes = potential_closing.to_string();
2211                                // The text is everything before the closing hashes
2212                                // Don't include the custom ID here - it will be extracted later
2213                                let text_part = if !custom_id_part.is_empty() {
2214                                    // If we have a custom ID, append it back to get the full rest
2215                                    // This allows the extract_header_id function to handle it properly
2216                                    format!("{}{}", trimmed_rest[..start_of_hashes].trim_end(), custom_id_part)
2217                                } else {
2218                                    trimmed_rest[..start_of_hashes].trim_end().to_string()
2219                                };
2220                                (text_part, true, closing_hashes)
2221                            } else {
2222                                // Not a valid closing sequence, return the full content
2223                                (rest.to_string(), false, String::new())
2224                            }
2225                        } else {
2226                            // Couldn't find char boundary, return the full content
2227                            (rest.to_string(), false, String::new())
2228                        }
2229                    } else {
2230                        // No hashes found, return the full content
2231                        (rest.to_string(), false, String::new())
2232                    }
2233                };
2234
2235                let content_column = marker_column + hashes.len() + spaces_after.len();
2236
2237                // Extract custom header ID if present
2238                let raw_text = text.trim().to_string();
2239                let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2240
2241                // If no custom ID was found on the header line, check the next line for standalone attr-list
2242                if custom_id.is_none() && i + 1 < content_lines.len() && i + 1 < lines.len() {
2243                    let next_line = content_lines[i + 1];
2244                    if !lines[i + 1].in_code_block
2245                        && crate::utils::header_id_utils::is_standalone_attr_list(next_line)
2246                        && let Some(next_line_id) =
2247                            crate::utils::header_id_utils::extract_standalone_attr_list_id(next_line)
2248                    {
2249                        custom_id = Some(next_line_id);
2250                    }
2251                }
2252
2253                // ATX heading is "valid" for processing by heading rules if:
2254                // 1. Has space after # (CommonMark compliant): `# Heading`
2255                // 2. Is empty (just hashes): `#`
2256                // 3. Has multiple hashes (##intro is likely intended heading, not hashtag)
2257                // 4. Content starts with uppercase (likely intended heading, not social hashtag)
2258                //
2259                // Invalid patterns (hashtag-like) are skipped by most heading rules:
2260                // - `#tag` - single # with lowercase (social hashtag)
2261                // - `#123` - single # with number (GitHub issue ref)
2262                let is_valid = !spaces_after.is_empty()
2263                    || rest.is_empty()
2264                    || level > 1
2265                    || rest.trim().chars().next().is_some_and(|c| c.is_uppercase());
2266
2267                lines[i].heading = Some(HeadingInfo {
2268                    level,
2269                    style: HeadingStyle::ATX,
2270                    marker: hashes.to_string(),
2271                    marker_column,
2272                    content_column,
2273                    text: clean_text,
2274                    custom_id,
2275                    raw_text,
2276                    has_closing_sequence: has_closing,
2277                    closing_sequence: closing_seq,
2278                    is_valid,
2279                });
2280            }
2281            // Check for Setext headings (need to look at next line)
2282            else if i + 1 < content_lines.len() && i + 1 < lines.len() {
2283                let next_line = content_lines[i + 1];
2284                if !lines[i + 1].in_code_block && SETEXT_UNDERLINE_REGEX.is_match(next_line) {
2285                    // Skip if next line is front matter delimiter
2286                    if front_matter_end > 0 && i < front_matter_end {
2287                        continue;
2288                    }
2289
2290                    // Skip Setext headings inside HTML comments (using pre-computed ranges for efficiency)
2291                    if crate::utils::skip_context::is_in_html_comment_ranges(html_comment_ranges, lines[i].byte_offset)
2292                    {
2293                        continue;
2294                    }
2295
2296                    let underline = next_line.trim();
2297
2298                    let level = if underline.starts_with('=') { 1 } else { 2 };
2299                    let style = if level == 1 {
2300                        HeadingStyle::Setext1
2301                    } else {
2302                        HeadingStyle::Setext2
2303                    };
2304
2305                    // Extract custom header ID if present
2306                    let raw_text = line.trim().to_string();
2307                    let (clean_text, mut custom_id) = crate::utils::header_id_utils::extract_header_id(&raw_text);
2308
2309                    // If no custom ID was found on the header line, check the line after underline for standalone attr-list
2310                    if custom_id.is_none() && i + 2 < content_lines.len() && i + 2 < lines.len() {
2311                        let attr_line = content_lines[i + 2];
2312                        if !lines[i + 2].in_code_block
2313                            && crate::utils::header_id_utils::is_standalone_attr_list(attr_line)
2314                            && let Some(attr_line_id) =
2315                                crate::utils::header_id_utils::extract_standalone_attr_list_id(attr_line)
2316                        {
2317                            custom_id = Some(attr_line_id);
2318                        }
2319                    }
2320
2321                    lines[i].heading = Some(HeadingInfo {
2322                        level,
2323                        style,
2324                        marker: underline.to_string(),
2325                        marker_column: next_line.len() - next_line.trim_start().len(),
2326                        content_column: lines[i].indent,
2327                        text: clean_text,
2328                        custom_id,
2329                        raw_text,
2330                        has_closing_sequence: false,
2331                        closing_sequence: String::new(),
2332                        is_valid: true, // Setext headings are always valid
2333                    });
2334                }
2335            }
2336        }
2337    }
2338
2339    /// Detect HTML blocks in the content
2340    fn detect_html_blocks(content: &str, lines: &mut [LineInfo]) {
2341        // HTML block elements that trigger block context
2342        // Includes HTML5 media, embedded content, and interactive elements
2343        const BLOCK_ELEMENTS: &[&str] = &[
2344            "address",
2345            "article",
2346            "aside",
2347            "audio",
2348            "blockquote",
2349            "canvas",
2350            "details",
2351            "dialog",
2352            "dd",
2353            "div",
2354            "dl",
2355            "dt",
2356            "embed",
2357            "fieldset",
2358            "figcaption",
2359            "figure",
2360            "footer",
2361            "form",
2362            "h1",
2363            "h2",
2364            "h3",
2365            "h4",
2366            "h5",
2367            "h6",
2368            "header",
2369            "hr",
2370            "iframe",
2371            "li",
2372            "main",
2373            "menu",
2374            "nav",
2375            "noscript",
2376            "object",
2377            "ol",
2378            "p",
2379            "picture",
2380            "pre",
2381            "script",
2382            "search",
2383            "section",
2384            "source",
2385            "style",
2386            "summary",
2387            "svg",
2388            "table",
2389            "tbody",
2390            "td",
2391            "template",
2392            "textarea",
2393            "tfoot",
2394            "th",
2395            "thead",
2396            "tr",
2397            "track",
2398            "ul",
2399            "video",
2400        ];
2401
2402        let mut i = 0;
2403        while i < lines.len() {
2404            // Skip if already in code block or front matter
2405            if lines[i].in_code_block || lines[i].in_front_matter {
2406                i += 1;
2407                continue;
2408            }
2409
2410            let trimmed = lines[i].content(content).trim_start();
2411
2412            // Check if line starts with an HTML tag
2413            if trimmed.starts_with('<') && trimmed.len() > 1 {
2414                // Extract tag name safely
2415                let after_bracket = &trimmed[1..];
2416                let is_closing = after_bracket.starts_with('/');
2417                let tag_start = if is_closing { &after_bracket[1..] } else { after_bracket };
2418
2419                // Extract tag name (stop at space, >, /, or end of string)
2420                let tag_name = tag_start
2421                    .chars()
2422                    .take_while(|c| c.is_ascii_alphabetic() || *c == '-' || c.is_ascii_digit())
2423                    .collect::<String>()
2424                    .to_lowercase();
2425
2426                // Check if it's a block element
2427                if !tag_name.is_empty() && BLOCK_ELEMENTS.contains(&tag_name.as_str()) {
2428                    // Mark this line as in HTML block
2429                    lines[i].in_html_block = true;
2430
2431                    // For simplicity, just mark lines until we find a closing tag or reach a blank line
2432                    // This avoids complex nesting logic that might cause infinite loops
2433                    if !is_closing {
2434                        let closing_tag = format!("</{tag_name}>");
2435                        // style and script tags can contain blank lines (CSS/JS formatting)
2436                        let allow_blank_lines = tag_name == "style" || tag_name == "script";
2437                        let mut j = i + 1;
2438                        let mut found_closing_tag = false;
2439                        while j < lines.len() && j < i + 100 {
2440                            // Limit search to 100 lines
2441                            // Stop at blank lines (except for style/script tags)
2442                            if !allow_blank_lines && lines[j].is_blank {
2443                                break;
2444                            }
2445
2446                            lines[j].in_html_block = true;
2447
2448                            // Check if this line contains the closing tag
2449                            if lines[j].content(content).contains(&closing_tag) {
2450                                found_closing_tag = true;
2451                            }
2452
2453                            // After finding closing tag, continue marking lines as
2454                            // in_html_block until blank line (per CommonMark spec)
2455                            if found_closing_tag {
2456                                j += 1;
2457                                // Continue marking subsequent lines until blank
2458                                while j < lines.len() && j < i + 100 {
2459                                    if lines[j].is_blank {
2460                                        break;
2461                                    }
2462                                    lines[j].in_html_block = true;
2463                                    j += 1;
2464                                }
2465                                break;
2466                            }
2467                            j += 1;
2468                        }
2469                    }
2470                }
2471            }
2472
2473            i += 1;
2474        }
2475    }
2476
2477    /// Detect ESM import/export blocks in MDX files
2478    /// ESM blocks consist of contiguous import/export statements at the top of the file
2479    fn detect_esm_blocks(content: &str, lines: &mut [LineInfo], flavor: MarkdownFlavor) {
2480        // Only process MDX files
2481        if !flavor.supports_esm_blocks() {
2482            return;
2483        }
2484
2485        let mut in_multiline_comment = false;
2486
2487        for line in lines.iter_mut() {
2488            // Skip blank lines and HTML comments
2489            if line.is_blank || line.in_html_comment {
2490                continue;
2491            }
2492
2493            let trimmed = line.content(content).trim_start();
2494
2495            // Handle continuation of multi-line JS comments
2496            if in_multiline_comment {
2497                if trimmed.contains("*/") {
2498                    in_multiline_comment = false;
2499                }
2500                continue;
2501            }
2502
2503            // Skip single-line JS comments (// and ///)
2504            if trimmed.starts_with("//") {
2505                continue;
2506            }
2507
2508            // Handle start of multi-line JS comment
2509            if trimmed.starts_with("/*") {
2510                if !trimmed.contains("*/") {
2511                    in_multiline_comment = true;
2512                }
2513                continue;
2514            }
2515
2516            // Check if line starts with import or export
2517            if trimmed.starts_with("import ") || trimmed.starts_with("export ") {
2518                line.in_esm_block = true;
2519            } else {
2520                // Once we hit a non-ESM, non-comment line, we're done with the ESM block
2521                break;
2522            }
2523        }
2524    }
2525
2526    /// Parse all inline code spans in the content using pulldown-cmark streaming parser
2527    fn parse_code_spans(content: &str, lines: &[LineInfo]) -> Vec<CodeSpan> {
2528        let mut code_spans = Vec::new();
2529
2530        // Quick check - if no backticks, no code spans
2531        if !content.contains('`') {
2532            return code_spans;
2533        }
2534
2535        // Use pulldown-cmark's streaming parser with byte offsets
2536        let parser = Parser::new(content).into_offset_iter();
2537
2538        for (event, range) in parser {
2539            if let Event::Code(_) = event {
2540                let start_pos = range.start;
2541                let end_pos = range.end;
2542
2543                // The range includes the backticks, extract the actual content
2544                let full_span = &content[start_pos..end_pos];
2545                let backtick_count = full_span.chars().take_while(|&c| c == '`').count();
2546
2547                // Extract content between backticks, preserving spaces
2548                let content_start = start_pos + backtick_count;
2549                let content_end = end_pos - backtick_count;
2550                let span_content = if content_start < content_end {
2551                    content[content_start..content_end].to_string()
2552                } else {
2553                    String::new()
2554                };
2555
2556                // Use binary search to find line number - O(log n) instead of O(n)
2557                // Find the rightmost line whose byte_offset <= start_pos
2558                let line_idx = lines
2559                    .partition_point(|line| line.byte_offset <= start_pos)
2560                    .saturating_sub(1);
2561                let line_num = line_idx + 1;
2562                let byte_col_start = start_pos - lines[line_idx].byte_offset;
2563
2564                // Find end column using binary search
2565                let end_line_idx = lines
2566                    .partition_point(|line| line.byte_offset <= end_pos)
2567                    .saturating_sub(1);
2568                let byte_col_end = end_pos - lines[end_line_idx].byte_offset;
2569
2570                // Convert byte offsets to character positions for correct Unicode handling
2571                // This ensures consistency with warning.column which uses character positions
2572                let line_content = lines[line_idx].content(content);
2573                let col_start = if byte_col_start <= line_content.len() {
2574                    line_content[..byte_col_start].chars().count()
2575                } else {
2576                    line_content.chars().count()
2577                };
2578
2579                let end_line_content = lines[end_line_idx].content(content);
2580                let col_end = if byte_col_end <= end_line_content.len() {
2581                    end_line_content[..byte_col_end].chars().count()
2582                } else {
2583                    end_line_content.chars().count()
2584                };
2585
2586                code_spans.push(CodeSpan {
2587                    line: line_num,
2588                    end_line: end_line_idx + 1,
2589                    start_col: col_start,
2590                    end_col: col_end,
2591                    byte_offset: start_pos,
2592                    byte_end: end_pos,
2593                    backtick_count,
2594                    content: span_content,
2595                });
2596            }
2597        }
2598
2599        // Sort by position to ensure consistent ordering
2600        code_spans.sort_by_key(|span| span.byte_offset);
2601
2602        code_spans
2603    }
2604
2605    /// Parse all list blocks in the content (legacy line-by-line approach)
2606    ///
2607    /// Uses a forward-scanning O(n) algorithm that tracks two variables during iteration:
2608    /// - `has_list_breaking_content_since_last_item`: Set when encountering content that
2609    ///   terminates a list (headings, horizontal rules, tables, insufficiently indented content)
2610    /// - `min_continuation_for_tracking`: Minimum indentation required for content to be
2611    ///   treated as list continuation (based on the list marker width)
2612    ///
2613    /// When a new list item is encountered, we check if list-breaking content was seen
2614    /// since the last item. If so, we start a new list block.
2615    fn parse_list_blocks(content: &str, lines: &[LineInfo]) -> Vec<ListBlock> {
2616        // Minimum indentation for unordered list continuation per CommonMark spec
2617        const UNORDERED_LIST_MIN_CONTINUATION_INDENT: usize = 2;
2618
2619        /// Initialize or reset the forward-scanning tracking state.
2620        /// This helper eliminates code duplication across three initialization sites.
2621        #[inline]
2622        fn reset_tracking_state(
2623            list_item: &ListItemInfo,
2624            has_list_breaking_content: &mut bool,
2625            min_continuation: &mut usize,
2626        ) {
2627            *has_list_breaking_content = false;
2628            let marker_width = if list_item.is_ordered {
2629                list_item.marker.len() + 1 // Ordered markers need space after period/paren
2630            } else {
2631                list_item.marker.len()
2632            };
2633            *min_continuation = if list_item.is_ordered {
2634                marker_width
2635            } else {
2636                UNORDERED_LIST_MIN_CONTINUATION_INDENT
2637            };
2638        }
2639
2640        // Pre-size based on lines that could be list items
2641        let mut list_blocks = Vec::with_capacity(lines.len() / 10); // Estimate ~10% of lines might start list blocks
2642        let mut current_block: Option<ListBlock> = None;
2643        let mut last_list_item_line = 0;
2644        let mut current_indent_level = 0;
2645        let mut last_marker_width = 0;
2646
2647        // Track list-breaking content since last item (fixes O(n²) bottleneck from issue #148)
2648        let mut has_list_breaking_content_since_last_item = false;
2649        let mut min_continuation_for_tracking = 0;
2650
2651        for (line_idx, line_info) in lines.iter().enumerate() {
2652            let line_num = line_idx + 1;
2653
2654            // Enhanced code block handling using Design #3's context analysis
2655            if line_info.in_code_block {
2656                if let Some(ref mut block) = current_block {
2657                    // Calculate minimum indentation for list continuation
2658                    let min_continuation_indent =
2659                        CodeBlockUtils::calculate_min_continuation_indent(content, lines, line_idx);
2660
2661                    // Analyze code block context using the three-tier classification
2662                    let context = CodeBlockUtils::analyze_code_block_context(lines, line_idx, min_continuation_indent);
2663
2664                    match context {
2665                        CodeBlockContext::Indented => {
2666                            // Code block is properly indented - continues the list
2667                            block.end_line = line_num;
2668                            continue;
2669                        }
2670                        CodeBlockContext::Standalone => {
2671                            // Code block separates lists - end current block
2672                            let completed_block = current_block.take().unwrap();
2673                            list_blocks.push(completed_block);
2674                            continue;
2675                        }
2676                        CodeBlockContext::Adjacent => {
2677                            // Edge case - use conservative behavior (continue list)
2678                            block.end_line = line_num;
2679                            continue;
2680                        }
2681                    }
2682                } else {
2683                    // No current list block - skip code block lines
2684                    continue;
2685                }
2686            }
2687
2688            // Extract blockquote prefix if any
2689            let blockquote_prefix = if let Some(caps) = BLOCKQUOTE_PREFIX_REGEX.captures(line_info.content(content)) {
2690                caps.get(0).unwrap().as_str().to_string()
2691            } else {
2692                String::new()
2693            };
2694
2695            // Track list-breaking content for non-list, non-blank lines (O(n) replacement for nested loop)
2696            // Skip lines that are continuations of multi-line code spans - they're part of the previous list item
2697            if current_block.is_some()
2698                && line_info.list_item.is_none()
2699                && !line_info.is_blank
2700                && !line_info.in_code_span_continuation
2701            {
2702                let line_content = line_info.content(content).trim();
2703
2704                // Check for structural separators that break lists
2705                // Note: Lazy continuation (indent=0) is valid in CommonMark and should NOT break lists.
2706                // Only lines with indent between 1 and min_continuation_for_tracking-1 break lists,
2707                // as they indicate improper indentation rather than lazy continuation.
2708                let is_lazy_continuation = line_info.indent == 0 && !line_info.is_blank;
2709                let breaks_list = line_info.heading.is_some()
2710                    || line_content.starts_with("---")
2711                    || line_content.starts_with("***")
2712                    || line_content.starts_with("___")
2713                    || crate::utils::skip_context::is_table_line(line_content)
2714                    || line_content.starts_with(">")
2715                    || (line_info.indent > 0
2716                        && line_info.indent < min_continuation_for_tracking
2717                        && !is_lazy_continuation);
2718
2719                if breaks_list {
2720                    has_list_breaking_content_since_last_item = true;
2721                }
2722            }
2723
2724            // If this line is a code span continuation within an active list block,
2725            // extend the block's end_line to include this line (maintains list continuity)
2726            if line_info.in_code_span_continuation
2727                && line_info.list_item.is_none()
2728                && let Some(ref mut block) = current_block
2729            {
2730                block.end_line = line_num;
2731            }
2732
2733            // Extend block.end_line for regular continuation lines (non-list-item, non-blank,
2734            // properly indented lines within the list). This ensures the workaround at line 2448
2735            // works correctly when there are multiple continuation lines before a nested list item.
2736            // Also include lazy continuation lines (indent=0) per CommonMark spec.
2737            let is_valid_continuation =
2738                line_info.indent >= min_continuation_for_tracking || (line_info.indent == 0 && !line_info.is_blank); // Lazy continuation
2739            if !line_info.in_code_span_continuation
2740                && line_info.list_item.is_none()
2741                && !line_info.is_blank
2742                && !line_info.in_code_block
2743                && is_valid_continuation
2744                && let Some(ref mut block) = current_block
2745            {
2746                block.end_line = line_num;
2747            }
2748
2749            // Check if this line is a list item
2750            if let Some(list_item) = &line_info.list_item {
2751                // Calculate nesting level based on indentation
2752                let item_indent = list_item.marker_column;
2753                let nesting = item_indent / 2; // Assume 2-space indentation for nesting
2754
2755                if let Some(ref mut block) = current_block {
2756                    // Check if this continues the current block
2757                    // For nested lists, we need to check if this is a nested item (higher nesting level)
2758                    // or a continuation at the same or lower level
2759                    let is_nested = nesting > block.nesting_level;
2760                    let same_type =
2761                        (block.is_ordered && list_item.is_ordered) || (!block.is_ordered && !list_item.is_ordered);
2762                    let same_context = block.blockquote_prefix == blockquote_prefix;
2763                    // Allow one blank line after last item, or lines immediately after block content
2764                    let reasonable_distance = line_num <= last_list_item_line + 2 || line_num == block.end_line + 1;
2765
2766                    // For unordered lists, also check marker consistency
2767                    let marker_compatible =
2768                        block.is_ordered || block.marker.is_none() || block.marker.as_ref() == Some(&list_item.marker);
2769
2770                    // O(1) check: Use the tracked variable instead of O(n) nested loop
2771                    // This eliminates the quadratic bottleneck from issue #148
2772                    let has_non_list_content = has_list_breaking_content_since_last_item;
2773
2774                    // A list continues if:
2775                    // 1. It's a nested item (indented more than the parent), OR
2776                    // 2. It's the same type at the same level with reasonable distance
2777                    let mut continues_list = if is_nested {
2778                        // Nested items always continue the list if they're in the same context
2779                        same_context && reasonable_distance && !has_non_list_content
2780                    } else {
2781                        // Same-level items need to match type and markers
2782                        same_type && same_context && reasonable_distance && marker_compatible && !has_non_list_content
2783                    };
2784
2785                    // WORKAROUND: If items are truly consecutive (no blank lines), they MUST be in the same list
2786                    // This handles edge cases where content patterns might otherwise split lists incorrectly
2787                    if !continues_list && reasonable_distance && line_num > 0 && block.end_line == line_num - 1 {
2788                        // Check if the previous line was a list item or a continuation of a list item
2789                        // (including lazy continuation lines)
2790                        if block.item_lines.contains(&(line_num - 1)) {
2791                            // They're consecutive list items - force them to be in the same list
2792                            continues_list = true;
2793                        } else {
2794                            // Previous line is a continuation line within this block
2795                            // (e.g., lazy continuation with indent=0)
2796                            // Since block.end_line == line_num - 1, we know line_num - 1 is part of this block
2797                            continues_list = true;
2798                        }
2799                    }
2800
2801                    if continues_list {
2802                        // Extend current block
2803                        block.end_line = line_num;
2804                        block.item_lines.push(line_num);
2805
2806                        // Update max marker width
2807                        block.max_marker_width = block.max_marker_width.max(if list_item.is_ordered {
2808                            list_item.marker.len() + 1
2809                        } else {
2810                            list_item.marker.len()
2811                        });
2812
2813                        // Update marker consistency for unordered lists
2814                        if !block.is_ordered
2815                            && block.marker.is_some()
2816                            && block.marker.as_ref() != Some(&list_item.marker)
2817                        {
2818                            // Mixed markers, clear the marker field
2819                            block.marker = None;
2820                        }
2821
2822                        // Reset tracked state for issue #148 optimization
2823                        reset_tracking_state(
2824                            list_item,
2825                            &mut has_list_breaking_content_since_last_item,
2826                            &mut min_continuation_for_tracking,
2827                        );
2828                    } else {
2829                        // End current block and start a new one
2830
2831                        list_blocks.push(block.clone());
2832
2833                        *block = ListBlock {
2834                            start_line: line_num,
2835                            end_line: line_num,
2836                            is_ordered: list_item.is_ordered,
2837                            marker: if list_item.is_ordered {
2838                                None
2839                            } else {
2840                                Some(list_item.marker.clone())
2841                            },
2842                            blockquote_prefix: blockquote_prefix.clone(),
2843                            item_lines: vec![line_num],
2844                            nesting_level: nesting,
2845                            max_marker_width: if list_item.is_ordered {
2846                                list_item.marker.len() + 1
2847                            } else {
2848                                list_item.marker.len()
2849                            },
2850                        };
2851
2852                        // Initialize tracked state for new block (issue #148 optimization)
2853                        reset_tracking_state(
2854                            list_item,
2855                            &mut has_list_breaking_content_since_last_item,
2856                            &mut min_continuation_for_tracking,
2857                        );
2858                    }
2859                } else {
2860                    // Start a new block
2861                    current_block = Some(ListBlock {
2862                        start_line: line_num,
2863                        end_line: line_num,
2864                        is_ordered: list_item.is_ordered,
2865                        marker: if list_item.is_ordered {
2866                            None
2867                        } else {
2868                            Some(list_item.marker.clone())
2869                        },
2870                        blockquote_prefix,
2871                        item_lines: vec![line_num],
2872                        nesting_level: nesting,
2873                        max_marker_width: list_item.marker.len(),
2874                    });
2875
2876                    // Initialize tracked state for new block (issue #148 optimization)
2877                    reset_tracking_state(
2878                        list_item,
2879                        &mut has_list_breaking_content_since_last_item,
2880                        &mut min_continuation_for_tracking,
2881                    );
2882                }
2883
2884                last_list_item_line = line_num;
2885                current_indent_level = item_indent;
2886                last_marker_width = if list_item.is_ordered {
2887                    list_item.marker.len() + 1 // Add 1 for the space after ordered list markers
2888                } else {
2889                    list_item.marker.len()
2890                };
2891            } else if let Some(ref mut block) = current_block {
2892                // Not a list item - check if it continues the current block
2893
2894                // For MD032 compatibility, we use a simple approach:
2895                // - Indented lines continue the list
2896                // - Blank lines followed by indented content continue the list
2897                // - Everything else ends the list
2898
2899                // Check if the last line in the list block ended with a backslash (hard line break)
2900                // This handles cases where list items use backslash for hard line breaks
2901                let prev_line_ends_with_backslash = if block.end_line > 0 && block.end_line - 1 < lines.len() {
2902                    lines[block.end_line - 1].content(content).trim_end().ends_with('\\')
2903                } else {
2904                    false
2905                };
2906
2907                // Calculate minimum indentation for list continuation
2908                // For ordered lists, use the last marker width (e.g., 3 for "1. ", 4 for "10. ")
2909                // For unordered lists like "- ", content starts at column 2, so continuations need at least 2 spaces
2910                let min_continuation_indent = if block.is_ordered {
2911                    current_indent_level + last_marker_width
2912                } else {
2913                    current_indent_level + 2 // Unordered lists need at least 2 spaces (e.g., "- " = 2 chars)
2914                };
2915
2916                if prev_line_ends_with_backslash || line_info.indent >= min_continuation_indent {
2917                    // Indented line or backslash continuation continues the list
2918                    block.end_line = line_num;
2919                } else if line_info.is_blank {
2920                    // Blank line - check if it's internal to the list or ending it
2921                    // We only include blank lines that are followed by more list content
2922                    let mut check_idx = line_idx + 1;
2923                    let mut found_continuation = false;
2924
2925                    // Skip additional blank lines
2926                    while check_idx < lines.len() && lines[check_idx].is_blank {
2927                        check_idx += 1;
2928                    }
2929
2930                    if check_idx < lines.len() {
2931                        let next_line = &lines[check_idx];
2932                        // Check if followed by indented content (list continuation)
2933                        if !next_line.in_code_block && next_line.indent >= min_continuation_indent {
2934                            found_continuation = true;
2935                        }
2936                        // Check if followed by another list item at the same level
2937                        else if !next_line.in_code_block
2938                            && next_line.list_item.is_some()
2939                            && let Some(item) = &next_line.list_item
2940                        {
2941                            let next_blockquote_prefix = BLOCKQUOTE_PREFIX_REGEX
2942                                .find(next_line.content(content))
2943                                .map_or(String::new(), |m| m.as_str().to_string());
2944                            if item.marker_column == current_indent_level
2945                                && item.is_ordered == block.is_ordered
2946                                && block.blockquote_prefix.trim() == next_blockquote_prefix.trim()
2947                            {
2948                                // Check if there was meaningful content between the list items (unused now)
2949                                // This variable is kept for potential future use but is currently replaced by has_structural_separators
2950                                let _has_meaningful_content = (line_idx + 1..check_idx).any(|idx| {
2951                                    if let Some(between_line) = lines.get(idx) {
2952                                        let between_content = between_line.content(content);
2953                                        let trimmed = between_content.trim();
2954                                        // Skip empty lines
2955                                        if trimmed.is_empty() {
2956                                            return false;
2957                                        }
2958                                        // Check for meaningful content
2959                                        let line_indent = between_content.len() - between_content.trim_start().len();
2960
2961                                        // Structural separators (code fences, headings, etc.) are meaningful and should BREAK lists
2962                                        if trimmed.starts_with("```")
2963                                            || trimmed.starts_with("~~~")
2964                                            || trimmed.starts_with("---")
2965                                            || trimmed.starts_with("***")
2966                                            || trimmed.starts_with("___")
2967                                            || trimmed.starts_with(">")
2968                                            || crate::utils::skip_context::is_table_line(trimmed)
2969                                            || between_line.heading.is_some()
2970                                        {
2971                                            return true; // These are structural separators - meaningful content that breaks lists
2972                                        }
2973
2974                                        // Only properly indented content continues the list
2975                                        line_indent >= min_continuation_indent
2976                                    } else {
2977                                        false
2978                                    }
2979                                });
2980
2981                                if block.is_ordered {
2982                                    // For ordered lists: don't continue if there are structural separators
2983                                    // Check if there are structural separators between the list items
2984                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
2985                                        if let Some(between_line) = lines.get(idx) {
2986                                            let trimmed = between_line.content(content).trim();
2987                                            if trimmed.is_empty() {
2988                                                return false;
2989                                            }
2990                                            // Check for structural separators that break lists
2991                                            trimmed.starts_with("```")
2992                                                || trimmed.starts_with("~~~")
2993                                                || trimmed.starts_with("---")
2994                                                || trimmed.starts_with("***")
2995                                                || trimmed.starts_with("___")
2996                                                || trimmed.starts_with(">")
2997                                                || crate::utils::skip_context::is_table_line(trimmed)
2998                                                || between_line.heading.is_some()
2999                                        } else {
3000                                            false
3001                                        }
3002                                    });
3003                                    found_continuation = !has_structural_separators;
3004                                } else {
3005                                    // For unordered lists: also check for structural separators
3006                                    let has_structural_separators = (line_idx + 1..check_idx).any(|idx| {
3007                                        if let Some(between_line) = lines.get(idx) {
3008                                            let trimmed = between_line.content(content).trim();
3009                                            if trimmed.is_empty() {
3010                                                return false;
3011                                            }
3012                                            // Check for structural separators that break lists
3013                                            trimmed.starts_with("```")
3014                                                || trimmed.starts_with("~~~")
3015                                                || trimmed.starts_with("---")
3016                                                || trimmed.starts_with("***")
3017                                                || trimmed.starts_with("___")
3018                                                || trimmed.starts_with(">")
3019                                                || crate::utils::skip_context::is_table_line(trimmed)
3020                                                || between_line.heading.is_some()
3021                                        } else {
3022                                            false
3023                                        }
3024                                    });
3025                                    found_continuation = !has_structural_separators;
3026                                }
3027                            }
3028                        }
3029                    }
3030
3031                    if found_continuation {
3032                        // Include the blank line in the block
3033                        block.end_line = line_num;
3034                    } else {
3035                        // Blank line ends the list - don't include it
3036                        list_blocks.push(block.clone());
3037                        current_block = None;
3038                    }
3039                } else {
3040                    // Check for lazy continuation - non-indented line immediately after a list item
3041                    // But only if the line has sufficient indentation for the list type
3042                    let min_required_indent = if block.is_ordered {
3043                        current_indent_level + last_marker_width
3044                    } else {
3045                        current_indent_level + 2
3046                    };
3047
3048                    // For lazy continuation to apply, the line must either:
3049                    // 1. Have no indentation (true lazy continuation)
3050                    // 2. Have sufficient indentation for the list type
3051                    // BUT structural separators (headings, code blocks, etc.) should never be lazy continuations
3052                    let line_content = line_info.content(content).trim();
3053
3054                    // Check for table-like patterns
3055                    let looks_like_table = crate::utils::skip_context::is_table_line(line_content);
3056
3057                    let is_structural_separator = line_info.heading.is_some()
3058                        || line_content.starts_with("```")
3059                        || line_content.starts_with("~~~")
3060                        || line_content.starts_with("---")
3061                        || line_content.starts_with("***")
3062                        || line_content.starts_with("___")
3063                        || line_content.starts_with(">")
3064                        || looks_like_table;
3065
3066                    // Allow lazy continuation if we're still within the same list block
3067                    // (not just immediately after a list item)
3068                    let is_lazy_continuation = !is_structural_separator
3069                        && !line_info.is_blank
3070                        && (line_info.indent == 0 || line_info.indent >= min_required_indent);
3071
3072                    if is_lazy_continuation {
3073                        // Additional check: if the line starts with uppercase and looks like a new sentence,
3074                        // it's probably not a continuation
3075                        let content_to_check = if !blockquote_prefix.is_empty() {
3076                            // Strip blockquote prefix to check the actual content
3077                            line_info
3078                                .content(content)
3079                                .strip_prefix(&blockquote_prefix)
3080                                .unwrap_or(line_info.content(content))
3081                                .trim()
3082                        } else {
3083                            line_info.content(content).trim()
3084                        };
3085
3086                        let starts_with_uppercase = content_to_check.chars().next().is_some_and(|c| c.is_uppercase());
3087
3088                        // If it starts with uppercase and the previous line ended with punctuation,
3089                        // it's likely a new paragraph, not a continuation
3090                        if starts_with_uppercase && last_list_item_line > 0 {
3091                            // This looks like a new paragraph
3092                            list_blocks.push(block.clone());
3093                            current_block = None;
3094                        } else {
3095                            // This is a lazy continuation line
3096                            block.end_line = line_num;
3097                        }
3098                    } else {
3099                        // Non-indented, non-blank line that's not a lazy continuation - end the block
3100                        list_blocks.push(block.clone());
3101                        current_block = None;
3102                    }
3103                }
3104            }
3105        }
3106
3107        // Don't forget the last block
3108        if let Some(block) = current_block {
3109            list_blocks.push(block);
3110        }
3111
3112        // Merge adjacent blocks that should be one
3113        merge_adjacent_list_blocks(content, &mut list_blocks, lines);
3114
3115        list_blocks
3116    }
3117
3118    /// Compute character frequency for fast content analysis
3119    fn compute_char_frequency(content: &str) -> CharFrequency {
3120        let mut frequency = CharFrequency::default();
3121
3122        for ch in content.chars() {
3123            match ch {
3124                '#' => frequency.hash_count += 1,
3125                '*' => frequency.asterisk_count += 1,
3126                '_' => frequency.underscore_count += 1,
3127                '-' => frequency.hyphen_count += 1,
3128                '+' => frequency.plus_count += 1,
3129                '>' => frequency.gt_count += 1,
3130                '|' => frequency.pipe_count += 1,
3131                '[' => frequency.bracket_count += 1,
3132                '`' => frequency.backtick_count += 1,
3133                '<' => frequency.lt_count += 1,
3134                '!' => frequency.exclamation_count += 1,
3135                '\n' => frequency.newline_count += 1,
3136                _ => {}
3137            }
3138        }
3139
3140        frequency
3141    }
3142
3143    /// Parse HTML tags in the content
3144    fn parse_html_tags(
3145        content: &str,
3146        lines: &[LineInfo],
3147        code_blocks: &[(usize, usize)],
3148        flavor: MarkdownFlavor,
3149    ) -> Vec<HtmlTag> {
3150        static HTML_TAG_REGEX: LazyLock<regex::Regex> =
3151            LazyLock::new(|| regex::Regex::new(r"(?i)<(/?)([a-zA-Z][a-zA-Z0-9-]*)(?:\s+[^>]*?)?\s*(/?)>").unwrap());
3152
3153        let mut html_tags = Vec::with_capacity(content.matches('<').count());
3154
3155        for cap in HTML_TAG_REGEX.captures_iter(content) {
3156            let full_match = cap.get(0).unwrap();
3157            let match_start = full_match.start();
3158            let match_end = full_match.end();
3159
3160            // Skip if in code block
3161            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3162                continue;
3163            }
3164
3165            let is_closing = !cap.get(1).unwrap().as_str().is_empty();
3166            let tag_name_original = cap.get(2).unwrap().as_str();
3167            let tag_name = tag_name_original.to_lowercase();
3168            let is_self_closing = !cap.get(3).unwrap().as_str().is_empty();
3169
3170            // Skip JSX components in MDX files (tags starting with uppercase letter)
3171            // JSX components like <Chart />, <MyComponent> should not be treated as HTML
3172            if flavor.supports_jsx() && tag_name_original.chars().next().is_some_and(|c| c.is_uppercase()) {
3173                continue;
3174            }
3175
3176            // Find which line this tag is on
3177            let mut line_num = 1;
3178            let mut col_start = match_start;
3179            let mut col_end = match_end;
3180            for (idx, line_info) in lines.iter().enumerate() {
3181                if match_start >= line_info.byte_offset {
3182                    line_num = idx + 1;
3183                    col_start = match_start - line_info.byte_offset;
3184                    col_end = match_end - line_info.byte_offset;
3185                } else {
3186                    break;
3187                }
3188            }
3189
3190            html_tags.push(HtmlTag {
3191                line: line_num,
3192                start_col: col_start,
3193                end_col: col_end,
3194                byte_offset: match_start,
3195                byte_end: match_end,
3196                tag_name,
3197                is_closing,
3198                is_self_closing,
3199                raw_content: full_match.as_str().to_string(),
3200            });
3201        }
3202
3203        html_tags
3204    }
3205
3206    /// Parse emphasis spans in the content
3207    fn parse_emphasis_spans(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<EmphasisSpan> {
3208        static EMPHASIS_REGEX: LazyLock<regex::Regex> =
3209            LazyLock::new(|| regex::Regex::new(r"(\*{1,3}|_{1,3})([^*_\s][^*_]*?)(\*{1,3}|_{1,3})").unwrap());
3210
3211        let mut emphasis_spans = Vec::with_capacity(content.matches('*').count() + content.matches('_').count() / 4);
3212
3213        for cap in EMPHASIS_REGEX.captures_iter(content) {
3214            let full_match = cap.get(0).unwrap();
3215            let match_start = full_match.start();
3216            let match_end = full_match.end();
3217
3218            // Skip if in code block
3219            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3220                continue;
3221            }
3222
3223            let opening_markers = cap.get(1).unwrap().as_str();
3224            let content_part = cap.get(2).unwrap().as_str();
3225            let closing_markers = cap.get(3).unwrap().as_str();
3226
3227            // Validate matching markers
3228            if opening_markers.chars().next() != closing_markers.chars().next()
3229                || opening_markers.len() != closing_markers.len()
3230            {
3231                continue;
3232            }
3233
3234            let marker = opening_markers.chars().next().unwrap();
3235            let marker_count = opening_markers.len();
3236
3237            // Find which line this emphasis is on
3238            let mut line_num = 1;
3239            let mut col_start = match_start;
3240            let mut col_end = match_end;
3241            for (idx, line_info) in lines.iter().enumerate() {
3242                if match_start >= line_info.byte_offset {
3243                    line_num = idx + 1;
3244                    col_start = match_start - line_info.byte_offset;
3245                    col_end = match_end - line_info.byte_offset;
3246                } else {
3247                    break;
3248                }
3249            }
3250
3251            emphasis_spans.push(EmphasisSpan {
3252                line: line_num,
3253                start_col: col_start,
3254                end_col: col_end,
3255                byte_offset: match_start,
3256                byte_end: match_end,
3257                marker,
3258                marker_count,
3259                content: content_part.to_string(),
3260            });
3261        }
3262
3263        emphasis_spans
3264    }
3265
3266    /// Parse table rows in the content
3267    fn parse_table_rows(content: &str, lines: &[LineInfo]) -> Vec<TableRow> {
3268        let mut table_rows = Vec::with_capacity(lines.len() / 20);
3269
3270        for (line_idx, line_info) in lines.iter().enumerate() {
3271            // Skip lines in code blocks or blank lines
3272            if line_info.in_code_block || line_info.is_blank {
3273                continue;
3274            }
3275
3276            let line = line_info.content(content);
3277            let line_num = line_idx + 1;
3278
3279            // Check if this line contains pipes (potential table row)
3280            if !line.contains('|') {
3281                continue;
3282            }
3283
3284            // Count columns by splitting on pipes
3285            let parts: Vec<&str> = line.split('|').collect();
3286            let column_count = if parts.len() > 2 { parts.len() - 2 } else { parts.len() };
3287
3288            // Check if this is a separator row
3289            let is_separator = line.chars().all(|c| "|:-+ \t".contains(c));
3290            let mut column_alignments = Vec::new();
3291
3292            if is_separator {
3293                for part in &parts[1..parts.len() - 1] {
3294                    // Skip first and last empty parts
3295                    let trimmed = part.trim();
3296                    let alignment = if trimmed.starts_with(':') && trimmed.ends_with(':') {
3297                        "center".to_string()
3298                    } else if trimmed.ends_with(':') {
3299                        "right".to_string()
3300                    } else if trimmed.starts_with(':') {
3301                        "left".to_string()
3302                    } else {
3303                        "none".to_string()
3304                    };
3305                    column_alignments.push(alignment);
3306                }
3307            }
3308
3309            table_rows.push(TableRow {
3310                line: line_num,
3311                is_separator,
3312                column_count,
3313                column_alignments,
3314            });
3315        }
3316
3317        table_rows
3318    }
3319
3320    /// Parse bare URLs and emails in the content
3321    fn parse_bare_urls(content: &str, lines: &[LineInfo], code_blocks: &[(usize, usize)]) -> Vec<BareUrl> {
3322        let mut bare_urls = Vec::with_capacity(content.matches("http").count() + content.matches('@').count());
3323
3324        // Check for bare URLs (not in angle brackets or markdown links)
3325        for cap in URL_SIMPLE_REGEX.captures_iter(content) {
3326            let full_match = cap.get(0).unwrap();
3327            let match_start = full_match.start();
3328            let match_end = full_match.end();
3329
3330            // Skip if in code block
3331            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3332                continue;
3333            }
3334
3335            // Skip if already in angle brackets or markdown links
3336            let preceding_char = if match_start > 0 {
3337                content.chars().nth(match_start - 1)
3338            } else {
3339                None
3340            };
3341            let following_char = content.chars().nth(match_end);
3342
3343            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3344                continue;
3345            }
3346            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3347                continue;
3348            }
3349
3350            let url = full_match.as_str();
3351            let url_type = if url.starts_with("https://") {
3352                "https"
3353            } else if url.starts_with("http://") {
3354                "http"
3355            } else if url.starts_with("ftp://") {
3356                "ftp"
3357            } else {
3358                "other"
3359            };
3360
3361            // Find which line this URL is on
3362            let mut line_num = 1;
3363            let mut col_start = match_start;
3364            let mut col_end = match_end;
3365            for (idx, line_info) in lines.iter().enumerate() {
3366                if match_start >= line_info.byte_offset {
3367                    line_num = idx + 1;
3368                    col_start = match_start - line_info.byte_offset;
3369                    col_end = match_end - line_info.byte_offset;
3370                } else {
3371                    break;
3372                }
3373            }
3374
3375            bare_urls.push(BareUrl {
3376                line: line_num,
3377                start_col: col_start,
3378                end_col: col_end,
3379                byte_offset: match_start,
3380                byte_end: match_end,
3381                url: url.to_string(),
3382                url_type: url_type.to_string(),
3383            });
3384        }
3385
3386        // Check for bare email addresses
3387        for cap in BARE_EMAIL_PATTERN.captures_iter(content) {
3388            let full_match = cap.get(0).unwrap();
3389            let match_start = full_match.start();
3390            let match_end = full_match.end();
3391
3392            // Skip if in code block
3393            if CodeBlockUtils::is_in_code_block_or_span(code_blocks, match_start) {
3394                continue;
3395            }
3396
3397            // Skip if already in angle brackets or markdown links
3398            let preceding_char = if match_start > 0 {
3399                content.chars().nth(match_start - 1)
3400            } else {
3401                None
3402            };
3403            let following_char = content.chars().nth(match_end);
3404
3405            if preceding_char == Some('<') || preceding_char == Some('(') || preceding_char == Some('[') {
3406                continue;
3407            }
3408            if following_char == Some('>') || following_char == Some(')') || following_char == Some(']') {
3409                continue;
3410            }
3411
3412            let email = full_match.as_str();
3413
3414            // Find which line this email is on
3415            let mut line_num = 1;
3416            let mut col_start = match_start;
3417            let mut col_end = match_end;
3418            for (idx, line_info) in lines.iter().enumerate() {
3419                if match_start >= line_info.byte_offset {
3420                    line_num = idx + 1;
3421                    col_start = match_start - line_info.byte_offset;
3422                    col_end = match_end - line_info.byte_offset;
3423                } else {
3424                    break;
3425                }
3426            }
3427
3428            bare_urls.push(BareUrl {
3429                line: line_num,
3430                start_col: col_start,
3431                end_col: col_end,
3432                byte_offset: match_start,
3433                byte_end: match_end,
3434                url: email.to_string(),
3435                url_type: "email".to_string(),
3436            });
3437        }
3438
3439        bare_urls
3440    }
3441
    /// Get an iterator over valid CommonMark headings
    ///
    /// This iterator filters out malformed headings like `#NoSpace` (hashtag-like patterns)
    /// that should be flagged by MD018 but should not be processed by other heading rules.
    ///
    /// The returned iterator is built from this context's `lines` and borrows
    /// from `self`, so it cannot outlive the context.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use rumdl_lib::lint_context::LintContext;
    /// use rumdl_lib::config::MarkdownFlavor;
    ///
    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
    ///
    /// for heading in ctx.valid_headings() {
    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
    /// }
    /// // Only prints valid headings, skips `#NoSpace`
    /// ```
    #[must_use]
    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
        ValidHeadingsIter::new(&self.lines)
    }
3465
3466    /// Check if the document contains any valid CommonMark headings
3467    ///
3468    /// Returns `true` if there is at least one heading with proper space after `#`.
3469    #[must_use]
3470    pub fn has_valid_headings(&self) -> bool {
3471        self.lines
3472            .iter()
3473            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
3474    }
3475}
3476
3477/// Merge adjacent list blocks that should be treated as one
3478fn merge_adjacent_list_blocks(content: &str, list_blocks: &mut Vec<ListBlock>, lines: &[LineInfo]) {
3479    if list_blocks.len() < 2 {
3480        return;
3481    }
3482
3483    let mut merger = ListBlockMerger::new(content, lines);
3484    *list_blocks = merger.merge(list_blocks);
3485}
3486
/// Helper struct to manage the complex logic of merging list blocks
///
/// It only borrows the inputs that the merge decisions need to look at;
/// it owns no data of its own.
struct ListBlockMerger<'a> {
    /// Full document text, handed to `LineInfo::content` to read a line's text
    content: &'a str,
    /// Per-line metadata, looked up with a 0-based index (`line_num - 1`)
    lines: &'a [LineInfo],
}
3492
3493impl<'a> ListBlockMerger<'a> {
3494    fn new(content: &'a str, lines: &'a [LineInfo]) -> Self {
3495        Self { content, lines }
3496    }
3497
3498    fn merge(&mut self, list_blocks: &[ListBlock]) -> Vec<ListBlock> {
3499        let mut merged = Vec::with_capacity(list_blocks.len());
3500        let mut current = list_blocks[0].clone();
3501
3502        for next in list_blocks.iter().skip(1) {
3503            if self.should_merge_blocks(&current, next) {
3504                current = self.merge_two_blocks(current, next);
3505            } else {
3506                merged.push(current);
3507                current = next.clone();
3508            }
3509        }
3510
3511        merged.push(current);
3512        merged
3513    }
3514
3515    /// Determine if two adjacent list blocks should be merged
3516    fn should_merge_blocks(&self, current: &ListBlock, next: &ListBlock) -> bool {
3517        // Basic compatibility checks
3518        if !self.blocks_are_compatible(current, next) {
3519            return false;
3520        }
3521
3522        // Check spacing and content between blocks
3523        let spacing = self.analyze_spacing_between(current, next);
3524        match spacing {
3525            BlockSpacing::Consecutive => true,
3526            BlockSpacing::SingleBlank => self.can_merge_with_blank_between(current, next),
3527            BlockSpacing::MultipleBlanks | BlockSpacing::ContentBetween => {
3528                self.can_merge_with_content_between(current, next)
3529            }
3530        }
3531    }
3532
3533    /// Check if blocks have compatible structure for merging
3534    fn blocks_are_compatible(&self, current: &ListBlock, next: &ListBlock) -> bool {
3535        current.is_ordered == next.is_ordered
3536            && current.blockquote_prefix == next.blockquote_prefix
3537            && current.nesting_level == next.nesting_level
3538    }
3539
3540    /// Analyze the spacing between two list blocks
3541    fn analyze_spacing_between(&self, current: &ListBlock, next: &ListBlock) -> BlockSpacing {
3542        let gap = next.start_line - current.end_line;
3543
3544        match gap {
3545            1 => BlockSpacing::Consecutive,
3546            2 => BlockSpacing::SingleBlank,
3547            _ if gap > 2 => {
3548                if self.has_only_blank_lines_between(current, next) {
3549                    BlockSpacing::MultipleBlanks
3550                } else {
3551                    BlockSpacing::ContentBetween
3552                }
3553            }
3554            _ => BlockSpacing::Consecutive, // gap == 0, overlapping (shouldn't happen)
3555        }
3556    }
3557
3558    /// Check if unordered lists can be merged with a single blank line between
3559    fn can_merge_with_blank_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3560        // Check if there are structural separators between the blocks
3561        // If has_meaningful_content_between returns true, it means there are structural separators
3562        if has_meaningful_content_between(self.content, current, next, self.lines) {
3563            return false; // Structural separators prevent merging
3564        }
3565
3566        // Only merge unordered lists with same marker across single blank
3567        !current.is_ordered && current.marker == next.marker
3568    }
3569
3570    /// Check if ordered lists can be merged when there's content between them
3571    fn can_merge_with_content_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3572        // Do not merge lists if there are structural separators between them
3573        if has_meaningful_content_between(self.content, current, next, self.lines) {
3574            return false; // Structural separators prevent merging
3575        }
3576
3577        // Only consider merging ordered lists if there's no structural content between
3578        current.is_ordered && next.is_ordered
3579    }
3580
3581    /// Check if there are only blank lines between blocks
3582    fn has_only_blank_lines_between(&self, current: &ListBlock, next: &ListBlock) -> bool {
3583        for line_num in (current.end_line + 1)..next.start_line {
3584            if let Some(line_info) = self.lines.get(line_num - 1)
3585                && !line_info.content(self.content).trim().is_empty()
3586            {
3587                return false;
3588            }
3589        }
3590        true
3591    }
3592
3593    /// Merge two compatible list blocks into one
3594    fn merge_two_blocks(&self, mut current: ListBlock, next: &ListBlock) -> ListBlock {
3595        current.end_line = next.end_line;
3596        current.item_lines.extend_from_slice(&next.item_lines);
3597
3598        // Update max marker width
3599        current.max_marker_width = current.max_marker_width.max(next.max_marker_width);
3600
3601        // Handle marker consistency for unordered lists
3602        if !current.is_ordered && self.markers_differ(&current, next) {
3603            current.marker = None; // Mixed markers
3604        }
3605
3606        current
3607    }
3608
3609    /// Check if two blocks have different markers
3610    fn markers_differ(&self, current: &ListBlock, next: &ListBlock) -> bool {
3611        current.marker.is_some() && next.marker.is_some() && current.marker != next.marker
3612    }
3613}
3614
/// Types of spacing between list blocks
///
/// Classification of the line gap between the end of one list block and the
/// start of the next, as computed by `ListBlockMerger::analyze_spacing_between`.
#[derive(Debug, PartialEq)]
enum BlockSpacing {
    Consecutive,    // No gap between blocks (next block starts right after the current one ends)
    SingleBlank,    // One blank line between blocks (exactly one line in between)
    MultipleBlanks, // Multiple blank lines but no content
    ContentBetween, // Content exists between blocks (at least one in-between line is not blank)
}
3623
3624/// Check if there's meaningful content (not just blank lines) between two list blocks
3625fn has_meaningful_content_between(content: &str, current: &ListBlock, next: &ListBlock, lines: &[LineInfo]) -> bool {
3626    // Check lines between current.end_line and next.start_line
3627    for line_num in (current.end_line + 1)..next.start_line {
3628        if let Some(line_info) = lines.get(line_num - 1) {
3629            // Convert to 0-indexed
3630            let trimmed = line_info.content(content).trim();
3631
3632            // Skip empty lines
3633            if trimmed.is_empty() {
3634                continue;
3635            }
3636
3637            // Check for structural separators that should separate lists (CommonMark compliant)
3638
3639            // Headings separate lists
3640            if line_info.heading.is_some() {
3641                return true; // Has meaningful content - headings separate lists
3642            }
3643
3644            // Horizontal rules separate lists (---, ***, ___)
3645            if is_horizontal_rule(trimmed) {
3646                return true; // Has meaningful content - horizontal rules separate lists
3647            }
3648
3649            // Tables separate lists
3650            if crate::utils::skip_context::is_table_line(trimmed) {
3651                return true; // Has meaningful content - tables separate lists
3652            }
3653
3654            // Blockquotes separate lists
3655            if trimmed.starts_with('>') {
3656                return true; // Has meaningful content - blockquotes separate lists
3657            }
3658
3659            // Code block fences separate lists (unless properly indented as list content)
3660            if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
3661                let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3662
3663                // Check if this code block is properly indented as list continuation
3664                let min_continuation_indent = if current.is_ordered {
3665                    current.nesting_level + current.max_marker_width + 1 // +1 for space after marker
3666                } else {
3667                    current.nesting_level + 2
3668                };
3669
3670                if line_indent < min_continuation_indent {
3671                    // This is a standalone code block that separates lists
3672                    return true; // Has meaningful content - standalone code blocks separate lists
3673                }
3674            }
3675
3676            // Check if this line has proper indentation for list continuation
3677            let line_indent = line_info.byte_len - line_info.content(content).trim_start().len();
3678
3679            // Calculate minimum indentation needed to be list continuation
3680            let min_indent = if current.is_ordered {
3681                current.nesting_level + current.max_marker_width
3682            } else {
3683                current.nesting_level + 2
3684            };
3685
3686            // If the line is not indented enough to be list continuation, it's meaningful content
3687            if line_indent < min_indent {
3688                return true; // Has meaningful content - content not indented as list continuation
3689            }
3690
3691            // If we reach here, the line is properly indented as list continuation
3692            // Continue checking other lines
3693        }
3694    }
3695
3696    // Only blank lines or properly indented list continuation content between blocks
3697    false
3698}
3699
/// Check if a line is a horizontal rule (---, ***, ___) per CommonMark spec.
///
/// CommonMark rules for thematic breaks (horizontal rules):
/// - May have 0-3 spaces of leading indentation (but NOT tabs)
/// - Must have 3+ of the same character (-, *, or _)
/// - May have spaces or tabs between and after the characters
/// - No other characters allowed
///
/// Only literal spaces are accepted as indentation. A tab anywhere in the
/// leading whitespace (for example `" \t---"`) or any other leading
/// whitespace character makes the line a non-match. Trailing whitespace,
/// including a stray `\r`, is ignored.
pub fn is_horizontal_rule_line(line: &str) -> bool {
    // Strip spaces only, so that a tab (or any other character) left at the
    // front is rejected by the content check below.
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return false;
    }

    is_horizontal_rule_content(rest.trim_end())
}

/// Check if trimmed content matches horizontal rule pattern.
/// Use `is_horizontal_rule_line` for full CommonMark compliance including indentation check.
///
/// The first character must be `-`, `*` or `_`; it has to occur at least
/// three times, and the only other characters allowed are spaces and tabs.
/// Input with leading whitespace therefore never matches.
pub fn is_horizontal_rule_content(trimmed: &str) -> bool {
    let marker = match trimmed.chars().next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    // Single pass without allocating: count markers, bail out on anything
    // that is neither the marker nor a space or tab.
    let mut count = 0usize;
    for ch in trimmed.chars() {
        if ch == marker {
            count += 1;
        } else if ch != ' ' && ch != '\t' {
            return false;
        }
    }

    count >= 3
}
3740
/// Backwards-compatible alias for `is_horizontal_rule_content`
///
/// Forwards `trimmed` unchanged and returns the same result. As with the
/// function it wraps, no indentation check is performed here; use
/// `is_horizontal_rule_line` when the raw, untrimmed line is available.
pub fn is_horizontal_rule(trimmed: &str) -> bool {
    is_horizontal_rule_content(trimmed)
}
3745
/// Unit tests for `LintContext`: line offsets, offset-to-line/column mapping,
/// per-line info, list item detection and MDX ESM block flags.
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty document has a single line offset and no line entries.
    #[test]
    fn test_empty_content() {
        let ctx = LintContext::new("", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.lines.len(), 0);
    }

    /// A document without a newline maps every offset onto line 1.
    #[test]
    fn test_single_line() {
        let ctx = LintContext::new("# Hello", MarkdownFlavor::Standard, None);
        assert_eq!(ctx.content, "# Hello");
        assert_eq!(ctx.line_offsets, vec![0]);
        assert_eq!(ctx.offset_to_line_col(0), (1, 1));
        assert_eq!(ctx.offset_to_line_col(3), (1, 4));
    }

    /// Line offsets and 1-based line/column lookup across several lines.
    #[test]
    fn test_multi_line() {
        let content = "# Title\n\nSecond line\nThird line";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        assert_eq!(ctx.line_offsets, vec![0, 8, 9, 21]);
        // Test offset to line/col
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // start
        assert_eq!(ctx.offset_to_line_col(8), (2, 1)); // start of blank line
        assert_eq!(ctx.offset_to_line_col(9), (3, 1)); // start of 'Second line'
        assert_eq!(ctx.offset_to_line_col(15), (3, 7)); // middle of 'Second line'
        assert_eq!(ctx.offset_to_line_col(21), (4, 1)); // start of 'Third line'
    }

    /// Per-line metadata (content, byte offset, indent, blank flag) and the
    /// `line_to_byte_offset` / `line_info` helpers.
    #[test]
    fn test_line_info() {
        let content = "# Title\n    indented\n\ncode:\n```rust\nfn main() {}\n```";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Test line info
        assert_eq!(ctx.lines.len(), 7);

        // Line 1: "# Title"
        let line1 = &ctx.lines[0];
        assert_eq!(line1.content(ctx.content), "# Title");
        assert_eq!(line1.byte_offset, 0);
        assert_eq!(line1.indent, 0);
        assert!(!line1.is_blank);
        assert!(!line1.in_code_block);
        assert!(line1.list_item.is_none());

        // Line 2: "    indented"
        let line2 = &ctx.lines[1];
        assert_eq!(line2.content(ctx.content), "    indented");
        assert_eq!(line2.byte_offset, 8);
        assert_eq!(line2.indent, 4);
        assert!(!line2.is_blank);

        // Line 3: "" (blank)
        let line3 = &ctx.lines[2];
        assert_eq!(line3.content(ctx.content), "");
        assert!(line3.is_blank);

        // Test helper methods
        assert_eq!(ctx.line_to_byte_offset(1), Some(0));
        assert_eq!(ctx.line_to_byte_offset(2), Some(8));
        assert_eq!(ctx.line_info(1).map(|l| l.indent), Some(0));
        assert_eq!(ctx.line_info(2).map(|l| l.indent), Some(4));
    }

    /// Marker, ordering and column data for unordered, nested and ordered items.
    #[test]
    fn test_list_item_detection() {
        let content = "- Unordered item\n  * Nested item\n1. Ordered item\n   2) Nested ordered\n\nNot a list";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // Line 1: "- Unordered item"
        let line1 = &ctx.lines[0];
        assert!(line1.list_item.is_some());
        let list1 = line1.list_item.as_ref().unwrap();
        assert_eq!(list1.marker, "-");
        assert!(!list1.is_ordered);
        assert_eq!(list1.marker_column, 0);
        assert_eq!(list1.content_column, 2);

        // Line 2: "  * Nested item"
        let line2 = &ctx.lines[1];
        assert!(line2.list_item.is_some());
        let list2 = line2.list_item.as_ref().unwrap();
        assert_eq!(list2.marker, "*");
        assert_eq!(list2.marker_column, 2);

        // Line 3: "1. Ordered item"
        let line3 = &ctx.lines[2];
        assert!(line3.list_item.is_some());
        let list3 = line3.list_item.as_ref().unwrap();
        assert_eq!(list3.marker, "1.");
        assert!(list3.is_ordered);
        assert_eq!(list3.number, Some(1));

        // Line 6: "Not a list"
        let line6 = &ctx.lines[5];
        assert!(line6.list_item.is_none());
    }

    /// Offsets at line starts, just past a character, and at end of content.
    #[test]
    fn test_offset_to_line_col_edge_cases() {
        let content = "a\nb\nc";
        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
        // line_offsets: [0, 2, 4]
        assert_eq!(ctx.offset_to_line_col(0), (1, 1)); // 'a'
        assert_eq!(ctx.offset_to_line_col(1), (1, 2)); // after 'a'
        assert_eq!(ctx.offset_to_line_col(2), (2, 1)); // 'b'
        assert_eq!(ctx.offset_to_line_col(3), (2, 2)); // after 'b'
        assert_eq!(ctx.offset_to_line_col(4), (3, 1)); // 'c'
        assert_eq!(ctx.offset_to_line_col(5), (3, 2)); // after 'c'
    }

    /// In the MDX flavor, leading `import`/`export` lines are flagged as ESM
    /// blocks while the lines after them are not.
    #[test]
    fn test_mdx_esm_blocks() {
        let content = r##"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall

In {year}, the snowfall was above average.
It was followed by a warm spring which caused
flood conditions in many of the nearby rivers.

<Chart color="#fcb32c" year={year} />
"##;

        let ctx = LintContext::new(content, MarkdownFlavor::MDX, None);

        // Check that lines 1 and 2 are marked as ESM blocks
        assert_eq!(ctx.lines.len(), 10);
        assert!(ctx.lines[0].in_esm_block, "Line 1 (import) should be in_esm_block");
        assert!(ctx.lines[1].in_esm_block, "Line 2 (export) should be in_esm_block");
        assert!(!ctx.lines[2].in_esm_block, "Line 3 (blank) should NOT be in_esm_block");
        assert!(
            !ctx.lines[3].in_esm_block,
            "Line 4 (heading) should NOT be in_esm_block"
        );
        assert!(!ctx.lines[4].in_esm_block, "Line 5 (blank) should NOT be in_esm_block");
        assert!(!ctx.lines[5].in_esm_block, "Line 6 (text) should NOT be in_esm_block");
    }

    /// The same `import`/`export` lines are not flagged in the Standard flavor.
    #[test]
    fn test_mdx_esm_blocks_not_detected_in_standard_flavor() {
        let content = r#"import {Chart} from './snowfall.js'
export const year = 2023

# Last year's snowfall
"#;

        let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);

        // ESM blocks should NOT be detected in Standard flavor
        assert!(
            !ctx.lines[0].in_esm_block,
            "Line 1 should NOT be in_esm_block in Standard flavor"
        );
        assert!(
            !ctx.lines[1].in_esm_block,
            "Line 2 should NOT be in_esm_block in Standard flavor"
        );
    }
}
rumdl_lib/lint_context.rs

rumdl_lib/
lint_context.rs