rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, optionally reporting how long it took.
///
/// This variant is compiled for every target except `wasm32`. A
/// `std::time::Instant` is captured before `$code` runs; when `$profile`
/// evaluates to `true`, a `[PROFILE] <name>: <elapsed>` line is written to
/// stderr afterwards. The value of `$code` is returned in either case.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        // `$profile` is only consulted after `$code` has finished running.
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}

/// `wasm32` variant of `profile_section!`: expands to `$code` alone.
///
/// `$name` and `$profile` are accepted so call sites look the same on every
/// target, but they are never expanded, so nothing is timed or printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) pandoc_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub code_block_details: Vec<CodeBlockDetail>, // Per-block metadata (fenced/indented, info string)
61    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, // Pre-computed strong emphasis spans
62    pub line_to_list: crate::utils::code_block_utils::LineToListMap, // Ordered list membership by line
63    pub list_start_values: crate::utils::code_block_utils::ListStartValues, // Start values per list ID
64    pub lines: Vec<LineInfo>,             // Pre-computed line information
65    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
66    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
67    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
68    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
69    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
70    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
71    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
72    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
73    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
74    pub char_frequency: CharFrequency,    // Character frequency analysis
75    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
76    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
77    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
78    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
79    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
80    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
81    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
82    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
83    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
84    pub flavor: MarkdownFlavor,           // Markdown flavor being used
85    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
86    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
87    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
88    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (@key, [@key])
89    pandoc_div_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto div block ranges (::: ... :::)
90    colon_fence_ranges: Vec<(usize, usize)>, // Pre-computed Azure DevOps colon code fence ranges (:::lang ... :::)
91    inline_footnote_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline footnote ranges (^[...])
92    pandoc_header_slugs: std::collections::HashSet<String>, // Pre-computed Pandoc implicit header reference slugs
93    example_list_marker_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example-list marker ranges (@) / (@label)
94    example_reference_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example reference ranges (@label) inline
95    sub_super_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc subscript (~x~) and superscript (^x^) ranges
96    inline_code_attr_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline code attribute ranges (`code`{.lang})
97    bracketed_span_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc bracketed span ranges ([text]{attrs})
98    line_block_ranges: Vec<crate::utils::skip_context::ByteRange>,     // Pre-computed Pandoc line block ranges (| text)
99    pipe_table_caption_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc pipe-table caption ranges (: caption)
100    pandoc_metadata_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc YAML metadata block ranges (--- ... --- or ...)
101    grid_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc grid-table ranges (+---+---+)
102    multi_line_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc multi-line table ranges
103    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
104    link_title_ranges: Vec<(usize, usize)>, // Pre-computed sorted link title byte ranges
105    code_span_byte_ranges: Vec<(usize, usize)>, // Pre-computed code span byte ranges from pulldown-cmark
106    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
107    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
108    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, // Lazy-loaded lazy continuation lines
109    myst_directive_ranges: Vec<(usize, usize)>, // Pre-computed MyST colon directive byte ranges (:::{name} ... :::)
110    myst_comment_ranges: Vec<(usize, usize)>, // Pre-computed MyST comment byte ranges (% comment)
111    myst_role_ranges: Vec<(usize, usize)>, // Pre-computed MyST role byte ranges ({role}`content`)
112}
113
114impl<'a> LintContext<'a> {
115    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
116        #[cfg(not(target_arch = "wasm32"))]
117        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
118
119        let line_offsets = profile_section!("Line offsets", profile, {
120            let mut offsets = vec![0];
121            for (i, c) in content.char_indices() {
122                if c == '\n' {
123                    offsets.push(i + 1);
124                }
125            }
126            offsets
127        });
128
129        // Compute content_lines once for all functions that need it
130        let content_lines: Vec<&str> = content.lines().collect();
131
132        // Detect front matter boundaries once for all functions that need it
133        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
134
135        // Detect code blocks and code spans once and cache them
136        let parse_result = profile_section!(
137            "Code blocks",
138            profile,
139            CodeBlockUtils::detect_code_blocks_and_spans(content)
140        );
141        let mut code_blocks = parse_result.code_blocks;
142        let code_span_ranges = parse_result.code_spans;
143        let code_block_details = parse_result.code_block_details;
144        let strong_spans = parse_result.strong_spans;
145        let line_to_list = parse_result.line_to_list;
146        let list_start_values = parse_result.list_start_values;
147
148        // Pre-compute HTML comment ranges ONCE for all operations
149        let html_comment_ranges = profile_section!(
150            "HTML comment ranges",
151            profile,
152            crate::utils::skip_context::compute_html_comment_ranges(content)
153        );
154
155        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
        // Skipped for flavors where `:::` has a different meaning: colon code fences
        // (e.g. AzureDevOps) and MyST colon directives.
158        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
159            if flavor.supports_colon_code_fences() || flavor.supports_myst_directives() {
160                Vec::new()
161            } else {
162                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
163            }
164        });
165
166        // Pre-compute Pandoc/Quarto div block ranges for Pandoc-compatible flavors
167        let pandoc_div_ranges = profile_section!("Pandoc div ranges", profile, {
168            if flavor.is_pandoc_compatible() {
169                crate::utils::pandoc::detect_div_block_ranges(content)
170            } else {
171                Vec::new()
172            }
173        });
174
175        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
176        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
177            if flavor == MarkdownFlavor::MkDocs {
178                crate::utils::pymdown_blocks::detect_block_ranges(content)
179            } else {
180                Vec::new()
181            }
182        });
183
184        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
185        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
186        let skip_ranges = SkipByteRanges {
187            html_comment_ranges: &html_comment_ranges,
188            autodoc_ranges: &autodoc_ranges,
189            pandoc_div_ranges: &pandoc_div_ranges,
190            pymdown_block_ranges: &pymdown_block_ranges,
191        };
192        let (mut lines, emphasis_spans) = profile_section!(
193            "Basic line info",
194            profile,
195            line_computation::compute_basic_line_info(
196                content,
197                &content_lines,
198                &line_offsets,
199                &code_blocks,
200                flavor,
201                &skip_ranges,
202                front_matter_end,
203            )
204        );
205
206        // Detect HTML blocks BEFORE heading detection
207        profile_section!(
208            "HTML blocks",
209            profile,
210            heading_detection::detect_html_blocks(content, &mut lines)
211        );
212
213        // Detect ESM import/export blocks in MDX files BEFORE heading detection
214        profile_section!(
215            "ESM blocks",
216            profile,
217            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
218        );
219
220        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
221        profile_section!(
222            "JSX block detection",
223            profile,
224            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
225        );
226
227        // Detect JSX expressions and MDX comments in MDX files
228        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
229            "JSX/MDX detection",
230            profile,
231            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
232        );
233
234        // Detect `<div markdown>`-style HTML blocks (grid cards, etc.) regardless of flavor.
235        // The `markdown` attribute is an explicit, author-supplied signal; recognizing it
236        // in all flavors keeps `rumdl fmt` from mangling Material grid cards when the
237        // MkDocs flavor isn't active.
238        profile_section!(
239            "Markdown-in-HTML blocks",
240            profile,
241            flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
242        );
243
244        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
245        profile_section!(
246            "MkDocs constructs",
247            profile,
248            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
249        );
250
251        // Detect footnote definitions and correct false code block detection.
252        // With ENABLE_FOOTNOTES, pulldown-cmark correctly parses multi-line
253        // footnotes, but the code block detector may still mark 4-space-indented
254        // footnote continuation lines as indented code blocks.
255        profile_section!(
256            "Footnote definitions",
257            profile,
258            detect_footnote_definitions(content, &mut lines, &line_offsets)
259        );
260
261        // Filter code_blocks to remove false positives from footnote continuation content.
262        // Same pattern as MkDocs/JSX corrections below.
263        {
264            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
265            for &(start, end) in &code_blocks {
266                let start_line = line_offsets
267                    .partition_point(|&offset| offset <= start)
268                    .saturating_sub(1);
269                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
270
271                let mut sub_start: Option<usize> = None;
272                for (i, &offset) in line_offsets[start_line..end_line]
273                    .iter()
274                    .enumerate()
275                    .map(|(j, o)| (j + start_line, o))
276                {
277                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
278                    if is_real_code && sub_start.is_none() {
279                        let byte_start = if i == start_line { start } else { offset };
280                        sub_start = Some(byte_start);
281                    } else if !is_real_code && sub_start.is_some() {
282                        new_code_blocks.push((sub_start.unwrap(), offset));
283                        sub_start = None;
284                    }
285                }
286                if let Some(s) = sub_start {
287                    new_code_blocks.push((s, end));
288                }
289            }
290            code_blocks = new_code_blocks;
291        }
292
293        // Filter code_blocks to remove false positives from MkDocs admonition/tab content
294        // and `<div markdown>` HTML blocks (grid cards).
295        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
296        // these containers this is regular markdown content. detect_mkdocs_line_info and
297        // detect_markdown_html_blocks already corrected LineInfo.in_code_block for these lines,
298        // but the code_blocks byte ranges are still stale. We split ranges rather than using
299        // all-or-nothing removal, so fenced code blocks within the containers are preserved.
300        let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
301        if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
302            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
303            for &(start, end) in &code_blocks {
304                let start_line = line_offsets
305                    .partition_point(|&offset| offset <= start)
306                    .saturating_sub(1);
307                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
308
309                // Walk lines in this range, collecting sub-ranges where in_code_block is true
310                let mut sub_start: Option<usize> = None;
311                for (i, &offset) in line_offsets[start_line..end_line]
312                    .iter()
313                    .enumerate()
314                    .map(|(j, o)| (j + start_line, o))
315                {
316                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
317                    if is_real_code && sub_start.is_none() {
318                        let byte_start = if i == start_line { start } else { offset };
319                        sub_start = Some(byte_start);
320                    } else if !is_real_code && sub_start.is_some() {
321                        new_code_blocks.push((sub_start.unwrap(), offset));
322                        sub_start = None;
323                    }
324                }
325                if let Some(s) = sub_start {
326                    new_code_blocks.push((s, end));
327                }
328            }
329            code_blocks = new_code_blocks;
330        }
331
332        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
333        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
334        // inside JSX component blocks, but code_blocks byte ranges need updating too.
335        if flavor.supports_jsx() {
336            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
337            for &(start, end) in &code_blocks {
338                let start_line = line_offsets
339                    .partition_point(|&offset| offset <= start)
340                    .saturating_sub(1);
341                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
342
343                let mut sub_start: Option<usize> = None;
344                for (i, &offset) in line_offsets[start_line..end_line]
345                    .iter()
346                    .enumerate()
347                    .map(|(j, o)| (j + start_line, o))
348                {
349                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
350                    if is_real_code && sub_start.is_none() {
351                        let byte_start = if i == start_line { start } else { offset };
352                        sub_start = Some(byte_start);
353                    } else if !is_real_code && sub_start.is_some() {
354                        new_code_blocks.push((sub_start.unwrap(), offset));
355                        sub_start = None;
356                    }
357                }
358                if let Some(s) = sub_start {
359                    new_code_blocks.push((s, end));
360                }
361            }
362            code_blocks = new_code_blocks;
363        }
364
365        // Detect Azure DevOps colon code fences and extend code_blocks so that
366        // all byte-range consumers correctly skip their content.
367        let colon_fence_ranges = profile_section!(
368            "Azure colon fence detection",
369            profile,
370            flavor_detection::detect_azure_colon_fences(content, &mut lines, flavor)
371        );
372        if !colon_fence_ranges.is_empty() {
373            code_blocks.extend(colon_fence_ranges.iter().copied());
374            code_blocks.sort_by_key(|&(start, _)| start);
375        }
376
377        // Detect MyST colon directives (:::{name} ... :::) — these are structural
378        // containers, NOT code blocks. Content inside is linted as markdown.
379        let myst_directive_ranges = profile_section!(
380            "MyST colon directives",
381            profile,
382            flavor_detection::detect_myst_colon_directives(content, &mut lines, flavor)
383        );
384
385        // Detect MyST % comments
386        let myst_comment_ranges = profile_section!(
387            "MyST comments",
388            profile,
389            flavor_detection::detect_myst_comments(content, &mut lines, flavor)
390        );
391
392        // Detect MyST backtick directives (```{name}) and clear in_code_block for
393        // content-bearing directives so their body is linted as markdown.
394        profile_section!(
395            "MyST backtick directives",
396            profile,
397            flavor_detection::detect_myst_backtick_directives(
398                content,
399                &mut lines,
400                flavor,
401                &code_block_details,
402                &line_offsets
403            )
404        );
405
406        // Filter code_blocks to remove false positives from MyST content-bearing directives.
407        // Same pattern as MkDocs admonition filtering.
408        if flavor.supports_myst_directives() {
409            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
410            for &(start, end) in &code_blocks {
411                let start_line = line_offsets
412                    .partition_point(|&offset| offset <= start)
413                    .saturating_sub(1);
414                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
415
416                let mut sub_start: Option<usize> = None;
417                for (i, &offset) in line_offsets[start_line..end_line]
418                    .iter()
419                    .enumerate()
420                    .map(|(j, o)| (j + start_line, o))
421                {
422                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
423                    if is_real_code && sub_start.is_none() {
424                        let byte_start = if i == start_line { start } else { offset };
425                        sub_start = Some(byte_start);
426                    } else if !is_real_code && sub_start.is_some() {
427                        new_code_blocks.push((sub_start.unwrap(), offset));
428                        sub_start = None;
429                    }
430                }
431                if let Some(s) = sub_start {
432                    new_code_blocks.push((s, end));
433                }
434            }
435            code_blocks = new_code_blocks;
436        }
437
438        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
439        profile_section!(
440            "Kramdown constructs",
441            profile,
442            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
443        );
444
445        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
446        // so downstream heading detection and collection builders never see them.
447        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
448        // from being populated inside extension blocks.
449        for line in &mut lines {
450            if line.in_kramdown_extension_block {
451                line.list_item = None;
452                line.is_horizontal_rule = false;
453                line.blockquote = None;
454                line.is_kramdown_block_ial = false;
455            }
456        }
457
458        // Detect Obsidian comments (%%...%%) in Obsidian flavor
459        let obsidian_comment_ranges = profile_section!(
460            "Obsidian comments",
461            profile,
462            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
463        );
464
465        // Detect MyST role syntax ({role}`content`)
466        let myst_role_ranges = profile_section!(
467            "MyST roles",
468            profile,
469            flavor_detection::detect_myst_role_ranges(content, &lines, flavor, &code_blocks)
470        );
471
472        // Run pulldown-cmark parse for links, images, and link byte ranges in a single pass.
473        // Link byte ranges are needed for heading detection; links/images are finalized later
474        // after code_spans are available.
475        let pulldown_result = profile_section!(
476            "Links, images & link ranges",
477            profile,
478            link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
479        );
480
481        // Now detect headings and blockquotes
482        profile_section!(
483            "Headings & blockquotes",
484            profile,
485            heading_detection::detect_headings_and_blockquotes(
486                &content_lines,
487                &mut lines,
488                flavor,
489                &html_comment_ranges,
490                &pulldown_result.link_byte_ranges,
491                front_matter_end,
492            )
493        );
494
495        // Clear headings that were detected inside kramdown extension blocks
496        for line in &mut lines {
497            if line.in_kramdown_extension_block {
498                line.heading = None;
499            }
500        }
501
502        // Parse code spans early so we can exclude them from link/image parsing
503        let mut code_spans = profile_section!(
504            "Code spans",
505            profile,
506            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
507        );
508
509        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
510        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
511        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
512        if flavor == MarkdownFlavor::MkDocs {
513            let extra = profile_section!(
514                "MkDocs code spans",
515                profile,
516                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
517            );
518            if !extra.is_empty() {
519                code_spans.extend(extra);
520                code_spans.sort_by_key(|span| span.byte_offset);
521            }
522        }
523
524        // Supplement code spans for MDX JSX component body content that pulldown-cmark missed.
525        // pulldown-cmark treats JSX component opening tags (e.g. `<ParamField>`) as HTML block
526        // starters, so backtick code spans within component bodies are invisible to the initial
527        // parse.
528        if flavor == MarkdownFlavor::MDX {
529            let extra = profile_section!(
530                "MDX JSX code spans",
531                profile,
532                element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
533            );
534            if !extra.is_empty() {
535                code_spans.extend(extra);
536                code_spans.sort_by_key(|span| span.byte_offset);
537            }
538        }
539
540        // Mark lines that are continuations of multi-line code spans
541        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
542        for span in &code_spans {
543            if span.end_line > span.line {
544                // Mark lines after the first line as continuations
545                for line_num in (span.line + 1)..=span.end_line {
546                    if let Some(line_info) = lines.get_mut(line_num - 1) {
547                        line_info.in_code_span_continuation = true;
548                    }
549                }
550            }
551        }
552
553        // Finalize links and images: filter by code_spans and run regex fallbacks
554        let (links, images, broken_links, footnote_refs) = profile_section!(
555            "Links & images finalize",
556            profile,
557            link_parser::finalize_links_and_images(
558                content,
559                &lines,
560                &code_blocks,
561                &code_spans,
562                flavor,
563                &html_comment_ranges,
564                pulldown_result
565            )
566        );
567
568        let reference_defs = profile_section!(
569            "Reference defs",
570            profile,
571            link_parser::parse_reference_defs(content, &lines)
572        );
573
574        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
575
576        // Compute character frequency for fast content analysis
577        let char_frequency = profile_section!(
578            "Char frequency",
579            profile,
580            line_computation::compute_char_frequency(content)
581        );
582
583        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
584        let table_blocks = profile_section!(
585            "Table blocks",
586            profile,
587            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
588                content,
589                &code_blocks,
590                &code_spans,
591                &html_comment_ranges,
592            )
593        );
594
595        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
596        // Rules that iterate these collections automatically skip kramdown content.
597        let links = links
598            .into_iter()
599            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
600            .collect::<Vec<_>>();
601        let images = images
602            .into_iter()
603            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
604            .collect::<Vec<_>>();
605        let broken_links = broken_links
606            .into_iter()
607            .filter(|bl| {
608                // BrokenLinkInfo has span but no line field; find line from byte offset
609                let line_idx = line_offsets
610                    .partition_point(|&offset| offset <= bl.span.start)
611                    .saturating_sub(1);
612                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
613            })
614            .collect::<Vec<_>>();
615        let footnote_refs = footnote_refs
616            .into_iter()
617            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
618            .collect::<Vec<_>>();
619        let reference_defs = reference_defs
620            .into_iter()
621            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
622            .collect::<Vec<_>>();
623        let list_blocks = list_blocks
624            .into_iter()
625            .filter(|block| {
626                !lines
627                    .get(block.start_line - 1)
628                    .is_some_and(|l| l.in_kramdown_extension_block)
629            })
630            .collect::<Vec<_>>();
631        let table_blocks = table_blocks
632            .into_iter()
633            .filter(|block| {
634                // TableBlock.start_line is 0-indexed
635                !lines
636                    .get(block.start_line)
637                    .is_some_and(|l| l.in_kramdown_extension_block)
638            })
639            .collect::<Vec<_>>();
640        let emphasis_spans = emphasis_spans
641            .into_iter()
642            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
643            .collect::<Vec<_>>();
644
645        // Rebuild reference_defs_map after filtering
646        let reference_defs_map: HashMap<String, usize> = reference_defs
647            .iter()
648            .enumerate()
649            .map(|(idx, def)| (def.id.to_lowercase(), idx))
650            .collect();
651
652        // Pre-compute sorted link title byte ranges for binary search
653        let link_title_ranges: Vec<(usize, usize)> = reference_defs
654            .iter()
655            .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
656                (Some(start), Some(end)) => Some((start, end)),
657                _ => None,
658            })
659            .collect();
660
661        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
662        let line_index = profile_section!(
663            "Line index",
664            profile,
665            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
666                content,
667                line_offsets.clone(),
668                &code_blocks,
669            )
670        );
671
672        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
673        let jinja_ranges = profile_section!(
674            "Jinja ranges",
675            profile,
676            crate::utils::jinja_utils::find_jinja_ranges(content)
677        );
678
679        // Pre-compute Pandoc/Quarto citation ranges for Pandoc-compatible flavors
680        let citation_ranges = profile_section!("Citation ranges", profile, {
681            if flavor.is_pandoc_compatible() {
682                crate::utils::pandoc::find_citation_ranges(content)
683            } else {
684                Vec::new()
685            }
686        });
687
688        // Pre-compute Pandoc inline footnote ranges for Pandoc-compatible flavors
689        let inline_footnote_ranges = profile_section!("Inline footnote ranges", profile, {
690            if flavor.is_pandoc_compatible() {
691                crate::utils::pandoc::detect_inline_footnote_ranges(content)
692            } else {
693                Vec::new()
694            }
695        });
696
697        // Pre-compute Pandoc implicit header reference slugs for Pandoc-compatible flavors
698        let pandoc_header_slugs = profile_section!("Pandoc header slugs", profile, {
699            if flavor.is_pandoc_compatible() {
700                crate::utils::pandoc::collect_pandoc_header_slugs(content)
701            } else {
702                std::collections::HashSet::new()
703            }
704        });
705
706        // Pre-compute Pandoc example-list marker ranges for Pandoc-compatible flavors
707        let example_list_marker_ranges = profile_section!("Example list markers", profile, {
708            if flavor.is_pandoc_compatible() {
709                crate::utils::pandoc::detect_example_list_marker_ranges(content)
710            } else {
711                Vec::new()
712            }
713        });
714
715        // Pre-compute Pandoc example reference ranges for Pandoc-compatible flavors
716        let example_reference_ranges = profile_section!("Example references", profile, {
717            if flavor.is_pandoc_compatible() {
718                crate::utils::pandoc::detect_example_reference_ranges(content, &example_list_marker_ranges)
719            } else {
720                Vec::new()
721            }
722        });
723
724        // Pre-compute Pandoc subscript (~x~) and superscript (^x^) ranges
725        let sub_super_ranges = profile_section!("Subscript/superscript ranges", profile, {
726            if flavor.is_pandoc_compatible() {
727                crate::utils::pandoc::detect_subscript_superscript_ranges(content)
728            } else {
729                Vec::new()
730            }
731        });
732
733        // Pre-compute Pandoc inline code attribute ranges (`code`{.lang}) for Pandoc-compatible flavors
734        let inline_code_attr_ranges = profile_section!("Inline code attribute ranges", profile, {
735            if flavor.is_pandoc_compatible() {
736                crate::utils::pandoc::detect_inline_code_attr_ranges(content)
737            } else {
738                Vec::new()
739            }
740        });
741
742        // Pre-compute Pandoc bracketed span ranges ([text]{attrs}) for Pandoc-compatible flavors
743        let bracketed_span_ranges = profile_section!("Bracketed span ranges", profile, {
744            if flavor.is_pandoc_compatible() {
745                crate::utils::pandoc::detect_bracketed_span_ranges(content)
746            } else {
747                Vec::new()
748            }
749        });
750
751        // Pre-compute Pandoc line block ranges (| text) for Pandoc-compatible flavors
752        let line_block_ranges = profile_section!("Line block ranges", profile, {
753            if flavor.is_pandoc_compatible() {
754                crate::utils::pandoc::detect_line_block_ranges(content)
755            } else {
756                Vec::new()
757            }
758        });
759
760        // Pre-compute Pandoc pipe-table caption ranges (: caption) for Pandoc-compatible flavors
761        let pipe_table_caption_ranges = profile_section!("Pipe-table caption ranges", profile, {
762            if flavor.is_pandoc_compatible() {
763                crate::utils::pandoc::detect_pipe_table_caption_ranges(content)
764            } else {
765                Vec::new()
766            }
767        });
768
769        // Pre-compute Pandoc YAML metadata block ranges (--- ... --- or ...) for Pandoc-compatible flavors
770        let pandoc_metadata_ranges = profile_section!("Pandoc metadata ranges", profile, {
771            if flavor.is_pandoc_compatible() {
772                crate::utils::pandoc::detect_yaml_metadata_block_ranges(content)
773            } else {
774                Vec::new()
775            }
776        });
777
778        // Pre-compute Pandoc grid-table ranges (+---+---+) for Pandoc-compatible flavors
779        let grid_table_ranges = profile_section!("Grid table ranges", profile, {
780            if flavor.is_pandoc_compatible() {
781                crate::utils::pandoc::detect_grid_table_ranges(content)
782            } else {
783                Vec::new()
784            }
785        });
786
787        // Pre-compute Pandoc multi-line table ranges for Pandoc-compatible flavors
788        let multi_line_table_ranges = profile_section!("Multi-line table ranges", profile, {
789            if flavor.is_pandoc_compatible() {
790                crate::utils::pandoc::detect_multi_line_table_ranges(content)
791            } else {
792                Vec::new()
793            }
794        });
795
796        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
797        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
798            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
799            let mut ranges = Vec::new();
800            for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
801                ranges.push((mat.start(), mat.end()));
802            }
803            ranges
804        });
805
806        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
807
808        Self {
809            content,
810            content_lines,
811            line_offsets,
812            code_blocks,
813            code_block_details,
814            strong_spans,
815            line_to_list,
816            list_start_values,
817            lines,
818            links,
819            images,
820            broken_links,
821            footnote_refs,
822            reference_defs,
823            reference_defs_map,
824            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
825            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
826            list_blocks,
827            char_frequency,
828            html_tags_cache: OnceLock::new(),
829            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
830            table_rows_cache: OnceLock::new(),
831            bare_urls_cache: OnceLock::new(),
832            has_mixed_list_nesting_cache: OnceLock::new(),
833            html_comment_ranges,
834            table_blocks,
835            line_index,
836            jinja_ranges,
837            flavor,
838            source_file,
839            jsx_expression_ranges,
840            mdx_comment_ranges,
841            citation_ranges,
842            pandoc_div_ranges,
843            colon_fence_ranges,
844            inline_footnote_ranges,
845            pandoc_header_slugs,
846            example_list_marker_ranges,
847            example_reference_ranges,
848            sub_super_ranges,
849            inline_code_attr_ranges,
850            bracketed_span_ranges,
851            line_block_ranges,
852            pipe_table_caption_ranges,
853            pandoc_metadata_ranges,
854            grid_table_ranges,
855            multi_line_table_ranges,
856            shortcode_ranges,
857            link_title_ranges,
858            code_span_byte_ranges: code_span_ranges,
859            inline_config,
860            obsidian_comment_ranges,
861            lazy_cont_lines_cache: OnceLock::new(),
862            myst_directive_ranges,
863            myst_comment_ranges,
864            myst_role_ranges,
865        }
866    }
867
868    /// Binary search for whether `pos` falls inside any range in a sorted, non-overlapping
869    /// slice of `(start, end)` byte ranges. O(log n) instead of O(n).
870    #[inline]
871    fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
872        // Find the rightmost range whose start <= pos
873        let idx = ranges.partition_point(|&(start, _)| start <= pos);
874        // If idx == 0, no range starts at or before pos
875        idx > 0 && pos < ranges[idx - 1].1
876    }
877
878    /// Check if a byte position is within a code span. O(log n).
879    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
880        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
881    }
882
883    /// Check if `pos` is inside any link byte range. O(log n).
884    pub fn is_in_link(&self, pos: usize) -> bool {
885        let idx = self.links.partition_point(|link| link.byte_offset <= pos);
886        if idx > 0 && pos < self.links[idx - 1].byte_end {
887            return true;
888        }
889        let idx = self.images.partition_point(|img| img.byte_offset <= pos);
890        if idx > 0 && pos < self.images[idx - 1].byte_end {
891            return true;
892        }
893        self.is_in_reference_def(pos)
894    }
895
896    /// Get parsed inline configuration state.
897    pub fn inline_config(&self) -> &InlineConfig {
898        &self.inline_config
899    }
900
901    /// Byte ranges of Azure DevOps colon code fences (`:::lang … :::`).
902    /// Empty for all other flavors.
903    pub fn colon_fence_ranges(&self) -> &[(usize, usize)] {
904        &self.colon_fence_ranges
905    }
906
907    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
908    ///
909    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
910    pub fn raw_lines(&self) -> &[&'a str] {
911        &self.content_lines
912    }
913
914    /// Check if a rule is disabled at a specific line number (1-indexed)
915    ///
916    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
917    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
918    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
919        self.inline_config.is_rule_disabled(rule_name, line_number)
920    }
921
922    /// Get code spans - computed lazily on first access
923    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
924        Arc::clone(
925            self.code_spans_cache
926                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
927        )
928    }
929
930    /// Get math spans - computed lazily on first access
931    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
932        Arc::clone(
933            self.math_spans_cache
934                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
935        )
936    }
937
938    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
939    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
940        let math_spans = self.math_spans();
941        // Binary search: find the last span whose byte_offset <= byte_pos
942        let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
943        idx > 0 && byte_pos < math_spans[idx - 1].byte_end
944    }
945
946    /// Get HTML comment ranges - pre-computed during LintContext construction
947    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
948        &self.html_comment_ranges
949    }
950
951    /// Check if a byte position is inside an Obsidian comment
952    ///
953    /// Returns false for non-Obsidian flavors.
954    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
955        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
956    }
957
958    /// Check if a line/column position is inside an Obsidian comment
959    ///
960    /// Line number is 1-indexed, column is 1-indexed.
961    /// Returns false for non-Obsidian flavors.
962    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
963        if self.obsidian_comment_ranges.is_empty() {
964            return false;
965        }
966
967        // Convert line/column (1-indexed, char-based) to byte position
968        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
969        self.is_in_obsidian_comment(byte_pos)
970    }
971
972    /// Get byte ranges of MyST colon directive blocks
973    pub fn myst_directive_ranges(&self) -> &[(usize, usize)] {
974        &self.myst_directive_ranges
975    }
976
977    /// Check if a byte position is inside a MyST role (`{role}`content``)
978    pub fn is_in_myst_role(&self, byte_pos: usize) -> bool {
979        Self::binary_search_ranges(&self.myst_role_ranges, byte_pos)
980    }
981
982    /// Check if a byte position is inside a MyST comment (`% comment`)
983    pub fn is_in_myst_comment(&self, byte_pos: usize) -> bool {
984        Self::binary_search_ranges(&self.myst_comment_ranges, byte_pos)
985    }
986
987    /// Check if a line (1-indexed) is a MyST colon-fence directive opener (`:::{name} ...`).
988    ///
989    /// The text after `{name}` on an opener is the directive's argument (an opaque
990    /// path, URL, or label), not markdown prose. Rules that reformat prose should
991    /// skip these lines. Returns false for non-MyST flavors and for directive body
992    /// or closer lines.
993    pub fn is_myst_colon_directive_opener_line(&self, line_num: usize) -> bool {
994        if !self.flavor.supports_myst_directives() {
995            return false;
996        }
997        self.lines.get(line_num.wrapping_sub(1)).is_some_and(|info| {
998            info.in_myst_directive
999                && flavor_detection::myst_colon_directive_opener(info.content(self.content)).is_some()
1000        })
1001    }
1002
1003    /// Get HTML tags - computed lazily on first access
1004    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
1005        Arc::clone(self.html_tags_cache.get_or_init(|| {
1006            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
1007            // Filter out HTML tags inside kramdown extension blocks
1008            Arc::new(
1009                tags.into_iter()
1010                    .filter(|tag| {
1011                        !self
1012                            .lines
1013                            .get(tag.line - 1)
1014                            .is_some_and(|l| l.in_kramdown_extension_block)
1015                    })
1016                    .collect(),
1017            )
1018        }))
1019    }
1020
1021    /// Get emphasis spans - pre-computed during construction
1022    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
1023        Arc::clone(
1024            self.emphasis_spans_cache
1025                .get()
1026                .expect("emphasis_spans_cache initialized during construction"),
1027        )
1028    }
1029
1030    /// Get table rows - computed lazily on first access
1031    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
1032        Arc::clone(
1033            self.table_rows_cache
1034                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
1035        )
1036    }
1037
1038    /// Get bare URLs - computed lazily on first access
1039    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
1040        Arc::clone(self.bare_urls_cache.get_or_init(|| {
1041            Arc::new(element_parsers::parse_bare_urls(
1042                self.content,
1043                &self.lines,
1044                &self.code_blocks,
1045            ))
1046        }))
1047    }
1048
1049    /// Get lazy continuation lines - computed lazily on first access
1050    pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
1051        Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
1052            Arc::new(element_parsers::detect_lazy_continuation_lines(
1053                self.content,
1054                &self.lines,
1055                &self.line_offsets,
1056            ))
1057        }))
1058    }
1059
1060    /// Check if document has mixed ordered/unordered list nesting.
1061    /// Result is cached after first computation (document-level invariant).
1062    /// This is used by MD007 for smart style auto-detection.
1063    pub fn has_mixed_list_nesting(&self) -> bool {
1064        *self
1065            .has_mixed_list_nesting_cache
1066            .get_or_init(|| self.compute_mixed_list_nesting())
1067    }
1068
1069    /// Internal computation for mixed list nesting (only called once per LintContext).
1070    fn compute_mixed_list_nesting(&self) -> bool {
1071        // Track parent list items by their marker position and type
1072        // Using marker_column instead of indent because it works correctly
1073        // for blockquoted content where indent doesn't account for the prefix
1074        // Stack stores: (marker_column, is_ordered)
1075        let mut stack: Vec<(usize, bool)> = Vec::new();
1076        let mut last_was_blank = false;
1077
1078        for line_info in &self.lines {
1079            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
1080            if line_info.in_code_block
1081                || line_info.in_front_matter
1082                || line_info.in_mkdocstrings
1083                || line_info.in_html_comment
1084                || line_info.in_mdx_comment
1085                || line_info.in_esm_block
1086            {
1087                continue;
1088            }
1089
1090            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
1091            if line_info.is_blank {
1092                last_was_blank = true;
1093                continue;
1094            }
1095
1096            if let Some(list_item) = &line_info.list_item {
1097                // Normalize column 1 to column 0 (consistent with MD007 check function)
1098                let current_pos = if list_item.marker_column == 1 {
1099                    0
1100                } else {
1101                    list_item.marker_column
1102                };
1103
1104                // If there was a blank line and this item is at root level, reset stack
1105                if last_was_blank && current_pos == 0 {
1106                    stack.clear();
1107                }
1108                last_was_blank = false;
1109
1110                // Pop items at same or greater position (they're siblings or deeper, not parents)
1111                while let Some(&(pos, _)) = stack.last() {
1112                    if pos >= current_pos {
1113                        stack.pop();
1114                    } else {
1115                        break;
1116                    }
1117                }
1118
1119                // Check if immediate parent has different type - this is mixed nesting
1120                if let Some(&(_, parent_is_ordered)) = stack.last()
1121                    && parent_is_ordered != list_item.is_ordered
1122                {
1123                    return true; // Found mixed nesting - early exit
1124                }
1125
1126                stack.push((current_pos, list_item.is_ordered));
1127            } else {
1128                // Non-list line (but not blank) - could be paragraph or other content
1129                last_was_blank = false;
1130            }
1131        }
1132
1133        false
1134    }
1135
1136    /// Map a byte offset to (line, column)
1137    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1138        match self.line_offsets.binary_search(&offset) {
1139            Ok(line) => (line + 1, 1),
1140            Err(line) => {
1141                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1142                (line, offset - line_start + 1)
1143            }
1144        }
1145    }
1146
1147    /// Check if a position is within a code block or code span. O(log n).
1148    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1149        // Check code blocks first (already uses binary search internally)
1150        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1151            return true;
1152        }
1153
1154        // Check inline code spans via binary search
1155        self.is_byte_offset_in_code_span(pos)
1156    }
1157
1158    /// Get line information by line number (1-indexed)
1159    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1160        if line_num > 0 {
1161            self.lines.get(line_num - 1)
1162        } else {
1163            None
1164        }
1165    }
1166
1167    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1168    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1169        let normalized_id = ref_id.to_lowercase();
1170        self.reference_defs_map
1171            .get(&normalized_id)
1172            .map(|&idx| self.reference_defs[idx].url.as_str())
1173    }
1174
1175    /// Check if a line is part of a list block
1176    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1177        self.list_blocks
1178            .iter()
1179            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1180    }
1181
1182    /// Check if a line is within an HTML block
1183    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1184        if line_num == 0 || line_num > self.lines.len() {
1185            return false;
1186        }
1187        self.lines[line_num - 1].in_html_block
1188    }
1189
1190    /// Check if a 1-indexed line number is inside a GFM table block.
1191    ///
1192    /// Returns `true` for the header line, delimiter line, and all body rows.
1193    /// `TableBlock` spans are stored 0-indexed; this helper accepts the
1194    /// 1-indexed line numbers used elsewhere in the rule API.
1195    pub fn is_in_table_block(&self, line_num: usize) -> bool {
1196        if line_num == 0 {
1197            return false;
1198        }
1199        let line_idx = line_num - 1;
1200        self.table_blocks
1201            .iter()
1202            .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
1203    }
1204
1205    /// Check if a line and column is within a code span
1206    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1207        if line_num == 0 || line_num > self.lines.len() {
1208            return false;
1209        }
1210
1211        // Use the code spans cache to check
1212        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1213        // Convert col to 0-indexed for comparison
1214        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1215        let code_spans = self.code_spans();
1216        code_spans.iter().any(|span| {
1217            // Check if line is within the span's line range
1218            if line_num < span.line || line_num > span.end_line {
1219                return false;
1220            }
1221
1222            if span.line == span.end_line {
1223                // Single-line span: check column bounds
1224                col_0indexed >= span.start_col && col_0indexed < span.end_col
1225            } else if line_num == span.line {
1226                // First line of multi-line span: anything after start_col is in span
1227                col_0indexed >= span.start_col
1228            } else if line_num == span.end_line {
1229                // Last line of multi-line span: anything before end_col is in span
1230                col_0indexed < span.end_col
1231            } else {
1232                // Middle line of multi-line span: entire line is in span
1233                true
1234            }
1235        })
1236    }
1237
1238    /// Check if a byte offset is within a code span. O(log n).
1239    #[inline]
1240    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1241        let code_spans = self.code_spans();
1242        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1243        idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1244    }
1245
1246    /// Check if a byte position is within a reference definition. O(log n).
1247    #[inline]
1248    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1249        let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1250        idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1251    }
1252
1253    /// Check if a byte position is within an HTML comment. O(log n).
1254    #[inline]
1255    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1256        let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1257        idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1258    }
1259
1260    /// Check if a byte position is within an HTML tag (including multiline tags).
1261    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines. O(log n).
1262    #[inline]
1263    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1264        let tags = self.html_tags();
1265        let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1266        idx > 0 && byte_pos < tags[idx - 1].byte_end
1267    }
1268
1269    /// Check if a byte position is within a Jinja template ({{ }} or {% %}). O(log n).
1270    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1271        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1272    }
1273
1274    /// Check if a byte position is within a JSX expression (MDX: {expression}). O(log n).
1275    #[inline]
1276    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1277        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1278    }
1279
1280    /// Check if a byte position is within an MDX comment ({/* ... */}). O(log n).
1281    #[inline]
1282    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1283        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1284    }
1285
1286    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`).
1287    /// Active for Pandoc-compatible flavors. O(log n).
1288    #[inline]
1289    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1290        let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1291        idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1292    }
1293
1294    /// Pre-computed Pandoc/Quarto citation ranges.
1295    #[inline]
1296    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1297        &self.citation_ranges
1298    }
1299
1300    /// Check if a byte position is within a Pandoc/Quarto div block (`::: ... :::`).
1301    /// Active for Pandoc-compatible flavors. O(log n) via binary search over sorted ranges.
1302    #[inline]
1303    pub fn is_in_div_block(&self, byte_pos: usize) -> bool {
1304        let idx = self.pandoc_div_ranges.partition_point(|r| r.start <= byte_pos);
1305        idx > 0 && byte_pos < self.pandoc_div_ranges[idx - 1].end
1306    }
1307
1308    /// Check if a byte position is within a Pandoc inline footnote (`^[note text]`).
1309    /// Active for Pandoc-compatible flavors. O(log n).
1310    #[inline]
1311    pub fn is_in_inline_footnote(&self, byte_pos: usize) -> bool {
1312        let idx = self.inline_footnote_ranges.partition_point(|r| r.start <= byte_pos);
1313        idx > 0 && byte_pos < self.inline_footnote_ranges[idx - 1].end
1314    }
1315
1316    /// Check if a byte position is within a Pandoc example-list marker (`(@)` /
1317    /// `(@label)` at line start). Active for Pandoc-compatible flavors. O(log n).
1318    #[inline]
1319    pub fn is_in_example_list_marker(&self, byte_pos: usize) -> bool {
1320        let idx = self.example_list_marker_ranges.partition_point(|r| r.start <= byte_pos);
1321        idx > 0 && byte_pos < self.example_list_marker_ranges[idx - 1].end
1322    }
1323
1324    /// Check if a byte position is within a Pandoc example reference (`(@label)`
1325    /// inline). Active for Pandoc-compatible flavors. O(log n).
1326    #[inline]
1327    pub fn is_in_example_reference(&self, byte_pos: usize) -> bool {
1328        let idx = self.example_reference_ranges.partition_point(|r| r.start <= byte_pos);
1329        idx > 0 && byte_pos < self.example_reference_ranges[idx - 1].end
1330    }
1331
1332    /// Check if a byte position is within a Pandoc subscript (`~x~`) or
1333    /// superscript (`^x^`) span. Active for Pandoc-compatible flavors. O(log n).
1334    #[inline]
1335    pub fn is_in_subscript_or_superscript(&self, byte_pos: usize) -> bool {
1336        let idx = self.sub_super_ranges.partition_point(|r| r.start <= byte_pos);
1337        idx > 0 && byte_pos < self.sub_super_ranges[idx - 1].end
1338    }
1339
1340    /// Check if a byte position is within a Pandoc inline-code attribute block
1341    /// (`{.lang}` immediately following `` `code` ``). Active for Pandoc-compatible
1342    /// flavors. O(log n).
1343    #[inline]
1344    pub fn is_in_inline_code_attr(&self, byte_pos: usize) -> bool {
1345        let idx = self.inline_code_attr_ranges.partition_point(|r| r.start <= byte_pos);
1346        idx > 0 && byte_pos < self.inline_code_attr_ranges[idx - 1].end
1347    }
1348
1349    /// Check if a byte position is within a Pandoc bracketed span (`[text]{attrs}`).
1350    /// Active for Pandoc-compatible flavors. O(log n).
1351    #[inline]
1352    pub fn is_in_bracketed_span(&self, byte_pos: usize) -> bool {
1353        let idx = self.bracketed_span_ranges.partition_point(|r| r.start <= byte_pos);
1354        idx > 0 && byte_pos < self.bracketed_span_ranges[idx - 1].end
1355    }
1356
1357    /// Returns true if `byte_pos` falls inside a Pandoc line block (`| text`).
1358    /// Active for Pandoc-compatible flavors. O(log n).
1359    #[inline]
1360    pub fn is_in_line_block(&self, byte_pos: usize) -> bool {
1361        let idx = self.line_block_ranges.partition_point(|r| r.start <= byte_pos);
1362        idx > 0 && byte_pos < self.line_block_ranges[idx - 1].end
1363    }
1364
1365    /// Returns true if `byte_pos` falls inside a Pandoc pipe-table caption
1366    /// (`: caption` adjacent to a pipe table). Active for Pandoc-compatible
1367    /// flavors. O(log n).
1368    #[inline]
1369    pub fn is_in_pipe_table_caption(&self, byte_pos: usize) -> bool {
1370        let idx = self.pipe_table_caption_ranges.partition_point(|r| r.start <= byte_pos);
1371        idx > 0 && byte_pos < self.pipe_table_caption_ranges[idx - 1].end
1372    }
1373
1374    /// Returns true if `byte_pos` falls inside a Pandoc YAML metadata block.
1375    /// Active for Pandoc-compatible flavors. O(log n).
1376    #[inline]
1377    pub fn is_in_pandoc_metadata(&self, byte_pos: usize) -> bool {
1378        let idx = self.pandoc_metadata_ranges.partition_point(|r| r.start <= byte_pos);
1379        idx > 0 && byte_pos < self.pandoc_metadata_ranges[idx - 1].end
1380    }
1381
1382    /// Returns true if `byte_pos` falls inside a Pandoc grid table.
1383    /// Active for Pandoc-compatible flavors. O(log n).
1384    #[inline]
1385    pub fn is_in_grid_table(&self, byte_pos: usize) -> bool {
1386        let idx = self.grid_table_ranges.partition_point(|r| r.start <= byte_pos);
1387        idx > 0 && byte_pos < self.grid_table_ranges[idx - 1].end
1388    }
1389
1390    /// Returns true if `byte_pos` falls inside a Pandoc multi-line table.
1391    /// Active for Pandoc-compatible flavors. O(log n).
1392    #[inline]
1393    pub fn is_in_multi_line_table(&self, byte_pos: usize) -> bool {
1394        let idx = self.multi_line_table_ranges.partition_point(|r| r.start <= byte_pos);
1395        idx > 0 && byte_pos < self.multi_line_table_ranges[idx - 1].end
1396    }
1397
1398    /// Returns true if `link_text`, after Pandoc slugification, matches a heading
1399    /// in the document. Returns false for non-Pandoc-compatible flavors because
1400    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1401    /// off. Use this when the caller has raw bracketed text (`[Section name]`).
1402    pub fn matches_implicit_header_reference(&self, link_text: &str) -> bool {
1403        let slug = crate::utils::pandoc::pandoc_header_slug(link_text);
1404        self.pandoc_header_slugs.contains(&slug)
1405    }
1406
1407    /// Returns true if `slug` (already in Pandoc-slug form) matches a heading
1408    /// in the document. Returns false for non-Pandoc-compatible flavors because
1409    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1410    /// off. Use this when the caller already has a slug (e.g. the fragment of a
1411    /// URL after `#`). O(1).
1412    #[inline]
1413    pub fn has_pandoc_slug(&self, slug: &str) -> bool {
1414        self.pandoc_header_slugs.contains(slug)
1415    }
1416
1417    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}}). O(log n).
1418    #[inline]
1419    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1420        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1421    }
1422
1423    /// Pre-computed Hugo/Quarto shortcode ranges.
1424    #[inline]
1425    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1426        &self.shortcode_ranges
1427    }
1428
1429    /// Check if a byte position is within a link reference definition title. O(log n).
1430    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1431        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1432    }
1433
1434    /// Check if content has any instances of a specific character (fast)
1435    pub fn has_char(&self, ch: char) -> bool {
1436        match ch {
1437            '#' => self.char_frequency.hash_count > 0,
1438            '*' => self.char_frequency.asterisk_count > 0,
1439            '_' => self.char_frequency.underscore_count > 0,
1440            '-' => self.char_frequency.hyphen_count > 0,
1441            '+' => self.char_frequency.plus_count > 0,
1442            '>' => self.char_frequency.gt_count > 0,
1443            '|' => self.char_frequency.pipe_count > 0,
1444            '[' => self.char_frequency.bracket_count > 0,
1445            '`' => self.char_frequency.backtick_count > 0,
1446            '<' => self.char_frequency.lt_count > 0,
1447            '!' => self.char_frequency.exclamation_count > 0,
1448            '\n' => self.char_frequency.newline_count > 0,
1449            _ => self.content.contains(ch), // Fallback for other characters
1450        }
1451    }
1452
1453    /// Get count of a specific character (fast)
1454    pub fn char_count(&self, ch: char) -> usize {
1455        match ch {
1456            '#' => self.char_frequency.hash_count,
1457            '*' => self.char_frequency.asterisk_count,
1458            '_' => self.char_frequency.underscore_count,
1459            '-' => self.char_frequency.hyphen_count,
1460            '+' => self.char_frequency.plus_count,
1461            '>' => self.char_frequency.gt_count,
1462            '|' => self.char_frequency.pipe_count,
1463            '[' => self.char_frequency.bracket_count,
1464            '`' => self.char_frequency.backtick_count,
1465            '<' => self.char_frequency.lt_count,
1466            '!' => self.char_frequency.exclamation_count,
1467            '\n' => self.char_frequency.newline_count,
1468            _ => self.content.matches(ch).count(), // Fallback for other characters
1469        }
1470    }
1471
1472    /// Check if content likely contains headings (fast)
1473    pub fn likely_has_headings(&self) -> bool {
1474        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') // Setext H1 underlines use '='
1475    }
1476
1477    /// Check if content likely contains lists (fast)
1478    pub fn likely_has_lists(&self) -> bool {
1479        self.char_frequency.asterisk_count > 0
1480            || self.char_frequency.hyphen_count > 0
1481            || self.char_frequency.plus_count > 0
1482    }
1483
1484    /// Check if content likely contains emphasis (fast)
1485    pub fn likely_has_emphasis(&self) -> bool {
1486        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1487    }
1488
1489    /// Check if content likely contains tables (fast)
1490    pub fn likely_has_tables(&self) -> bool {
1491        self.char_frequency.pipe_count > 2
1492    }
1493
1494    /// Check if content likely contains blockquotes (fast)
1495    pub fn likely_has_blockquotes(&self) -> bool {
1496        self.char_frequency.gt_count > 0
1497    }
1498
1499    /// Check if content likely contains code (fast)
1500    pub fn likely_has_code(&self) -> bool {
1501        self.char_frequency.backtick_count > 0
1502    }
1503
1504    /// Check if content likely contains links or images (fast)
1505    pub fn likely_has_links_or_images(&self) -> bool {
1506        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1507    }
1508
1509    /// Check if content likely contains HTML (fast)
1510    pub fn likely_has_html(&self) -> bool {
1511        self.char_frequency.lt_count > 0
1512    }
1513
1514    /// Get the blockquote prefix for inserting a blank line at the given line index.
1515    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1516    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1517    /// Returns an empty string if the line is not inside a blockquote.
1518    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1519        if let Some(line_info) = self.lines.get(line_idx)
1520            && let Some(ref bq) = line_info.blockquote
1521        {
1522            bq.prefix.trim_end().to_string()
1523        } else {
1524            String::new()
1525        }
1526    }
1527
1528    /// Find the line index for a given byte offset using binary search.
1529    /// Returns (line_index, line_number, column) where:
1530    /// - line_index is the 0-based index in the lines array
1531    /// - line_number is the 1-based line number
1532    /// - column is the byte offset within that line
1533    #[inline]
1534    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1535        // Binary search to find the line containing this byte offset
1536        let idx = match lines.binary_search_by(|line| {
1537            if byte_offset < line.byte_offset {
1538                std::cmp::Ordering::Greater
1539            } else if byte_offset > line.byte_offset + line.byte_len {
1540                std::cmp::Ordering::Less
1541            } else {
1542                std::cmp::Ordering::Equal
1543            }
1544        }) {
1545            Ok(idx) => idx,
1546            Err(idx) => idx.saturating_sub(1),
1547        };
1548
1549        let line = &lines[idx];
1550        let line_num = idx + 1;
1551        let col = byte_offset.saturating_sub(line.byte_offset);
1552
1553        (idx, line_num, col)
1554    }
1555
1556    /// Check if a byte offset is within a code span using binary search
1557    #[inline]
1558    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1559        // Since spans are sorted by byte_offset, use partition_point for binary search
1560        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1561
1562        // Check the span that starts at or before our offset
1563        if idx > 0 {
1564            let span = &code_spans[idx - 1];
1565            if offset >= span.byte_offset && offset < span.byte_end {
1566                return true;
1567            }
1568        }
1569
1570        false
1571    }
1572
1573    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1574    ///
1575    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1576    /// This is the standard iterator for rules that need to process headings.
1577    ///
1578    /// # Examples
1579    ///
1580    /// ```
1581    /// use rumdl_lib::lint_context::LintContext;
1582    /// use rumdl_lib::config::MarkdownFlavor;
1583    ///
1584    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1585    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1586    ///
1587    /// for heading in ctx.valid_headings() {
1588    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1589    /// }
1590    /// // Only prints valid headings, skips `#NoSpace`
1591    /// ```
1592    #[must_use]
1593    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1594        ValidHeadingsIter::new(&self.lines)
1595    }
1596
1597    /// Check if the document contains any valid CommonMark headings
1598    ///
1599    /// Returns `true` if there is at least one heading with proper space after `#`.
1600    #[must_use]
1601    pub fn has_valid_headings(&self) -> bool {
1602        self.lines
1603            .iter()
1604            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1605    }
1606}
1607
1608/// Detect footnote definitions and mark their continuation lines.
1609///
1610/// Uses pulldown-cmark to find footnote definition ranges and fenced code
1611/// blocks within them, then:
1612/// 1. Sets `in_footnote_definition = true` on all lines within
1613/// 2. Clears `in_code_block = false` on continuation lines that were
1614///    misidentified as indented code blocks (but preserves real fenced
1615///    code blocks within footnotes)
1616fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1617    use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1618
1619    let options = crate::utils::rumdl_parser_options();
1620    let parser = Parser::new_ext(content, options).into_offset_iter();
1621
1622    // Collect footnote ranges and fenced code block ranges within them
1623    let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1624    let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1625    let mut in_footnote = false;
1626
1627    for (event, range) in parser {
1628        match event {
1629            Event::Start(Tag::FootnoteDefinition(_)) => {
1630                in_footnote = true;
1631                footnote_ranges.push((range.start, range.end));
1632            }
1633            Event::End(TagEnd::FootnoteDefinition) => {
1634                in_footnote = false;
1635            }
1636            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1637                fenced_code_ranges.push((range.start, range.end));
1638            }
1639            _ => {}
1640        }
1641    }
1642
1643    let byte_to_line = |byte_offset: usize| -> usize {
1644        line_offsets
1645            .partition_point(|&offset| offset <= byte_offset)
1646            .saturating_sub(1)
1647    };
1648
1649    // Mark footnote definition lines
1650    for &(start, end) in &footnote_ranges {
1651        let start_line = byte_to_line(start);
1652        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1653
1654        for line in &mut lines[start_line..end_line] {
1655            line.in_footnote_definition = true;
1656            line.in_code_block = false;
1657        }
1658    }
1659
1660    // Restore in_code_block for fenced code blocks within footnotes
1661    for &(start, end) in &fenced_code_ranges {
1662        let start_line = byte_to_line(start);
1663        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1664
1665        for line in &mut lines[start_line..end_line] {
1666            line.in_code_block = true;
1667        }
1668    }
1669}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs