// rumdl_lib/lint_context/mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code`, yields its value, and optionally reports how long it took.
///
/// * `$name` - label printed in the timing report.
/// * `$profile` - boolean expression; when true the report is emitted.
/// * `$code` - expression to run; its value is the value of the macro call.
///
/// The timer is started before `$code` runs. When `$profile` is true, the
/// elapsed time is written to stderr as `[PROFILE] <name>: <duration>`.
///
/// This variant is compiled for every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let start = std::time::Instant::now();
        let result = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, start.elapsed());
        }
        result
    }};
}

/// `wasm32` variant of `profile_section!`.
///
/// Expands to `$code` alone: `$name` and `$profile` are accepted but never
/// expanded, so no timing is performed and the `$profile` expression does not
/// need to exist on this target.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) pandoc_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub code_block_details: Vec<CodeBlockDetail>, // Per-block metadata (fenced/indented, info string)
61    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, // Pre-computed strong emphasis spans
62    pub line_to_list: crate::utils::code_block_utils::LineToListMap, // Ordered list membership by line
63    pub list_start_values: crate::utils::code_block_utils::ListStartValues, // Start values per list ID
64    pub lines: Vec<LineInfo>,             // Pre-computed line information
65    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
66    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
67    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
68    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
69    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
70    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
71    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
72    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
73    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
74    pub char_frequency: CharFrequency,    // Character frequency analysis
75    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
76    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
77    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
78    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
79    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
80    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
81    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
82    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
83    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
84    pub flavor: MarkdownFlavor,           // Markdown flavor being used
85    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
86    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
87    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
88    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (@key, [@key])
89    pandoc_div_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto div block ranges (::: ... :::)
90    colon_fence_ranges: Vec<(usize, usize)>, // Pre-computed Azure DevOps colon code fence ranges (:::lang ... :::)
91    inline_footnote_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline footnote ranges (^[...])
92    pandoc_header_slugs: std::collections::HashSet<String>, // Pre-computed Pandoc implicit header reference slugs
93    example_list_marker_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example-list marker ranges (@) / (@label)
94    example_reference_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example reference ranges (@label) inline
95    sub_super_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc subscript (~x~) and superscript (^x^) ranges
96    inline_code_attr_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline code attribute ranges (`code`{.lang})
97    bracketed_span_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc bracketed span ranges ([text]{attrs})
98    line_block_ranges: Vec<crate::utils::skip_context::ByteRange>,     // Pre-computed Pandoc line block ranges (| text)
99    pipe_table_caption_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc pipe-table caption ranges (: caption)
100    pandoc_metadata_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc YAML metadata block ranges (--- ... --- or ...)
101    grid_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc grid-table ranges (+---+---+)
102    multi_line_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc multi-line table ranges
103    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
104    link_title_ranges: Vec<(usize, usize)>, // Pre-computed sorted link title byte ranges
105    code_span_byte_ranges: Vec<(usize, usize)>, // Pre-computed code span byte ranges from pulldown-cmark
106    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
107    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
108    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, // Lazy-loaded lazy continuation lines
109}
110
111impl<'a> LintContext<'a> {
112    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
113        #[cfg(not(target_arch = "wasm32"))]
114        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
115
116        let line_offsets = profile_section!("Line offsets", profile, {
117            let mut offsets = vec![0];
118            for (i, c) in content.char_indices() {
119                if c == '\n' {
120                    offsets.push(i + 1);
121                }
122            }
123            offsets
124        });
125
126        // Compute content_lines once for all functions that need it
127        let content_lines: Vec<&str> = content.lines().collect();
128
129        // Detect front matter boundaries once for all functions that need it
130        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
131
132        // Detect code blocks and code spans once and cache them
133        let parse_result = profile_section!(
134            "Code blocks",
135            profile,
136            CodeBlockUtils::detect_code_blocks_and_spans(content)
137        );
138        let mut code_blocks = parse_result.code_blocks;
139        let code_span_ranges = parse_result.code_spans;
140        let code_block_details = parse_result.code_block_details;
141        let strong_spans = parse_result.strong_spans;
142        let line_to_list = parse_result.line_to_list;
143        let list_start_values = parse_result.list_start_values;
144
145        // Pre-compute HTML comment ranges ONCE for all operations
146        let html_comment_ranges = profile_section!(
147            "HTML comment ranges",
148            profile,
149            crate::utils::skip_context::compute_html_comment_ranges(content)
150        );
151
152        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
153        // Detected for all flavors except AzureDevOps, where `:::` denotes code fences
154        // rather than autodoc directives.
155        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
156            if flavor.supports_colon_code_fences() {
157                Vec::new()
158            } else {
159                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
160            }
161        });
162
163        // Pre-compute Pandoc/Quarto div block ranges for Pandoc-compatible flavors
164        let pandoc_div_ranges = profile_section!("Pandoc div ranges", profile, {
165            if flavor.is_pandoc_compatible() {
166                crate::utils::pandoc::detect_div_block_ranges(content)
167            } else {
168                Vec::new()
169            }
170        });
171
172        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
173        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
174            if flavor == MarkdownFlavor::MkDocs {
175                crate::utils::pymdown_blocks::detect_block_ranges(content)
176            } else {
177                Vec::new()
178            }
179        });
180
181        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
182        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
183        let skip_ranges = SkipByteRanges {
184            html_comment_ranges: &html_comment_ranges,
185            autodoc_ranges: &autodoc_ranges,
186            pandoc_div_ranges: &pandoc_div_ranges,
187            pymdown_block_ranges: &pymdown_block_ranges,
188        };
189        let (mut lines, emphasis_spans) = profile_section!(
190            "Basic line info",
191            profile,
192            line_computation::compute_basic_line_info(
193                content,
194                &content_lines,
195                &line_offsets,
196                &code_blocks,
197                flavor,
198                &skip_ranges,
199                front_matter_end,
200            )
201        );
202
203        // Detect HTML blocks BEFORE heading detection
204        profile_section!(
205            "HTML blocks",
206            profile,
207            heading_detection::detect_html_blocks(content, &mut lines)
208        );
209
210        // Detect ESM import/export blocks in MDX files BEFORE heading detection
211        profile_section!(
212            "ESM blocks",
213            profile,
214            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
215        );
216
217        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
218        profile_section!(
219            "JSX block detection",
220            profile,
221            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
222        );
223
224        // Detect JSX expressions and MDX comments in MDX files
225        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
226            "JSX/MDX detection",
227            profile,
228            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
229        );
230
231        // Detect `<div markdown>`-style HTML blocks (grid cards, etc.) regardless of flavor.
232        // The `markdown` attribute is an explicit, author-supplied signal; recognizing it
233        // in all flavors keeps `rumdl fmt` from mangling Material grid cards when the
234        // MkDocs flavor isn't active.
235        profile_section!(
236            "Markdown-in-HTML blocks",
237            profile,
238            flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
239        );
240
241        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
242        profile_section!(
243            "MkDocs constructs",
244            profile,
245            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
246        );
247
248        // Detect footnote definitions and correct false code block detection.
249        // With ENABLE_FOOTNOTES, pulldown-cmark correctly parses multi-line
250        // footnotes, but the code block detector may still mark 4-space-indented
251        // footnote continuation lines as indented code blocks.
252        profile_section!(
253            "Footnote definitions",
254            profile,
255            detect_footnote_definitions(content, &mut lines, &line_offsets)
256        );
257
258        // Filter code_blocks to remove false positives from footnote continuation content.
259        // Same pattern as MkDocs/JSX corrections below.
260        {
261            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
262            for &(start, end) in &code_blocks {
263                let start_line = line_offsets
264                    .partition_point(|&offset| offset <= start)
265                    .saturating_sub(1);
266                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
267
268                let mut sub_start: Option<usize> = None;
269                for (i, &offset) in line_offsets[start_line..end_line]
270                    .iter()
271                    .enumerate()
272                    .map(|(j, o)| (j + start_line, o))
273                {
274                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
275                    if is_real_code && sub_start.is_none() {
276                        let byte_start = if i == start_line { start } else { offset };
277                        sub_start = Some(byte_start);
278                    } else if !is_real_code && sub_start.is_some() {
279                        new_code_blocks.push((sub_start.unwrap(), offset));
280                        sub_start = None;
281                    }
282                }
283                if let Some(s) = sub_start {
284                    new_code_blocks.push((s, end));
285                }
286            }
287            code_blocks = new_code_blocks;
288        }
289
290        // Filter code_blocks to remove false positives from MkDocs admonition/tab content
291        // and `<div markdown>` HTML blocks (grid cards).
292        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
293        // these containers this is regular markdown content. detect_mkdocs_line_info and
294        // detect_markdown_html_blocks already corrected LineInfo.in_code_block for these lines,
295        // but the code_blocks byte ranges are still stale. We split ranges rather than using
296        // all-or-nothing removal, so fenced code blocks within the containers are preserved.
297        let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
298        if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
299            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
300            for &(start, end) in &code_blocks {
301                let start_line = line_offsets
302                    .partition_point(|&offset| offset <= start)
303                    .saturating_sub(1);
304                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
305
306                // Walk lines in this range, collecting sub-ranges where in_code_block is true
307                let mut sub_start: Option<usize> = None;
308                for (i, &offset) in line_offsets[start_line..end_line]
309                    .iter()
310                    .enumerate()
311                    .map(|(j, o)| (j + start_line, o))
312                {
313                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
314                    if is_real_code && sub_start.is_none() {
315                        let byte_start = if i == start_line { start } else { offset };
316                        sub_start = Some(byte_start);
317                    } else if !is_real_code && sub_start.is_some() {
318                        new_code_blocks.push((sub_start.unwrap(), offset));
319                        sub_start = None;
320                    }
321                }
322                if let Some(s) = sub_start {
323                    new_code_blocks.push((s, end));
324                }
325            }
326            code_blocks = new_code_blocks;
327        }
328
329        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
330        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
331        // inside JSX component blocks, but code_blocks byte ranges need updating too.
332        if flavor.supports_jsx() {
333            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
334            for &(start, end) in &code_blocks {
335                let start_line = line_offsets
336                    .partition_point(|&offset| offset <= start)
337                    .saturating_sub(1);
338                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
339
340                let mut sub_start: Option<usize> = None;
341                for (i, &offset) in line_offsets[start_line..end_line]
342                    .iter()
343                    .enumerate()
344                    .map(|(j, o)| (j + start_line, o))
345                {
346                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
347                    if is_real_code && sub_start.is_none() {
348                        let byte_start = if i == start_line { start } else { offset };
349                        sub_start = Some(byte_start);
350                    } else if !is_real_code && sub_start.is_some() {
351                        new_code_blocks.push((sub_start.unwrap(), offset));
352                        sub_start = None;
353                    }
354                }
355                if let Some(s) = sub_start {
356                    new_code_blocks.push((s, end));
357                }
358            }
359            code_blocks = new_code_blocks;
360        }
361
362        // Detect Azure DevOps colon code fences and extend code_blocks so that
363        // all byte-range consumers correctly skip their content.
364        let colon_fence_ranges = profile_section!(
365            "Azure colon fence detection",
366            profile,
367            flavor_detection::detect_azure_colon_fences(content, &mut lines, flavor)
368        );
369        if !colon_fence_ranges.is_empty() {
370            code_blocks.extend(colon_fence_ranges.iter().copied());
371            code_blocks.sort_by_key(|&(start, _)| start);
372        }
373
374        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
375        profile_section!(
376            "Kramdown constructs",
377            profile,
378            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
379        );
380
381        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
382        // so downstream heading detection and collection builders never see them.
383        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
384        // from being populated inside extension blocks.
385        for line in &mut lines {
386            if line.in_kramdown_extension_block {
387                line.list_item = None;
388                line.is_horizontal_rule = false;
389                line.blockquote = None;
390                line.is_kramdown_block_ial = false;
391            }
392        }
393
394        // Detect Obsidian comments (%%...%%) in Obsidian flavor
395        let obsidian_comment_ranges = profile_section!(
396            "Obsidian comments",
397            profile,
398            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
399        );
400
401        // Run pulldown-cmark parse for links, images, and link byte ranges in a single pass.
402        // Link byte ranges are needed for heading detection; links/images are finalized later
403        // after code_spans are available.
404        let pulldown_result = profile_section!(
405            "Links, images & link ranges",
406            profile,
407            link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
408        );
409
410        // Now detect headings and blockquotes
411        profile_section!(
412            "Headings & blockquotes",
413            profile,
414            heading_detection::detect_headings_and_blockquotes(
415                &content_lines,
416                &mut lines,
417                flavor,
418                &html_comment_ranges,
419                &pulldown_result.link_byte_ranges,
420                front_matter_end,
421            )
422        );
423
424        // Clear headings that were detected inside kramdown extension blocks
425        for line in &mut lines {
426            if line.in_kramdown_extension_block {
427                line.heading = None;
428            }
429        }
430
431        // Parse code spans early so we can exclude them from link/image parsing
432        let mut code_spans = profile_section!(
433            "Code spans",
434            profile,
435            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
436        );
437
438        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
439        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
440        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
441        if flavor == MarkdownFlavor::MkDocs {
442            let extra = profile_section!(
443                "MkDocs code spans",
444                profile,
445                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
446            );
447            if !extra.is_empty() {
448                code_spans.extend(extra);
449                code_spans.sort_by_key(|span| span.byte_offset);
450            }
451        }
452
453        // Supplement code spans for MDX JSX component body content that pulldown-cmark missed.
454        // pulldown-cmark treats JSX component opening tags (e.g. `<ParamField>`) as HTML block
455        // starters, so backtick code spans within component bodies are invisible to the initial
456        // parse.
457        if flavor == MarkdownFlavor::MDX {
458            let extra = profile_section!(
459                "MDX JSX code spans",
460                profile,
461                element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
462            );
463            if !extra.is_empty() {
464                code_spans.extend(extra);
465                code_spans.sort_by_key(|span| span.byte_offset);
466            }
467        }
468
469        // Mark lines that are continuations of multi-line code spans
470        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
471        for span in &code_spans {
472            if span.end_line > span.line {
473                // Mark lines after the first line as continuations
474                for line_num in (span.line + 1)..=span.end_line {
475                    if let Some(line_info) = lines.get_mut(line_num - 1) {
476                        line_info.in_code_span_continuation = true;
477                    }
478                }
479            }
480        }
481
482        // Finalize links and images: filter by code_spans and run regex fallbacks
483        let (links, images, broken_links, footnote_refs) = profile_section!(
484            "Links & images finalize",
485            profile,
486            link_parser::finalize_links_and_images(
487                content,
488                &lines,
489                &code_blocks,
490                &code_spans,
491                flavor,
492                &html_comment_ranges,
493                pulldown_result
494            )
495        );
496
497        let reference_defs = profile_section!(
498            "Reference defs",
499            profile,
500            link_parser::parse_reference_defs(content, &lines)
501        );
502
503        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
504
505        // Compute character frequency for fast content analysis
506        let char_frequency = profile_section!(
507            "Char frequency",
508            profile,
509            line_computation::compute_char_frequency(content)
510        );
511
512        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
513        let table_blocks = profile_section!(
514            "Table blocks",
515            profile,
516            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
517                content,
518                &code_blocks,
519                &code_spans,
520                &html_comment_ranges,
521            )
522        );
523
524        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
525        // Rules that iterate these collections automatically skip kramdown content.
526        let links = links
527            .into_iter()
528            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
529            .collect::<Vec<_>>();
530        let images = images
531            .into_iter()
532            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
533            .collect::<Vec<_>>();
534        let broken_links = broken_links
535            .into_iter()
536            .filter(|bl| {
537                // BrokenLinkInfo has span but no line field; find line from byte offset
538                let line_idx = line_offsets
539                    .partition_point(|&offset| offset <= bl.span.start)
540                    .saturating_sub(1);
541                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
542            })
543            .collect::<Vec<_>>();
544        let footnote_refs = footnote_refs
545            .into_iter()
546            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
547            .collect::<Vec<_>>();
548        let reference_defs = reference_defs
549            .into_iter()
550            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
551            .collect::<Vec<_>>();
552        let list_blocks = list_blocks
553            .into_iter()
554            .filter(|block| {
555                !lines
556                    .get(block.start_line - 1)
557                    .is_some_and(|l| l.in_kramdown_extension_block)
558            })
559            .collect::<Vec<_>>();
560        let table_blocks = table_blocks
561            .into_iter()
562            .filter(|block| {
563                // TableBlock.start_line is 0-indexed
564                !lines
565                    .get(block.start_line)
566                    .is_some_and(|l| l.in_kramdown_extension_block)
567            })
568            .collect::<Vec<_>>();
569        let emphasis_spans = emphasis_spans
570            .into_iter()
571            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
572            .collect::<Vec<_>>();
573
574        // Rebuild reference_defs_map after filtering
575        let reference_defs_map: HashMap<String, usize> = reference_defs
576            .iter()
577            .enumerate()
578            .map(|(idx, def)| (def.id.to_lowercase(), idx))
579            .collect();
580
581        // Pre-compute sorted link title byte ranges for binary search
582        let link_title_ranges: Vec<(usize, usize)> = reference_defs
583            .iter()
584            .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
585                (Some(start), Some(end)) => Some((start, end)),
586                _ => None,
587            })
588            .collect();
589
590        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
591        let line_index = profile_section!(
592            "Line index",
593            profile,
594            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
595                content,
596                line_offsets.clone(),
597                &code_blocks,
598            )
599        );
600
601        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
602        let jinja_ranges = profile_section!(
603            "Jinja ranges",
604            profile,
605            crate::utils::jinja_utils::find_jinja_ranges(content)
606        );
607
608        // Pre-compute Pandoc/Quarto citation ranges for Pandoc-compatible flavors
609        let citation_ranges = profile_section!("Citation ranges", profile, {
610            if flavor.is_pandoc_compatible() {
611                crate::utils::pandoc::find_citation_ranges(content)
612            } else {
613                Vec::new()
614            }
615        });
616
617        // Pre-compute Pandoc inline footnote ranges for Pandoc-compatible flavors
618        let inline_footnote_ranges = profile_section!("Inline footnote ranges", profile, {
619            if flavor.is_pandoc_compatible() {
620                crate::utils::pandoc::detect_inline_footnote_ranges(content)
621            } else {
622                Vec::new()
623            }
624        });
625
626        // Pre-compute Pandoc implicit header reference slugs for Pandoc-compatible flavors
627        let pandoc_header_slugs = profile_section!("Pandoc header slugs", profile, {
628            if flavor.is_pandoc_compatible() {
629                crate::utils::pandoc::collect_pandoc_header_slugs(content)
630            } else {
631                std::collections::HashSet::new()
632            }
633        });
634
635        // Pre-compute Pandoc example-list marker ranges for Pandoc-compatible flavors
636        let example_list_marker_ranges = profile_section!("Example list markers", profile, {
637            if flavor.is_pandoc_compatible() {
638                crate::utils::pandoc::detect_example_list_marker_ranges(content)
639            } else {
640                Vec::new()
641            }
642        });
643
644        // Pre-compute Pandoc example reference ranges for Pandoc-compatible flavors
645        let example_reference_ranges = profile_section!("Example references", profile, {
646            if flavor.is_pandoc_compatible() {
647                crate::utils::pandoc::detect_example_reference_ranges(content, &example_list_marker_ranges)
648            } else {
649                Vec::new()
650            }
651        });
652
653        // Pre-compute Pandoc subscript (~x~) and superscript (^x^) ranges
654        let sub_super_ranges = profile_section!("Subscript/superscript ranges", profile, {
655            if flavor.is_pandoc_compatible() {
656                crate::utils::pandoc::detect_subscript_superscript_ranges(content)
657            } else {
658                Vec::new()
659            }
660        });
661
662        // Pre-compute Pandoc inline code attribute ranges (`code`{.lang}) for Pandoc-compatible flavors
663        let inline_code_attr_ranges = profile_section!("Inline code attribute ranges", profile, {
664            if flavor.is_pandoc_compatible() {
665                crate::utils::pandoc::detect_inline_code_attr_ranges(content)
666            } else {
667                Vec::new()
668            }
669        });
670
671        // Pre-compute Pandoc bracketed span ranges ([text]{attrs}) for Pandoc-compatible flavors
672        let bracketed_span_ranges = profile_section!("Bracketed span ranges", profile, {
673            if flavor.is_pandoc_compatible() {
674                crate::utils::pandoc::detect_bracketed_span_ranges(content)
675            } else {
676                Vec::new()
677            }
678        });
679
680        // Pre-compute Pandoc line block ranges (| text) for Pandoc-compatible flavors
681        let line_block_ranges = profile_section!("Line block ranges", profile, {
682            if flavor.is_pandoc_compatible() {
683                crate::utils::pandoc::detect_line_block_ranges(content)
684            } else {
685                Vec::new()
686            }
687        });
688
689        // Pre-compute Pandoc pipe-table caption ranges (: caption) for Pandoc-compatible flavors
690        let pipe_table_caption_ranges = profile_section!("Pipe-table caption ranges", profile, {
691            if flavor.is_pandoc_compatible() {
692                crate::utils::pandoc::detect_pipe_table_caption_ranges(content)
693            } else {
694                Vec::new()
695            }
696        });
697
698        // Pre-compute Pandoc YAML metadata block ranges (--- ... --- or ...) for Pandoc-compatible flavors
699        let pandoc_metadata_ranges = profile_section!("Pandoc metadata ranges", profile, {
700            if flavor.is_pandoc_compatible() {
701                crate::utils::pandoc::detect_yaml_metadata_block_ranges(content)
702            } else {
703                Vec::new()
704            }
705        });
706
707        // Pre-compute Pandoc grid-table ranges (+---+---+) for Pandoc-compatible flavors
708        let grid_table_ranges = profile_section!("Grid table ranges", profile, {
709            if flavor.is_pandoc_compatible() {
710                crate::utils::pandoc::detect_grid_table_ranges(content)
711            } else {
712                Vec::new()
713            }
714        });
715
716        // Pre-compute Pandoc multi-line table ranges for Pandoc-compatible flavors
717        let multi_line_table_ranges = profile_section!("Multi-line table ranges", profile, {
718            if flavor.is_pandoc_compatible() {
719                crate::utils::pandoc::detect_multi_line_table_ranges(content)
720            } else {
721                Vec::new()
722            }
723        });
724
725        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
726        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
727            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
728            let mut ranges = Vec::new();
729            for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
730                ranges.push((mat.start(), mat.end()));
731            }
732            ranges
733        });
734
735        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
736
737        Self {
738            content,
739            content_lines,
740            line_offsets,
741            code_blocks,
742            code_block_details,
743            strong_spans,
744            line_to_list,
745            list_start_values,
746            lines,
747            links,
748            images,
749            broken_links,
750            footnote_refs,
751            reference_defs,
752            reference_defs_map,
753            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
754            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
755            list_blocks,
756            char_frequency,
757            html_tags_cache: OnceLock::new(),
758            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
759            table_rows_cache: OnceLock::new(),
760            bare_urls_cache: OnceLock::new(),
761            has_mixed_list_nesting_cache: OnceLock::new(),
762            html_comment_ranges,
763            table_blocks,
764            line_index,
765            jinja_ranges,
766            flavor,
767            source_file,
768            jsx_expression_ranges,
769            mdx_comment_ranges,
770            citation_ranges,
771            pandoc_div_ranges,
772            colon_fence_ranges,
773            inline_footnote_ranges,
774            pandoc_header_slugs,
775            example_list_marker_ranges,
776            example_reference_ranges,
777            sub_super_ranges,
778            inline_code_attr_ranges,
779            bracketed_span_ranges,
780            line_block_ranges,
781            pipe_table_caption_ranges,
782            pandoc_metadata_ranges,
783            grid_table_ranges,
784            multi_line_table_ranges,
785            shortcode_ranges,
786            link_title_ranges,
787            code_span_byte_ranges: code_span_ranges,
788            inline_config,
789            obsidian_comment_ranges,
790            lazy_cont_lines_cache: OnceLock::new(),
791        }
792    }
793
794    /// Binary search for whether `pos` falls inside any range in a sorted, non-overlapping
795    /// slice of `(start, end)` byte ranges. O(log n) instead of O(n).
796    #[inline]
797    fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
798        // Find the rightmost range whose start <= pos
799        let idx = ranges.partition_point(|&(start, _)| start <= pos);
800        // If idx == 0, no range starts at or before pos
801        idx > 0 && pos < ranges[idx - 1].1
802    }
803
804    /// Check if a byte position is within a code span. O(log n).
805    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
806        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
807    }
808
809    /// Check if `pos` is inside any link byte range. O(log n).
810    pub fn is_in_link(&self, pos: usize) -> bool {
811        let idx = self.links.partition_point(|link| link.byte_offset <= pos);
812        if idx > 0 && pos < self.links[idx - 1].byte_end {
813            return true;
814        }
815        let idx = self.images.partition_point(|img| img.byte_offset <= pos);
816        if idx > 0 && pos < self.images[idx - 1].byte_end {
817            return true;
818        }
819        self.is_in_reference_def(pos)
820    }
821
822    /// Get parsed inline configuration state.
823    pub fn inline_config(&self) -> &InlineConfig {
824        &self.inline_config
825    }
826
827    /// Byte ranges of Azure DevOps colon code fences (`:::lang … :::`).
828    /// Empty for all other flavors.
829    pub fn colon_fence_ranges(&self) -> &[(usize, usize)] {
830        &self.colon_fence_ranges
831    }
832
833    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
834    ///
835    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
836    pub fn raw_lines(&self) -> &[&'a str] {
837        &self.content_lines
838    }
839
840    /// Check if a rule is disabled at a specific line number (1-indexed)
841    ///
842    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
843    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
844    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
845        self.inline_config.is_rule_disabled(rule_name, line_number)
846    }
847
848    /// Get code spans - computed lazily on first access
849    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
850        Arc::clone(
851            self.code_spans_cache
852                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
853        )
854    }
855
856    /// Get math spans - computed lazily on first access
857    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
858        Arc::clone(
859            self.math_spans_cache
860                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
861        )
862    }
863
864    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
865    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
866        let math_spans = self.math_spans();
867        // Binary search: find the last span whose byte_offset <= byte_pos
868        let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
869        idx > 0 && byte_pos < math_spans[idx - 1].byte_end
870    }
871
872    /// Get HTML comment ranges - pre-computed during LintContext construction
873    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
874        &self.html_comment_ranges
875    }
876
877    /// Check if a byte position is inside an Obsidian comment
878    ///
879    /// Returns false for non-Obsidian flavors.
880    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
881        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
882    }
883
884    /// Check if a line/column position is inside an Obsidian comment
885    ///
886    /// Line number is 1-indexed, column is 1-indexed.
887    /// Returns false for non-Obsidian flavors.
888    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
889        if self.obsidian_comment_ranges.is_empty() {
890            return false;
891        }
892
893        // Convert line/column (1-indexed, char-based) to byte position
894        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
895        self.is_in_obsidian_comment(byte_pos)
896    }
897
898    /// Get HTML tags - computed lazily on first access
899    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
900        Arc::clone(self.html_tags_cache.get_or_init(|| {
901            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
902            // Filter out HTML tags inside kramdown extension blocks
903            Arc::new(
904                tags.into_iter()
905                    .filter(|tag| {
906                        !self
907                            .lines
908                            .get(tag.line - 1)
909                            .is_some_and(|l| l.in_kramdown_extension_block)
910                    })
911                    .collect(),
912            )
913        }))
914    }
915
916    /// Get emphasis spans - pre-computed during construction
917    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
918        Arc::clone(
919            self.emphasis_spans_cache
920                .get()
921                .expect("emphasis_spans_cache initialized during construction"),
922        )
923    }
924
925    /// Get table rows - computed lazily on first access
926    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
927        Arc::clone(
928            self.table_rows_cache
929                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
930        )
931    }
932
933    /// Get bare URLs - computed lazily on first access
934    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
935        Arc::clone(self.bare_urls_cache.get_or_init(|| {
936            Arc::new(element_parsers::parse_bare_urls(
937                self.content,
938                &self.lines,
939                &self.code_blocks,
940            ))
941        }))
942    }
943
944    /// Get lazy continuation lines - computed lazily on first access
945    pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
946        Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
947            Arc::new(element_parsers::detect_lazy_continuation_lines(
948                self.content,
949                &self.lines,
950                &self.line_offsets,
951            ))
952        }))
953    }
954
955    /// Check if document has mixed ordered/unordered list nesting.
956    /// Result is cached after first computation (document-level invariant).
957    /// This is used by MD007 for smart style auto-detection.
958    pub fn has_mixed_list_nesting(&self) -> bool {
959        *self
960            .has_mixed_list_nesting_cache
961            .get_or_init(|| self.compute_mixed_list_nesting())
962    }
963
964    /// Internal computation for mixed list nesting (only called once per LintContext).
965    fn compute_mixed_list_nesting(&self) -> bool {
966        // Track parent list items by their marker position and type
967        // Using marker_column instead of indent because it works correctly
968        // for blockquoted content where indent doesn't account for the prefix
969        // Stack stores: (marker_column, is_ordered)
970        let mut stack: Vec<(usize, bool)> = Vec::new();
971        let mut last_was_blank = false;
972
973        for line_info in &self.lines {
974            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
975            if line_info.in_code_block
976                || line_info.in_front_matter
977                || line_info.in_mkdocstrings
978                || line_info.in_html_comment
979                || line_info.in_mdx_comment
980                || line_info.in_esm_block
981            {
982                continue;
983            }
984
985            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
986            if line_info.is_blank {
987                last_was_blank = true;
988                continue;
989            }
990
991            if let Some(list_item) = &line_info.list_item {
992                // Normalize column 1 to column 0 (consistent with MD007 check function)
993                let current_pos = if list_item.marker_column == 1 {
994                    0
995                } else {
996                    list_item.marker_column
997                };
998
999                // If there was a blank line and this item is at root level, reset stack
1000                if last_was_blank && current_pos == 0 {
1001                    stack.clear();
1002                }
1003                last_was_blank = false;
1004
1005                // Pop items at same or greater position (they're siblings or deeper, not parents)
1006                while let Some(&(pos, _)) = stack.last() {
1007                    if pos >= current_pos {
1008                        stack.pop();
1009                    } else {
1010                        break;
1011                    }
1012                }
1013
1014                // Check if immediate parent has different type - this is mixed nesting
1015                if let Some(&(_, parent_is_ordered)) = stack.last()
1016                    && parent_is_ordered != list_item.is_ordered
1017                {
1018                    return true; // Found mixed nesting - early exit
1019                }
1020
1021                stack.push((current_pos, list_item.is_ordered));
1022            } else {
1023                // Non-list line (but not blank) - could be paragraph or other content
1024                last_was_blank = false;
1025            }
1026        }
1027
1028        false
1029    }
1030
1031    /// Map a byte offset to (line, column)
1032    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1033        match self.line_offsets.binary_search(&offset) {
1034            Ok(line) => (line + 1, 1),
1035            Err(line) => {
1036                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1037                (line, offset - line_start + 1)
1038            }
1039        }
1040    }
1041
1042    /// Check if a position is within a code block or code span. O(log n).
1043    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1044        // Check code blocks first (already uses binary search internally)
1045        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1046            return true;
1047        }
1048
1049        // Check inline code spans via binary search
1050        self.is_byte_offset_in_code_span(pos)
1051    }
1052
1053    /// Get line information by line number (1-indexed)
1054    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1055        if line_num > 0 {
1056            self.lines.get(line_num - 1)
1057        } else {
1058            None
1059        }
1060    }
1061
1062    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1063    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1064        let normalized_id = ref_id.to_lowercase();
1065        self.reference_defs_map
1066            .get(&normalized_id)
1067            .map(|&idx| self.reference_defs[idx].url.as_str())
1068    }
1069
1070    /// Check if a line is part of a list block
1071    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1072        self.list_blocks
1073            .iter()
1074            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1075    }
1076
1077    /// Check if a line is within an HTML block
1078    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1079        if line_num == 0 || line_num > self.lines.len() {
1080            return false;
1081        }
1082        self.lines[line_num - 1].in_html_block
1083    }
1084
1085    /// Check if a 1-indexed line number is inside a GFM table block.
1086    ///
1087    /// Returns `true` for the header line, delimiter line, and all body rows.
1088    /// `TableBlock` spans are stored 0-indexed; this helper accepts the
1089    /// 1-indexed line numbers used elsewhere in the rule API.
1090    pub fn is_in_table_block(&self, line_num: usize) -> bool {
1091        if line_num == 0 {
1092            return false;
1093        }
1094        let line_idx = line_num - 1;
1095        self.table_blocks
1096            .iter()
1097            .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
1098    }
1099
1100    /// Check if a line and column is within a code span
1101    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1102        if line_num == 0 || line_num > self.lines.len() {
1103            return false;
1104        }
1105
1106        // Use the code spans cache to check
1107        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1108        // Convert col to 0-indexed for comparison
1109        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1110        let code_spans = self.code_spans();
1111        code_spans.iter().any(|span| {
1112            // Check if line is within the span's line range
1113            if line_num < span.line || line_num > span.end_line {
1114                return false;
1115            }
1116
1117            if span.line == span.end_line {
1118                // Single-line span: check column bounds
1119                col_0indexed >= span.start_col && col_0indexed < span.end_col
1120            } else if line_num == span.line {
1121                // First line of multi-line span: anything after start_col is in span
1122                col_0indexed >= span.start_col
1123            } else if line_num == span.end_line {
1124                // Last line of multi-line span: anything before end_col is in span
1125                col_0indexed < span.end_col
1126            } else {
1127                // Middle line of multi-line span: entire line is in span
1128                true
1129            }
1130        })
1131    }
1132
1133    /// Check if a byte offset is within a code span. O(log n).
1134    #[inline]
1135    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1136        let code_spans = self.code_spans();
1137        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1138        idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1139    }
1140
1141    /// Check if a byte position is within a reference definition. O(log n).
1142    #[inline]
1143    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1144        let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1145        idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1146    }
1147
1148    /// Check if a byte position is within an HTML comment. O(log n).
1149    #[inline]
1150    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1151        let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1152        idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1153    }
1154
1155    /// Check if a byte position is within an HTML tag (including multiline tags).
1156    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines. O(log n).
1157    #[inline]
1158    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1159        let tags = self.html_tags();
1160        let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1161        idx > 0 && byte_pos < tags[idx - 1].byte_end
1162    }
1163
1164    /// Check if a byte position is within a Jinja template ({{ }} or {% %}). O(log n).
1165    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1166        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1167    }
1168
1169    /// Check if a byte position is within a JSX expression (MDX: {expression}). O(log n).
1170    #[inline]
1171    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1172        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1173    }
1174
1175    /// Check if a byte position is within an MDX comment ({/* ... */}). O(log n).
1176    #[inline]
1177    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1178        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1179    }
1180
1181    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`).
1182    /// Active for Pandoc-compatible flavors. O(log n).
1183    #[inline]
1184    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1185        let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1186        idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1187    }
1188
1189    /// Pre-computed Pandoc/Quarto citation ranges.
1190    #[inline]
1191    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1192        &self.citation_ranges
1193    }
1194
1195    /// Check if a byte position is within a Pandoc/Quarto div block (`::: ... :::`).
1196    /// Active for Pandoc-compatible flavors. O(log n) via binary search over sorted ranges.
1197    #[inline]
1198    pub fn is_in_div_block(&self, byte_pos: usize) -> bool {
1199        let idx = self.pandoc_div_ranges.partition_point(|r| r.start <= byte_pos);
1200        idx > 0 && byte_pos < self.pandoc_div_ranges[idx - 1].end
1201    }
1202
1203    /// Check if a byte position is within a Pandoc inline footnote (`^[note text]`).
1204    /// Active for Pandoc-compatible flavors. O(log n).
1205    #[inline]
1206    pub fn is_in_inline_footnote(&self, byte_pos: usize) -> bool {
1207        let idx = self.inline_footnote_ranges.partition_point(|r| r.start <= byte_pos);
1208        idx > 0 && byte_pos < self.inline_footnote_ranges[idx - 1].end
1209    }
1210
1211    /// Check if a byte position is within a Pandoc example-list marker (`(@)` /
1212    /// `(@label)` at line start). Active for Pandoc-compatible flavors. O(log n).
1213    #[inline]
1214    pub fn is_in_example_list_marker(&self, byte_pos: usize) -> bool {
1215        let idx = self.example_list_marker_ranges.partition_point(|r| r.start <= byte_pos);
1216        idx > 0 && byte_pos < self.example_list_marker_ranges[idx - 1].end
1217    }
1218
1219    /// Check if a byte position is within a Pandoc example reference (`(@label)`
1220    /// inline). Active for Pandoc-compatible flavors. O(log n).
1221    #[inline]
1222    pub fn is_in_example_reference(&self, byte_pos: usize) -> bool {
1223        let idx = self.example_reference_ranges.partition_point(|r| r.start <= byte_pos);
1224        idx > 0 && byte_pos < self.example_reference_ranges[idx - 1].end
1225    }
1226
1227    /// Check if a byte position is within a Pandoc subscript (`~x~`) or
1228    /// superscript (`^x^`) span. Active for Pandoc-compatible flavors. O(log n).
1229    #[inline]
1230    pub fn is_in_subscript_or_superscript(&self, byte_pos: usize) -> bool {
1231        let idx = self.sub_super_ranges.partition_point(|r| r.start <= byte_pos);
1232        idx > 0 && byte_pos < self.sub_super_ranges[idx - 1].end
1233    }
1234
1235    /// Check if a byte position is within a Pandoc inline-code attribute block
1236    /// (`{.lang}` immediately following `` `code` ``). Active for Pandoc-compatible
1237    /// flavors. O(log n).
1238    #[inline]
1239    pub fn is_in_inline_code_attr(&self, byte_pos: usize) -> bool {
1240        let idx = self.inline_code_attr_ranges.partition_point(|r| r.start <= byte_pos);
1241        idx > 0 && byte_pos < self.inline_code_attr_ranges[idx - 1].end
1242    }
1243
1244    /// Check if a byte position is within a Pandoc bracketed span (`[text]{attrs}`).
1245    /// Active for Pandoc-compatible flavors. O(log n).
1246    #[inline]
1247    pub fn is_in_bracketed_span(&self, byte_pos: usize) -> bool {
1248        let idx = self.bracketed_span_ranges.partition_point(|r| r.start <= byte_pos);
1249        idx > 0 && byte_pos < self.bracketed_span_ranges[idx - 1].end
1250    }
1251
1252    /// Returns true if `byte_pos` falls inside a Pandoc line block (`| text`).
1253    /// Active for Pandoc-compatible flavors. O(log n).
1254    #[inline]
1255    pub fn is_in_line_block(&self, byte_pos: usize) -> bool {
1256        let idx = self.line_block_ranges.partition_point(|r| r.start <= byte_pos);
1257        idx > 0 && byte_pos < self.line_block_ranges[idx - 1].end
1258    }
1259
1260    /// Returns true if `byte_pos` falls inside a Pandoc pipe-table caption
1261    /// (`: caption` adjacent to a pipe table). Active for Pandoc-compatible
1262    /// flavors. O(log n).
1263    #[inline]
1264    pub fn is_in_pipe_table_caption(&self, byte_pos: usize) -> bool {
1265        let idx = self.pipe_table_caption_ranges.partition_point(|r| r.start <= byte_pos);
1266        idx > 0 && byte_pos < self.pipe_table_caption_ranges[idx - 1].end
1267    }
1268
1269    /// Returns true if `byte_pos` falls inside a Pandoc YAML metadata block.
1270    /// Active for Pandoc-compatible flavors. O(log n).
1271    #[inline]
1272    pub fn is_in_pandoc_metadata(&self, byte_pos: usize) -> bool {
1273        let idx = self.pandoc_metadata_ranges.partition_point(|r| r.start <= byte_pos);
1274        idx > 0 && byte_pos < self.pandoc_metadata_ranges[idx - 1].end
1275    }
1276
1277    /// Returns true if `byte_pos` falls inside a Pandoc grid table.
1278    /// Active for Pandoc-compatible flavors. O(log n).
1279    #[inline]
1280    pub fn is_in_grid_table(&self, byte_pos: usize) -> bool {
1281        let idx = self.grid_table_ranges.partition_point(|r| r.start <= byte_pos);
1282        idx > 0 && byte_pos < self.grid_table_ranges[idx - 1].end
1283    }
1284
1285    /// Returns true if `byte_pos` falls inside a Pandoc multi-line table.
1286    /// Active for Pandoc-compatible flavors. O(log n).
1287    #[inline]
1288    pub fn is_in_multi_line_table(&self, byte_pos: usize) -> bool {
1289        let idx = self.multi_line_table_ranges.partition_point(|r| r.start <= byte_pos);
1290        idx > 0 && byte_pos < self.multi_line_table_ranges[idx - 1].end
1291    }
1292
1293    /// Returns true if `link_text`, after Pandoc slugification, matches a heading
1294    /// in the document. Returns false for non-Pandoc-compatible flavors because
1295    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1296    /// off. Use this when the caller has raw bracketed text (`[Section name]`).
1297    pub fn matches_implicit_header_reference(&self, link_text: &str) -> bool {
1298        let slug = crate::utils::pandoc::pandoc_header_slug(link_text);
1299        self.pandoc_header_slugs.contains(&slug)
1300    }
1301
1302    /// Returns true if `slug` (already in Pandoc-slug form) matches a heading
1303    /// in the document. Returns false for non-Pandoc-compatible flavors because
1304    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1305    /// off. Use this when the caller already has a slug (e.g. the fragment of a
1306    /// URL after `#`). O(1).
1307    #[inline]
1308    pub fn has_pandoc_slug(&self, slug: &str) -> bool {
1309        self.pandoc_header_slugs.contains(slug)
1310    }
1311
1312    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}}). O(log n).
1313    #[inline]
1314    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1315        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1316    }
1317
1318    /// Pre-computed Hugo/Quarto shortcode ranges.
1319    #[inline]
1320    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1321        &self.shortcode_ranges
1322    }
1323
1324    /// Check if a byte position is within a link reference definition title. O(log n).
1325    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1326        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1327    }
1328
1329    /// Check if content has any instances of a specific character (fast)
1330    pub fn has_char(&self, ch: char) -> bool {
1331        match ch {
1332            '#' => self.char_frequency.hash_count > 0,
1333            '*' => self.char_frequency.asterisk_count > 0,
1334            '_' => self.char_frequency.underscore_count > 0,
1335            '-' => self.char_frequency.hyphen_count > 0,
1336            '+' => self.char_frequency.plus_count > 0,
1337            '>' => self.char_frequency.gt_count > 0,
1338            '|' => self.char_frequency.pipe_count > 0,
1339            '[' => self.char_frequency.bracket_count > 0,
1340            '`' => self.char_frequency.backtick_count > 0,
1341            '<' => self.char_frequency.lt_count > 0,
1342            '!' => self.char_frequency.exclamation_count > 0,
1343            '\n' => self.char_frequency.newline_count > 0,
1344            _ => self.content.contains(ch), // Fallback for other characters
1345        }
1346    }
1347
1348    /// Get count of a specific character (fast)
1349    pub fn char_count(&self, ch: char) -> usize {
1350        match ch {
1351            '#' => self.char_frequency.hash_count,
1352            '*' => self.char_frequency.asterisk_count,
1353            '_' => self.char_frequency.underscore_count,
1354            '-' => self.char_frequency.hyphen_count,
1355            '+' => self.char_frequency.plus_count,
1356            '>' => self.char_frequency.gt_count,
1357            '|' => self.char_frequency.pipe_count,
1358            '[' => self.char_frequency.bracket_count,
1359            '`' => self.char_frequency.backtick_count,
1360            '<' => self.char_frequency.lt_count,
1361            '!' => self.char_frequency.exclamation_count,
1362            '\n' => self.char_frequency.newline_count,
1363            _ => self.content.matches(ch).count(), // Fallback for other characters
1364        }
1365    }
1366
1367    /// Check if content likely contains headings (fast)
1368    pub fn likely_has_headings(&self) -> bool {
1369        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') // Setext H1 underlines use '='
1370    }
1371
1372    /// Check if content likely contains lists (fast)
1373    pub fn likely_has_lists(&self) -> bool {
1374        self.char_frequency.asterisk_count > 0
1375            || self.char_frequency.hyphen_count > 0
1376            || self.char_frequency.plus_count > 0
1377    }
1378
1379    /// Check if content likely contains emphasis (fast)
1380    pub fn likely_has_emphasis(&self) -> bool {
1381        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1382    }
1383
1384    /// Check if content likely contains tables (fast)
1385    pub fn likely_has_tables(&self) -> bool {
1386        self.char_frequency.pipe_count > 2
1387    }
1388
1389    /// Check if content likely contains blockquotes (fast)
1390    pub fn likely_has_blockquotes(&self) -> bool {
1391        self.char_frequency.gt_count > 0
1392    }
1393
1394    /// Check if content likely contains code (fast)
1395    pub fn likely_has_code(&self) -> bool {
1396        self.char_frequency.backtick_count > 0
1397    }
1398
1399    /// Check if content likely contains links or images (fast)
1400    pub fn likely_has_links_or_images(&self) -> bool {
1401        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1402    }
1403
1404    /// Check if content likely contains HTML (fast)
1405    pub fn likely_has_html(&self) -> bool {
1406        self.char_frequency.lt_count > 0
1407    }
1408
1409    /// Get the blockquote prefix for inserting a blank line at the given line index.
1410    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1411    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1412    /// Returns an empty string if the line is not inside a blockquote.
1413    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1414        if let Some(line_info) = self.lines.get(line_idx)
1415            && let Some(ref bq) = line_info.blockquote
1416        {
1417            bq.prefix.trim_end().to_string()
1418        } else {
1419            String::new()
1420        }
1421    }
1422
1423    /// Find the line index for a given byte offset using binary search.
1424    /// Returns (line_index, line_number, column) where:
1425    /// - line_index is the 0-based index in the lines array
1426    /// - line_number is the 1-based line number
1427    /// - column is the byte offset within that line
1428    #[inline]
1429    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1430        // Binary search to find the line containing this byte offset
1431        let idx = match lines.binary_search_by(|line| {
1432            if byte_offset < line.byte_offset {
1433                std::cmp::Ordering::Greater
1434            } else if byte_offset > line.byte_offset + line.byte_len {
1435                std::cmp::Ordering::Less
1436            } else {
1437                std::cmp::Ordering::Equal
1438            }
1439        }) {
1440            Ok(idx) => idx,
1441            Err(idx) => idx.saturating_sub(1),
1442        };
1443
1444        let line = &lines[idx];
1445        let line_num = idx + 1;
1446        let col = byte_offset.saturating_sub(line.byte_offset);
1447
1448        (idx, line_num, col)
1449    }
1450
1451    /// Check if a byte offset is within a code span using binary search
1452    #[inline]
1453    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1454        // Since spans are sorted by byte_offset, use partition_point for binary search
1455        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1456
1457        // Check the span that starts at or before our offset
1458        if idx > 0 {
1459            let span = &code_spans[idx - 1];
1460            if offset >= span.byte_offset && offset < span.byte_end {
1461                return true;
1462            }
1463        }
1464
1465        false
1466    }
1467
1468    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1469    ///
1470    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1471    /// This is the standard iterator for rules that need to process headings.
1472    ///
1473    /// # Examples
1474    ///
1475    /// ```
1476    /// use rumdl_lib::lint_context::LintContext;
1477    /// use rumdl_lib::config::MarkdownFlavor;
1478    ///
1479    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1480    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1481    ///
1482    /// for heading in ctx.valid_headings() {
1483    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1484    /// }
1485    /// // Only prints valid headings, skips `#NoSpace`
1486    /// ```
1487    #[must_use]
1488    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1489        ValidHeadingsIter::new(&self.lines)
1490    }
1491
1492    /// Check if the document contains any valid CommonMark headings
1493    ///
1494    /// Returns `true` if there is at least one heading with proper space after `#`.
1495    #[must_use]
1496    pub fn has_valid_headings(&self) -> bool {
1497        self.lines
1498            .iter()
1499            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1500    }
1501}
1502
1503/// Detect footnote definitions and mark their continuation lines.
1504///
1505/// Uses pulldown-cmark to find footnote definition ranges and fenced code
1506/// blocks within them, then:
1507/// 1. Sets `in_footnote_definition = true` on all lines within
1508/// 2. Clears `in_code_block = false` on continuation lines that were
1509///    misidentified as indented code blocks (but preserves real fenced
1510///    code blocks within footnotes)
1511fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1512    use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1513
1514    let options = crate::utils::rumdl_parser_options();
1515    let parser = Parser::new_ext(content, options).into_offset_iter();
1516
1517    // Collect footnote ranges and fenced code block ranges within them
1518    let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1519    let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1520    let mut in_footnote = false;
1521
1522    for (event, range) in parser {
1523        match event {
1524            Event::Start(Tag::FootnoteDefinition(_)) => {
1525                in_footnote = true;
1526                footnote_ranges.push((range.start, range.end));
1527            }
1528            Event::End(TagEnd::FootnoteDefinition) => {
1529                in_footnote = false;
1530            }
1531            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1532                fenced_code_ranges.push((range.start, range.end));
1533            }
1534            _ => {}
1535        }
1536    }
1537
1538    let byte_to_line = |byte_offset: usize| -> usize {
1539        line_offsets
1540            .partition_point(|&offset| offset <= byte_offset)
1541            .saturating_sub(1)
1542    };
1543
1544    // Mark footnote definition lines
1545    for &(start, end) in &footnote_ranges {
1546        let start_line = byte_to_line(start);
1547        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1548
1549        for line in &mut lines[start_line..end_line] {
1550            line.in_footnote_definition = true;
1551            line.in_code_block = false;
1552        }
1553    }
1554
1555    // Restore in_code_block for fenced code blocks within footnotes
1556    for &(start, end) in &fenced_code_ranges {
1557        let start_line = byte_to_line(start);
1558        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1559
1560        for line in &mut lines[start_line..end_line] {
1561            line.in_code_block = true;
1562        }
1563    }
1564}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs