rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Runs a section of code and yields its value, optionally reporting how long
/// the section took.
///
/// Takes a display label, a boolean switch, and the expression to run. The
/// clock is started before the expression is evaluated; after it finishes, a
/// `[PROFILE] <label>: <elapsed>` line is written to stderr if the switch is
/// true. This timing variant is compiled for every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let started_at = std::time::Instant::now();
        let value = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, started_at.elapsed());
        }
        value
    }};
}

/// `wasm32` variant of `profile_section!`: accepts the same three arguments but
/// expands to the section expression alone. The label and the switch are never
/// expanded, so nothing is timed or printed.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        $body
    }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) pandoc_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub code_block_details: Vec<CodeBlockDetail>, // Per-block metadata (fenced/indented, info string)
61    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, // Pre-computed strong emphasis spans
62    pub line_to_list: crate::utils::code_block_utils::LineToListMap, // Ordered list membership by line
63    pub list_start_values: crate::utils::code_block_utils::ListStartValues, // Start values per list ID
64    pub lines: Vec<LineInfo>,             // Pre-computed line information
65    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
66    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
67    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
68    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
69    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
70    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
71    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
72    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
73    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
74    pub char_frequency: CharFrequency,    // Character frequency analysis
75    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
76    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
77    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
78    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
79    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
80    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
81    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
82    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
83    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
84    pub flavor: MarkdownFlavor,           // Markdown flavor being used
85    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
86    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
87    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
88    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (@key, [@key])
89    pandoc_div_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto div block ranges (::: ... :::)
90    inline_footnote_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline footnote ranges (^[...])
91    pandoc_header_slugs: std::collections::HashSet<String>, // Pre-computed Pandoc implicit header reference slugs
92    example_list_marker_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example-list marker ranges (@) / (@label)
93    example_reference_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc example reference ranges (@label) inline
94    sub_super_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc subscript (~x~) and superscript (^x^) ranges
95    inline_code_attr_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc inline code attribute ranges (`code`{.lang})
96    bracketed_span_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc bracketed span ranges ([text]{attrs})
97    line_block_ranges: Vec<crate::utils::skip_context::ByteRange>,     // Pre-computed Pandoc line block ranges (| text)
98    pipe_table_caption_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc pipe-table caption ranges (: caption)
99    pandoc_metadata_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc YAML metadata block ranges (--- ... --- or ...)
100    grid_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc grid-table ranges (+---+---+)
101    multi_line_table_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc multi-line table ranges
102    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
103    link_title_ranges: Vec<(usize, usize)>, // Pre-computed sorted link title byte ranges
104    code_span_byte_ranges: Vec<(usize, usize)>, // Pre-computed code span byte ranges from pulldown-cmark
105    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
106    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
107    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, // Lazy-loaded lazy continuation lines
108}
109
110impl<'a> LintContext<'a> {
111    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
112        #[cfg(not(target_arch = "wasm32"))]
113        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
114
115        let line_offsets = profile_section!("Line offsets", profile, {
116            let mut offsets = vec![0];
117            for (i, c) in content.char_indices() {
118                if c == '\n' {
119                    offsets.push(i + 1);
120                }
121            }
122            offsets
123        });
124
125        // Compute content_lines once for all functions that need it
126        let content_lines: Vec<&str> = content.lines().collect();
127
128        // Detect front matter boundaries once for all functions that need it
129        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
130
131        // Detect code blocks and code spans once and cache them
132        let parse_result = profile_section!(
133            "Code blocks",
134            profile,
135            CodeBlockUtils::detect_code_blocks_and_spans(content)
136        );
137        let mut code_blocks = parse_result.code_blocks;
138        let code_span_ranges = parse_result.code_spans;
139        let code_block_details = parse_result.code_block_details;
140        let strong_spans = parse_result.strong_spans;
141        let line_to_list = parse_result.line_to_list;
142        let list_start_values = parse_result.list_start_values;
143
144        // Pre-compute HTML comment ranges ONCE for all operations
145        let html_comment_ranges = profile_section!(
146            "HTML comment ranges",
147            profile,
148            crate::utils::skip_context::compute_html_comment_ranges(content)
149        );
150
151        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
152        // Detected for all flavors: `:::` blocks are structurally unique and should
153        // never be reflowed as prose, even without MkDocs flavor.
154        let autodoc_ranges = profile_section!(
155            "Autodoc block ranges",
156            profile,
157            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
158        );
159
160        // Pre-compute Pandoc/Quarto div block ranges for Pandoc-compatible flavors
161        let pandoc_div_ranges = profile_section!("Pandoc div ranges", profile, {
162            if flavor.is_pandoc_compatible() {
163                crate::utils::pandoc::detect_div_block_ranges(content)
164            } else {
165                Vec::new()
166            }
167        });
168
169        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
170        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
171            if flavor == MarkdownFlavor::MkDocs {
172                crate::utils::pymdown_blocks::detect_block_ranges(content)
173            } else {
174                Vec::new()
175            }
176        });
177
178        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
179        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
180        let skip_ranges = SkipByteRanges {
181            html_comment_ranges: &html_comment_ranges,
182            autodoc_ranges: &autodoc_ranges,
183            pandoc_div_ranges: &pandoc_div_ranges,
184            pymdown_block_ranges: &pymdown_block_ranges,
185        };
186        let (mut lines, emphasis_spans) = profile_section!(
187            "Basic line info",
188            profile,
189            line_computation::compute_basic_line_info(
190                content,
191                &content_lines,
192                &line_offsets,
193                &code_blocks,
194                flavor,
195                &skip_ranges,
196                front_matter_end,
197            )
198        );
199
200        // Detect HTML blocks BEFORE heading detection
201        profile_section!(
202            "HTML blocks",
203            profile,
204            heading_detection::detect_html_blocks(content, &mut lines)
205        );
206
207        // Detect ESM import/export blocks in MDX files BEFORE heading detection
208        profile_section!(
209            "ESM blocks",
210            profile,
211            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
212        );
213
214        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
215        profile_section!(
216            "JSX block detection",
217            profile,
218            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
219        );
220
221        // Detect JSX expressions and MDX comments in MDX files
222        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
223            "JSX/MDX detection",
224            profile,
225            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
226        );
227
228        // Detect `<div markdown>`-style HTML blocks (grid cards, etc.) regardless of flavor.
229        // The `markdown` attribute is an explicit, author-supplied signal; recognizing it
230        // in all flavors keeps `rumdl fmt` from mangling Material grid cards when the
231        // MkDocs flavor isn't active.
232        profile_section!(
233            "Markdown-in-HTML blocks",
234            profile,
235            flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
236        );
237
238        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
239        profile_section!(
240            "MkDocs constructs",
241            profile,
242            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
243        );
244
245        // Detect footnote definitions and correct false code block detection.
246        // With ENABLE_FOOTNOTES, pulldown-cmark correctly parses multi-line
247        // footnotes, but the code block detector may still mark 4-space-indented
248        // footnote continuation lines as indented code blocks.
249        profile_section!(
250            "Footnote definitions",
251            profile,
252            detect_footnote_definitions(content, &mut lines, &line_offsets)
253        );
254
255        // Filter code_blocks to remove false positives from footnote continuation content.
256        // Same pattern as MkDocs/JSX corrections below.
257        {
258            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
259            for &(start, end) in &code_blocks {
260                let start_line = line_offsets
261                    .partition_point(|&offset| offset <= start)
262                    .saturating_sub(1);
263                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
264
265                let mut sub_start: Option<usize> = None;
266                for (i, &offset) in line_offsets[start_line..end_line]
267                    .iter()
268                    .enumerate()
269                    .map(|(j, o)| (j + start_line, o))
270                {
271                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
272                    if is_real_code && sub_start.is_none() {
273                        let byte_start = if i == start_line { start } else { offset };
274                        sub_start = Some(byte_start);
275                    } else if !is_real_code && sub_start.is_some() {
276                        new_code_blocks.push((sub_start.unwrap(), offset));
277                        sub_start = None;
278                    }
279                }
280                if let Some(s) = sub_start {
281                    new_code_blocks.push((s, end));
282                }
283            }
284            code_blocks = new_code_blocks;
285        }
286
287        // Filter code_blocks to remove false positives from MkDocs admonition/tab content
288        // and `<div markdown>` HTML blocks (grid cards).
289        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
290        // these containers this is regular markdown content. detect_mkdocs_line_info and
291        // detect_markdown_html_blocks already corrected LineInfo.in_code_block for these lines,
292        // but the code_blocks byte ranges are still stale. We split ranges rather than using
293        // all-or-nothing removal, so fenced code blocks within the containers are preserved.
294        let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
295        if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
296            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
297            for &(start, end) in &code_blocks {
298                let start_line = line_offsets
299                    .partition_point(|&offset| offset <= start)
300                    .saturating_sub(1);
301                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
302
303                // Walk lines in this range, collecting sub-ranges where in_code_block is true
304                let mut sub_start: Option<usize> = None;
305                for (i, &offset) in line_offsets[start_line..end_line]
306                    .iter()
307                    .enumerate()
308                    .map(|(j, o)| (j + start_line, o))
309                {
310                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
311                    if is_real_code && sub_start.is_none() {
312                        let byte_start = if i == start_line { start } else { offset };
313                        sub_start = Some(byte_start);
314                    } else if !is_real_code && sub_start.is_some() {
315                        new_code_blocks.push((sub_start.unwrap(), offset));
316                        sub_start = None;
317                    }
318                }
319                if let Some(s) = sub_start {
320                    new_code_blocks.push((s, end));
321                }
322            }
323            code_blocks = new_code_blocks;
324        }
325
326        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
327        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
328        // inside JSX component blocks, but code_blocks byte ranges need updating too.
329        if flavor.supports_jsx() {
330            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
331            for &(start, end) in &code_blocks {
332                let start_line = line_offsets
333                    .partition_point(|&offset| offset <= start)
334                    .saturating_sub(1);
335                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
336
337                let mut sub_start: Option<usize> = None;
338                for (i, &offset) in line_offsets[start_line..end_line]
339                    .iter()
340                    .enumerate()
341                    .map(|(j, o)| (j + start_line, o))
342                {
343                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
344                    if is_real_code && sub_start.is_none() {
345                        let byte_start = if i == start_line { start } else { offset };
346                        sub_start = Some(byte_start);
347                    } else if !is_real_code && sub_start.is_some() {
348                        new_code_blocks.push((sub_start.unwrap(), offset));
349                        sub_start = None;
350                    }
351                }
352                if let Some(s) = sub_start {
353                    new_code_blocks.push((s, end));
354                }
355            }
356            code_blocks = new_code_blocks;
357        }
358
359        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
360        profile_section!(
361            "Kramdown constructs",
362            profile,
363            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
364        );
365
366        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
367        // so downstream heading detection and collection builders never see them.
368        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
369        // from being populated inside extension blocks.
370        for line in &mut lines {
371            if line.in_kramdown_extension_block {
372                line.list_item = None;
373                line.is_horizontal_rule = false;
374                line.blockquote = None;
375                line.is_kramdown_block_ial = false;
376            }
377        }
378
379        // Detect Obsidian comments (%%...%%) in Obsidian flavor
380        let obsidian_comment_ranges = profile_section!(
381            "Obsidian comments",
382            profile,
383            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
384        );
385
386        // Run pulldown-cmark parse for links, images, and link byte ranges in a single pass.
387        // Link byte ranges are needed for heading detection; links/images are finalized later
388        // after code_spans are available.
389        let pulldown_result = profile_section!(
390            "Links, images & link ranges",
391            profile,
392            link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
393        );
394
395        // Now detect headings and blockquotes
396        profile_section!(
397            "Headings & blockquotes",
398            profile,
399            heading_detection::detect_headings_and_blockquotes(
400                &content_lines,
401                &mut lines,
402                flavor,
403                &html_comment_ranges,
404                &pulldown_result.link_byte_ranges,
405                front_matter_end,
406            )
407        );
408
409        // Clear headings that were detected inside kramdown extension blocks
410        for line in &mut lines {
411            if line.in_kramdown_extension_block {
412                line.heading = None;
413            }
414        }
415
416        // Parse code spans early so we can exclude them from link/image parsing
417        let mut code_spans = profile_section!(
418            "Code spans",
419            profile,
420            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
421        );
422
423        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
424        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
425        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
426        if flavor == MarkdownFlavor::MkDocs {
427            let extra = profile_section!(
428                "MkDocs code spans",
429                profile,
430                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
431            );
432            if !extra.is_empty() {
433                code_spans.extend(extra);
434                code_spans.sort_by_key(|span| span.byte_offset);
435            }
436        }
437
438        // Supplement code spans for MDX JSX component body content that pulldown-cmark missed.
439        // pulldown-cmark treats JSX component opening tags (e.g. `<ParamField>`) as HTML block
440        // starters, so backtick code spans within component bodies are invisible to the initial
441        // parse.
442        if flavor == MarkdownFlavor::MDX {
443            let extra = profile_section!(
444                "MDX JSX code spans",
445                profile,
446                element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
447            );
448            if !extra.is_empty() {
449                code_spans.extend(extra);
450                code_spans.sort_by_key(|span| span.byte_offset);
451            }
452        }
453
454        // Mark lines that are continuations of multi-line code spans
455        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
456        for span in &code_spans {
457            if span.end_line > span.line {
458                // Mark lines after the first line as continuations
459                for line_num in (span.line + 1)..=span.end_line {
460                    if let Some(line_info) = lines.get_mut(line_num - 1) {
461                        line_info.in_code_span_continuation = true;
462                    }
463                }
464            }
465        }
466
467        // Finalize links and images: filter by code_spans and run regex fallbacks
468        let (links, images, broken_links, footnote_refs) = profile_section!(
469            "Links & images finalize",
470            profile,
471            link_parser::finalize_links_and_images(
472                content,
473                &lines,
474                &code_blocks,
475                &code_spans,
476                flavor,
477                &html_comment_ranges,
478                pulldown_result
479            )
480        );
481
482        let reference_defs = profile_section!(
483            "Reference defs",
484            profile,
485            link_parser::parse_reference_defs(content, &lines)
486        );
487
488        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
489
490        // Compute character frequency for fast content analysis
491        let char_frequency = profile_section!(
492            "Char frequency",
493            profile,
494            line_computation::compute_char_frequency(content)
495        );
496
497        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
498        let table_blocks = profile_section!(
499            "Table blocks",
500            profile,
501            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
502                content,
503                &code_blocks,
504                &code_spans,
505                &html_comment_ranges,
506            )
507        );
508
509        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
510        // Rules that iterate these collections automatically skip kramdown content.
511        let links = links
512            .into_iter()
513            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
514            .collect::<Vec<_>>();
515        let images = images
516            .into_iter()
517            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
518            .collect::<Vec<_>>();
519        let broken_links = broken_links
520            .into_iter()
521            .filter(|bl| {
522                // BrokenLinkInfo has span but no line field; find line from byte offset
523                let line_idx = line_offsets
524                    .partition_point(|&offset| offset <= bl.span.start)
525                    .saturating_sub(1);
526                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
527            })
528            .collect::<Vec<_>>();
529        let footnote_refs = footnote_refs
530            .into_iter()
531            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
532            .collect::<Vec<_>>();
533        let reference_defs = reference_defs
534            .into_iter()
535            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
536            .collect::<Vec<_>>();
537        let list_blocks = list_blocks
538            .into_iter()
539            .filter(|block| {
540                !lines
541                    .get(block.start_line - 1)
542                    .is_some_and(|l| l.in_kramdown_extension_block)
543            })
544            .collect::<Vec<_>>();
545        let table_blocks = table_blocks
546            .into_iter()
547            .filter(|block| {
548                // TableBlock.start_line is 0-indexed
549                !lines
550                    .get(block.start_line)
551                    .is_some_and(|l| l.in_kramdown_extension_block)
552            })
553            .collect::<Vec<_>>();
554        let emphasis_spans = emphasis_spans
555            .into_iter()
556            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
557            .collect::<Vec<_>>();
558
559        // Rebuild reference_defs_map after filtering
560        let reference_defs_map: HashMap<String, usize> = reference_defs
561            .iter()
562            .enumerate()
563            .map(|(idx, def)| (def.id.to_lowercase(), idx))
564            .collect();
565
566        // Pre-compute sorted link title byte ranges for binary search
567        let link_title_ranges: Vec<(usize, usize)> = reference_defs
568            .iter()
569            .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
570                (Some(start), Some(end)) => Some((start, end)),
571                _ => None,
572            })
573            .collect();
574
575        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
576        let line_index = profile_section!(
577            "Line index",
578            profile,
579            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
580                content,
581                line_offsets.clone(),
582                &code_blocks,
583            )
584        );
585
586        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
587        let jinja_ranges = profile_section!(
588            "Jinja ranges",
589            profile,
590            crate::utils::jinja_utils::find_jinja_ranges(content)
591        );
592
593        // Pre-compute Pandoc/Quarto citation ranges for Pandoc-compatible flavors
594        let citation_ranges = profile_section!("Citation ranges", profile, {
595            if flavor.is_pandoc_compatible() {
596                crate::utils::pandoc::find_citation_ranges(content)
597            } else {
598                Vec::new()
599            }
600        });
601
602        // Pre-compute Pandoc inline footnote ranges for Pandoc-compatible flavors
603        let inline_footnote_ranges = profile_section!("Inline footnote ranges", profile, {
604            if flavor.is_pandoc_compatible() {
605                crate::utils::pandoc::detect_inline_footnote_ranges(content)
606            } else {
607                Vec::new()
608            }
609        });
610
611        // Pre-compute Pandoc implicit header reference slugs for Pandoc-compatible flavors
612        let pandoc_header_slugs = profile_section!("Pandoc header slugs", profile, {
613            if flavor.is_pandoc_compatible() {
614                crate::utils::pandoc::collect_pandoc_header_slugs(content)
615            } else {
616                std::collections::HashSet::new()
617            }
618        });
619
620        // Pre-compute Pandoc example-list marker ranges for Pandoc-compatible flavors
621        let example_list_marker_ranges = profile_section!("Example list markers", profile, {
622            if flavor.is_pandoc_compatible() {
623                crate::utils::pandoc::detect_example_list_marker_ranges(content)
624            } else {
625                Vec::new()
626            }
627        });
628
629        // Pre-compute Pandoc example reference ranges for Pandoc-compatible flavors
630        let example_reference_ranges = profile_section!("Example references", profile, {
631            if flavor.is_pandoc_compatible() {
632                crate::utils::pandoc::detect_example_reference_ranges(content, &example_list_marker_ranges)
633            } else {
634                Vec::new()
635            }
636        });
637
638        // Pre-compute Pandoc subscript (~x~) and superscript (^x^) ranges
639        let sub_super_ranges = profile_section!("Subscript/superscript ranges", profile, {
640            if flavor.is_pandoc_compatible() {
641                crate::utils::pandoc::detect_subscript_superscript_ranges(content)
642            } else {
643                Vec::new()
644            }
645        });
646
647        // Pre-compute Pandoc inline code attribute ranges (`code`{.lang}) for Pandoc-compatible flavors
648        let inline_code_attr_ranges = profile_section!("Inline code attribute ranges", profile, {
649            if flavor.is_pandoc_compatible() {
650                crate::utils::pandoc::detect_inline_code_attr_ranges(content)
651            } else {
652                Vec::new()
653            }
654        });
655
656        // Pre-compute Pandoc bracketed span ranges ([text]{attrs}) for Pandoc-compatible flavors
657        let bracketed_span_ranges = profile_section!("Bracketed span ranges", profile, {
658            if flavor.is_pandoc_compatible() {
659                crate::utils::pandoc::detect_bracketed_span_ranges(content)
660            } else {
661                Vec::new()
662            }
663        });
664
665        // Pre-compute Pandoc line block ranges (| text) for Pandoc-compatible flavors
666        let line_block_ranges = profile_section!("Line block ranges", profile, {
667            if flavor.is_pandoc_compatible() {
668                crate::utils::pandoc::detect_line_block_ranges(content)
669            } else {
670                Vec::new()
671            }
672        });
673
674        // Pre-compute Pandoc pipe-table caption ranges (: caption) for Pandoc-compatible flavors
675        let pipe_table_caption_ranges = profile_section!("Pipe-table caption ranges", profile, {
676            if flavor.is_pandoc_compatible() {
677                crate::utils::pandoc::detect_pipe_table_caption_ranges(content)
678            } else {
679                Vec::new()
680            }
681        });
682
683        // Pre-compute Pandoc YAML metadata block ranges (--- ... --- or ...) for Pandoc-compatible flavors
684        let pandoc_metadata_ranges = profile_section!("Pandoc metadata ranges", profile, {
685            if flavor.is_pandoc_compatible() {
686                crate::utils::pandoc::detect_yaml_metadata_block_ranges(content)
687            } else {
688                Vec::new()
689            }
690        });
691
692        // Pre-compute Pandoc grid-table ranges (+---+---+) for Pandoc-compatible flavors
693        let grid_table_ranges = profile_section!("Grid table ranges", profile, {
694            if flavor.is_pandoc_compatible() {
695                crate::utils::pandoc::detect_grid_table_ranges(content)
696            } else {
697                Vec::new()
698            }
699        });
700
701        // Pre-compute Pandoc multi-line table ranges for Pandoc-compatible flavors
702        let multi_line_table_ranges = profile_section!("Multi-line table ranges", profile, {
703            if flavor.is_pandoc_compatible() {
704                crate::utils::pandoc::detect_multi_line_table_ranges(content)
705            } else {
706                Vec::new()
707            }
708        });
709
710        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
711        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
712            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
713            let mut ranges = Vec::new();
714            for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
715                ranges.push((mat.start(), mat.end()));
716            }
717            ranges
718        });
719
720        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
721
722        Self {
723            content,
724            content_lines,
725            line_offsets,
726            code_blocks,
727            code_block_details,
728            strong_spans,
729            line_to_list,
730            list_start_values,
731            lines,
732            links,
733            images,
734            broken_links,
735            footnote_refs,
736            reference_defs,
737            reference_defs_map,
738            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
739            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
740            list_blocks,
741            char_frequency,
742            html_tags_cache: OnceLock::new(),
743            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
744            table_rows_cache: OnceLock::new(),
745            bare_urls_cache: OnceLock::new(),
746            has_mixed_list_nesting_cache: OnceLock::new(),
747            html_comment_ranges,
748            table_blocks,
749            line_index,
750            jinja_ranges,
751            flavor,
752            source_file,
753            jsx_expression_ranges,
754            mdx_comment_ranges,
755            citation_ranges,
756            pandoc_div_ranges,
757            inline_footnote_ranges,
758            pandoc_header_slugs,
759            example_list_marker_ranges,
760            example_reference_ranges,
761            sub_super_ranges,
762            inline_code_attr_ranges,
763            bracketed_span_ranges,
764            line_block_ranges,
765            pipe_table_caption_ranges,
766            pandoc_metadata_ranges,
767            grid_table_ranges,
768            multi_line_table_ranges,
769            shortcode_ranges,
770            link_title_ranges,
771            code_span_byte_ranges: code_span_ranges,
772            inline_config,
773            obsidian_comment_ranges,
774            lazy_cont_lines_cache: OnceLock::new(),
775        }
776    }
777
778    /// Binary search for whether `pos` falls inside any range in a sorted, non-overlapping
779    /// slice of `(start, end)` byte ranges. O(log n) instead of O(n).
780    #[inline]
781    fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
782        // Find the rightmost range whose start <= pos
783        let idx = ranges.partition_point(|&(start, _)| start <= pos);
784        // If idx == 0, no range starts at or before pos
785        idx > 0 && pos < ranges[idx - 1].1
786    }
787
788    /// Check if a byte position is within a code span. O(log n).
789    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
790        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
791    }
792
793    /// Check if `pos` is inside any link byte range. O(log n).
794    pub fn is_in_link(&self, pos: usize) -> bool {
795        let idx = self.links.partition_point(|link| link.byte_offset <= pos);
796        if idx > 0 && pos < self.links[idx - 1].byte_end {
797            return true;
798        }
799        let idx = self.images.partition_point(|img| img.byte_offset <= pos);
800        if idx > 0 && pos < self.images[idx - 1].byte_end {
801            return true;
802        }
803        self.is_in_reference_def(pos)
804    }
805
806    /// Get parsed inline configuration state.
807    pub fn inline_config(&self) -> &InlineConfig {
808        &self.inline_config
809    }
810
811    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
812    ///
813    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
814    pub fn raw_lines(&self) -> &[&'a str] {
815        &self.content_lines
816    }
817
818    /// Check if a rule is disabled at a specific line number (1-indexed)
819    ///
820    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
821    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
822    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
823        self.inline_config.is_rule_disabled(rule_name, line_number)
824    }
825
826    /// Get code spans - computed lazily on first access
827    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
828        Arc::clone(
829            self.code_spans_cache
830                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
831        )
832    }
833
834    /// Get math spans - computed lazily on first access
835    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
836        Arc::clone(
837            self.math_spans_cache
838                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
839        )
840    }
841
842    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
843    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
844        let math_spans = self.math_spans();
845        // Binary search: find the last span whose byte_offset <= byte_pos
846        let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
847        idx > 0 && byte_pos < math_spans[idx - 1].byte_end
848    }
849
850    /// Get HTML comment ranges - pre-computed during LintContext construction
851    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
852        &self.html_comment_ranges
853    }
854
855    /// Check if a byte position is inside an Obsidian comment
856    ///
857    /// Returns false for non-Obsidian flavors.
858    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
859        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
860    }
861
862    /// Check if a line/column position is inside an Obsidian comment
863    ///
864    /// Line number is 1-indexed, column is 1-indexed.
865    /// Returns false for non-Obsidian flavors.
866    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
867        if self.obsidian_comment_ranges.is_empty() {
868            return false;
869        }
870
871        // Convert line/column (1-indexed, char-based) to byte position
872        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
873        self.is_in_obsidian_comment(byte_pos)
874    }
875
876    /// Get HTML tags - computed lazily on first access
877    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
878        Arc::clone(self.html_tags_cache.get_or_init(|| {
879            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
880            // Filter out HTML tags inside kramdown extension blocks
881            Arc::new(
882                tags.into_iter()
883                    .filter(|tag| {
884                        !self
885                            .lines
886                            .get(tag.line - 1)
887                            .is_some_and(|l| l.in_kramdown_extension_block)
888                    })
889                    .collect(),
890            )
891        }))
892    }
893
894    /// Get emphasis spans - pre-computed during construction
895    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
896        Arc::clone(
897            self.emphasis_spans_cache
898                .get()
899                .expect("emphasis_spans_cache initialized during construction"),
900        )
901    }
902
903    /// Get table rows - computed lazily on first access
904    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
905        Arc::clone(
906            self.table_rows_cache
907                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
908        )
909    }
910
911    /// Get bare URLs - computed lazily on first access
912    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
913        Arc::clone(self.bare_urls_cache.get_or_init(|| {
914            Arc::new(element_parsers::parse_bare_urls(
915                self.content,
916                &self.lines,
917                &self.code_blocks,
918            ))
919        }))
920    }
921
922    /// Get lazy continuation lines - computed lazily on first access
923    pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
924        Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
925            Arc::new(element_parsers::detect_lazy_continuation_lines(
926                self.content,
927                &self.lines,
928                &self.line_offsets,
929            ))
930        }))
931    }
932
933    /// Check if document has mixed ordered/unordered list nesting.
934    /// Result is cached after first computation (document-level invariant).
935    /// This is used by MD007 for smart style auto-detection.
936    pub fn has_mixed_list_nesting(&self) -> bool {
937        *self
938            .has_mixed_list_nesting_cache
939            .get_or_init(|| self.compute_mixed_list_nesting())
940    }
941
942    /// Internal computation for mixed list nesting (only called once per LintContext).
943    fn compute_mixed_list_nesting(&self) -> bool {
944        // Track parent list items by their marker position and type
945        // Using marker_column instead of indent because it works correctly
946        // for blockquoted content where indent doesn't account for the prefix
947        // Stack stores: (marker_column, is_ordered)
948        let mut stack: Vec<(usize, bool)> = Vec::new();
949        let mut last_was_blank = false;
950
951        for line_info in &self.lines {
952            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
953            if line_info.in_code_block
954                || line_info.in_front_matter
955                || line_info.in_mkdocstrings
956                || line_info.in_html_comment
957                || line_info.in_mdx_comment
958                || line_info.in_esm_block
959            {
960                continue;
961            }
962
963            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
964            if line_info.is_blank {
965                last_was_blank = true;
966                continue;
967            }
968
969            if let Some(list_item) = &line_info.list_item {
970                // Normalize column 1 to column 0 (consistent with MD007 check function)
971                let current_pos = if list_item.marker_column == 1 {
972                    0
973                } else {
974                    list_item.marker_column
975                };
976
977                // If there was a blank line and this item is at root level, reset stack
978                if last_was_blank && current_pos == 0 {
979                    stack.clear();
980                }
981                last_was_blank = false;
982
983                // Pop items at same or greater position (they're siblings or deeper, not parents)
984                while let Some(&(pos, _)) = stack.last() {
985                    if pos >= current_pos {
986                        stack.pop();
987                    } else {
988                        break;
989                    }
990                }
991
992                // Check if immediate parent has different type - this is mixed nesting
993                if let Some(&(_, parent_is_ordered)) = stack.last()
994                    && parent_is_ordered != list_item.is_ordered
995                {
996                    return true; // Found mixed nesting - early exit
997                }
998
999                stack.push((current_pos, list_item.is_ordered));
1000            } else {
1001                // Non-list line (but not blank) - could be paragraph or other content
1002                last_was_blank = false;
1003            }
1004        }
1005
1006        false
1007    }
1008
1009    /// Map a byte offset to (line, column)
1010    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
1011        match self.line_offsets.binary_search(&offset) {
1012            Ok(line) => (line + 1, 1),
1013            Err(line) => {
1014                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
1015                (line, offset - line_start + 1)
1016            }
1017        }
1018    }
1019
1020    /// Check if a position is within a code block or code span. O(log n).
1021    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
1022        // Check code blocks first (already uses binary search internally)
1023        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
1024            return true;
1025        }
1026
1027        // Check inline code spans via binary search
1028        self.is_byte_offset_in_code_span(pos)
1029    }
1030
1031    /// Get line information by line number (1-indexed)
1032    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
1033        if line_num > 0 {
1034            self.lines.get(line_num - 1)
1035        } else {
1036            None
1037        }
1038    }
1039
1040    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
1041    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
1042        let normalized_id = ref_id.to_lowercase();
1043        self.reference_defs_map
1044            .get(&normalized_id)
1045            .map(|&idx| self.reference_defs[idx].url.as_str())
1046    }
1047
1048    /// Check if a line is part of a list block
1049    pub fn is_in_list_block(&self, line_num: usize) -> bool {
1050        self.list_blocks
1051            .iter()
1052            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
1053    }
1054
1055    /// Check if a line is within an HTML block
1056    pub fn is_in_html_block(&self, line_num: usize) -> bool {
1057        if line_num == 0 || line_num > self.lines.len() {
1058            return false;
1059        }
1060        self.lines[line_num - 1].in_html_block
1061    }
1062
1063    /// Check if a 1-indexed line number is inside a GFM table block.
1064    ///
1065    /// Returns `true` for the header line, delimiter line, and all body rows.
1066    /// `TableBlock` spans are stored 0-indexed; this helper accepts the
1067    /// 1-indexed line numbers used elsewhere in the rule API.
1068    pub fn is_in_table_block(&self, line_num: usize) -> bool {
1069        if line_num == 0 {
1070            return false;
1071        }
1072        let line_idx = line_num - 1;
1073        self.table_blocks
1074            .iter()
1075            .any(|block| line_idx >= block.start_line && line_idx <= block.end_line)
1076    }
1077
1078    /// Check if a line and column is within a code span
1079    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
1080        if line_num == 0 || line_num > self.lines.len() {
1081            return false;
1082        }
1083
1084        // Use the code spans cache to check
1085        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
1086        // Convert col to 0-indexed for comparison
1087        let col_0indexed = if col > 0 { col - 1 } else { 0 };
1088        let code_spans = self.code_spans();
1089        code_spans.iter().any(|span| {
1090            // Check if line is within the span's line range
1091            if line_num < span.line || line_num > span.end_line {
1092                return false;
1093            }
1094
1095            if span.line == span.end_line {
1096                // Single-line span: check column bounds
1097                col_0indexed >= span.start_col && col_0indexed < span.end_col
1098            } else if line_num == span.line {
1099                // First line of multi-line span: anything after start_col is in span
1100                col_0indexed >= span.start_col
1101            } else if line_num == span.end_line {
1102                // Last line of multi-line span: anything before end_col is in span
1103                col_0indexed < span.end_col
1104            } else {
1105                // Middle line of multi-line span: entire line is in span
1106                true
1107            }
1108        })
1109    }
1110
1111    /// Check if a byte offset is within a code span. O(log n).
1112    #[inline]
1113    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1114        let code_spans = self.code_spans();
1115        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1116        idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1117    }
1118
1119    /// Check if a byte position is within a reference definition. O(log n).
1120    #[inline]
1121    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1122        let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1123        idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1124    }
1125
1126    /// Check if a byte position is within an HTML comment. O(log n).
1127    #[inline]
1128    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1129        let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1130        idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1131    }
1132
1133    /// Check if a byte position is within an HTML tag (including multiline tags).
1134    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines. O(log n).
1135    #[inline]
1136    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1137        let tags = self.html_tags();
1138        let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1139        idx > 0 && byte_pos < tags[idx - 1].byte_end
1140    }
1141
1142    /// Check if a byte position is within a Jinja template ({{ }} or {% %}). O(log n).
1143    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1144        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1145    }
1146
1147    /// Check if a byte position is within a JSX expression (MDX: {expression}). O(log n).
1148    #[inline]
1149    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1150        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1151    }
1152
1153    /// Check if a byte position is within an MDX comment ({/* ... */}). O(log n).
1154    #[inline]
1155    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1156        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1157    }
1158
1159    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`).
1160    /// Active for Pandoc-compatible flavors. O(log n).
1161    #[inline]
1162    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1163        let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1164        idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1165    }
1166
1167    /// Pre-computed Pandoc/Quarto citation ranges.
1168    #[inline]
1169    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1170        &self.citation_ranges
1171    }
1172
1173    /// Check if a byte position is within a Pandoc/Quarto div block (`::: ... :::`).
1174    /// Active for Pandoc-compatible flavors. O(log n) via binary search over sorted ranges.
1175    #[inline]
1176    pub fn is_in_div_block(&self, byte_pos: usize) -> bool {
1177        let idx = self.pandoc_div_ranges.partition_point(|r| r.start <= byte_pos);
1178        idx > 0 && byte_pos < self.pandoc_div_ranges[idx - 1].end
1179    }
1180
1181    /// Check if a byte position is within a Pandoc inline footnote (`^[note text]`).
1182    /// Active for Pandoc-compatible flavors. O(log n).
1183    #[inline]
1184    pub fn is_in_inline_footnote(&self, byte_pos: usize) -> bool {
1185        let idx = self.inline_footnote_ranges.partition_point(|r| r.start <= byte_pos);
1186        idx > 0 && byte_pos < self.inline_footnote_ranges[idx - 1].end
1187    }
1188
1189    /// Check if a byte position is within a Pandoc example-list marker (`(@)` /
1190    /// `(@label)` at line start). Active for Pandoc-compatible flavors. O(log n).
1191    #[inline]
1192    pub fn is_in_example_list_marker(&self, byte_pos: usize) -> bool {
1193        let idx = self.example_list_marker_ranges.partition_point(|r| r.start <= byte_pos);
1194        idx > 0 && byte_pos < self.example_list_marker_ranges[idx - 1].end
1195    }
1196
1197    /// Check if a byte position is within a Pandoc example reference (`(@label)`
1198    /// inline). Active for Pandoc-compatible flavors. O(log n).
1199    #[inline]
1200    pub fn is_in_example_reference(&self, byte_pos: usize) -> bool {
1201        let idx = self.example_reference_ranges.partition_point(|r| r.start <= byte_pos);
1202        idx > 0 && byte_pos < self.example_reference_ranges[idx - 1].end
1203    }
1204
1205    /// Check if a byte position is within a Pandoc subscript (`~x~`) or
1206    /// superscript (`^x^`) span. Active for Pandoc-compatible flavors. O(log n).
1207    #[inline]
1208    pub fn is_in_subscript_or_superscript(&self, byte_pos: usize) -> bool {
1209        let idx = self.sub_super_ranges.partition_point(|r| r.start <= byte_pos);
1210        idx > 0 && byte_pos < self.sub_super_ranges[idx - 1].end
1211    }
1212
1213    /// Check if a byte position is within a Pandoc inline-code attribute block
1214    /// (`{.lang}` immediately following `` `code` ``). Active for Pandoc-compatible
1215    /// flavors. O(log n).
1216    #[inline]
1217    pub fn is_in_inline_code_attr(&self, byte_pos: usize) -> bool {
1218        let idx = self.inline_code_attr_ranges.partition_point(|r| r.start <= byte_pos);
1219        idx > 0 && byte_pos < self.inline_code_attr_ranges[idx - 1].end
1220    }
1221
1222    /// Check if a byte position is within a Pandoc bracketed span (`[text]{attrs}`).
1223    /// Active for Pandoc-compatible flavors. O(log n).
1224    #[inline]
1225    pub fn is_in_bracketed_span(&self, byte_pos: usize) -> bool {
1226        let idx = self.bracketed_span_ranges.partition_point(|r| r.start <= byte_pos);
1227        idx > 0 && byte_pos < self.bracketed_span_ranges[idx - 1].end
1228    }
1229
1230    /// Returns true if `byte_pos` falls inside a Pandoc line block (`| text`).
1231    /// Active for Pandoc-compatible flavors. O(log n).
1232    #[inline]
1233    pub fn is_in_line_block(&self, byte_pos: usize) -> bool {
1234        let idx = self.line_block_ranges.partition_point(|r| r.start <= byte_pos);
1235        idx > 0 && byte_pos < self.line_block_ranges[idx - 1].end
1236    }
1237
1238    /// Returns true if `byte_pos` falls inside a Pandoc pipe-table caption
1239    /// (`: caption` adjacent to a pipe table). Active for Pandoc-compatible
1240    /// flavors. O(log n).
1241    #[inline]
1242    pub fn is_in_pipe_table_caption(&self, byte_pos: usize) -> bool {
1243        let idx = self.pipe_table_caption_ranges.partition_point(|r| r.start <= byte_pos);
1244        idx > 0 && byte_pos < self.pipe_table_caption_ranges[idx - 1].end
1245    }
1246
1247    /// Returns true if `byte_pos` falls inside a Pandoc YAML metadata block.
1248    /// Active for Pandoc-compatible flavors. O(log n).
1249    #[inline]
1250    pub fn is_in_pandoc_metadata(&self, byte_pos: usize) -> bool {
1251        let idx = self.pandoc_metadata_ranges.partition_point(|r| r.start <= byte_pos);
1252        idx > 0 && byte_pos < self.pandoc_metadata_ranges[idx - 1].end
1253    }
1254
1255    /// Returns true if `byte_pos` falls inside a Pandoc grid table.
1256    /// Active for Pandoc-compatible flavors. O(log n).
1257    #[inline]
1258    pub fn is_in_grid_table(&self, byte_pos: usize) -> bool {
1259        let idx = self.grid_table_ranges.partition_point(|r| r.start <= byte_pos);
1260        idx > 0 && byte_pos < self.grid_table_ranges[idx - 1].end
1261    }
1262
1263    /// Returns true if `byte_pos` falls inside a Pandoc multi-line table.
1264    /// Active for Pandoc-compatible flavors. O(log n).
1265    #[inline]
1266    pub fn is_in_multi_line_table(&self, byte_pos: usize) -> bool {
1267        let idx = self.multi_line_table_ranges.partition_point(|r| r.start <= byte_pos);
1268        idx > 0 && byte_pos < self.multi_line_table_ranges[idx - 1].end
1269    }
1270
1271    /// Returns true if `link_text`, after Pandoc slugification, matches a heading
1272    /// in the document. Returns false for non-Pandoc-compatible flavors because
1273    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1274    /// off. Use this when the caller has raw bracketed text (`[Section name]`).
1275    pub fn matches_implicit_header_reference(&self, link_text: &str) -> bool {
1276        let slug = crate::utils::pandoc::pandoc_header_slug(link_text);
1277        self.pandoc_header_slugs.contains(&slug)
1278    }
1279
1280    /// Returns true if `slug` (already in Pandoc-slug form) matches a heading
1281    /// in the document. Returns false for non-Pandoc-compatible flavors because
1282    /// the `pandoc_header_slugs` set is empty when the pre-pass detector is gated
1283    /// off. Use this when the caller already has a slug (e.g. the fragment of a
1284    /// URL after `#`). O(1).
1285    #[inline]
1286    pub fn has_pandoc_slug(&self, slug: &str) -> bool {
1287        self.pandoc_header_slugs.contains(slug)
1288    }
1289
1290    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}}). O(log n).
1291    #[inline]
1292    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1293        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1294    }
1295
1296    /// Pre-computed Hugo/Quarto shortcode ranges.
1297    #[inline]
1298    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1299        &self.shortcode_ranges
1300    }
1301
1302    /// Check if a byte position is within a link reference definition title. O(log n).
1303    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1304        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1305    }
1306
1307    /// Check if content has any instances of a specific character (fast)
1308    pub fn has_char(&self, ch: char) -> bool {
1309        match ch {
1310            '#' => self.char_frequency.hash_count > 0,
1311            '*' => self.char_frequency.asterisk_count > 0,
1312            '_' => self.char_frequency.underscore_count > 0,
1313            '-' => self.char_frequency.hyphen_count > 0,
1314            '+' => self.char_frequency.plus_count > 0,
1315            '>' => self.char_frequency.gt_count > 0,
1316            '|' => self.char_frequency.pipe_count > 0,
1317            '[' => self.char_frequency.bracket_count > 0,
1318            '`' => self.char_frequency.backtick_count > 0,
1319            '<' => self.char_frequency.lt_count > 0,
1320            '!' => self.char_frequency.exclamation_count > 0,
1321            '\n' => self.char_frequency.newline_count > 0,
1322            _ => self.content.contains(ch), // Fallback for other characters
1323        }
1324    }
1325
1326    /// Get count of a specific character (fast)
1327    pub fn char_count(&self, ch: char) -> usize {
1328        match ch {
1329            '#' => self.char_frequency.hash_count,
1330            '*' => self.char_frequency.asterisk_count,
1331            '_' => self.char_frequency.underscore_count,
1332            '-' => self.char_frequency.hyphen_count,
1333            '+' => self.char_frequency.plus_count,
1334            '>' => self.char_frequency.gt_count,
1335            '|' => self.char_frequency.pipe_count,
1336            '[' => self.char_frequency.bracket_count,
1337            '`' => self.char_frequency.backtick_count,
1338            '<' => self.char_frequency.lt_count,
1339            '!' => self.char_frequency.exclamation_count,
1340            '\n' => self.char_frequency.newline_count,
1341            _ => self.content.matches(ch).count(), // Fallback for other characters
1342        }
1343    }
1344
1345    /// Check if content likely contains headings (fast)
1346    pub fn likely_has_headings(&self) -> bool {
1347        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') // Setext H1 underlines use '='
1348    }
1349
1350    /// Check if content likely contains lists (fast)
1351    pub fn likely_has_lists(&self) -> bool {
1352        self.char_frequency.asterisk_count > 0
1353            || self.char_frequency.hyphen_count > 0
1354            || self.char_frequency.plus_count > 0
1355    }
1356
1357    /// Check if content likely contains emphasis (fast)
1358    pub fn likely_has_emphasis(&self) -> bool {
1359        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1360    }
1361
1362    /// Check if content likely contains tables (fast)
1363    pub fn likely_has_tables(&self) -> bool {
1364        self.char_frequency.pipe_count > 2
1365    }
1366
1367    /// Check if content likely contains blockquotes (fast)
1368    pub fn likely_has_blockquotes(&self) -> bool {
1369        self.char_frequency.gt_count > 0
1370    }
1371
1372    /// Check if content likely contains code (fast)
1373    pub fn likely_has_code(&self) -> bool {
1374        self.char_frequency.backtick_count > 0
1375    }
1376
1377    /// Check if content likely contains links or images (fast)
1378    pub fn likely_has_links_or_images(&self) -> bool {
1379        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1380    }
1381
1382    /// Check if content likely contains HTML (fast)
1383    pub fn likely_has_html(&self) -> bool {
1384        self.char_frequency.lt_count > 0
1385    }
1386
1387    /// Get the blockquote prefix for inserting a blank line at the given line index.
1388    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1389    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1390    /// Returns an empty string if the line is not inside a blockquote.
1391    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1392        if let Some(line_info) = self.lines.get(line_idx)
1393            && let Some(ref bq) = line_info.blockquote
1394        {
1395            bq.prefix.trim_end().to_string()
1396        } else {
1397            String::new()
1398        }
1399    }
1400
1401    /// Find the line index for a given byte offset using binary search.
1402    /// Returns (line_index, line_number, column) where:
1403    /// - line_index is the 0-based index in the lines array
1404    /// - line_number is the 1-based line number
1405    /// - column is the byte offset within that line
1406    #[inline]
1407    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1408        // Binary search to find the line containing this byte offset
1409        let idx = match lines.binary_search_by(|line| {
1410            if byte_offset < line.byte_offset {
1411                std::cmp::Ordering::Greater
1412            } else if byte_offset > line.byte_offset + line.byte_len {
1413                std::cmp::Ordering::Less
1414            } else {
1415                std::cmp::Ordering::Equal
1416            }
1417        }) {
1418            Ok(idx) => idx,
1419            Err(idx) => idx.saturating_sub(1),
1420        };
1421
1422        let line = &lines[idx];
1423        let line_num = idx + 1;
1424        let col = byte_offset.saturating_sub(line.byte_offset);
1425
1426        (idx, line_num, col)
1427    }
1428
1429    /// Check if a byte offset is within a code span using binary search
1430    #[inline]
1431    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1432        // Since spans are sorted by byte_offset, use partition_point for binary search
1433        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1434
1435        // Check the span that starts at or before our offset
1436        if idx > 0 {
1437            let span = &code_spans[idx - 1];
1438            if offset >= span.byte_offset && offset < span.byte_end {
1439                return true;
1440            }
1441        }
1442
1443        false
1444    }
1445
1446    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1447    ///
1448    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1449    /// This is the standard iterator for rules that need to process headings.
1450    ///
1451    /// # Examples
1452    ///
1453    /// ```
1454    /// use rumdl_lib::lint_context::LintContext;
1455    /// use rumdl_lib::config::MarkdownFlavor;
1456    ///
1457    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1458    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1459    ///
1460    /// for heading in ctx.valid_headings() {
1461    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1462    /// }
1463    /// // Only prints valid headings, skips `#NoSpace`
1464    /// ```
1465    #[must_use]
1466    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1467        ValidHeadingsIter::new(&self.lines)
1468    }
1469
1470    /// Check if the document contains any valid CommonMark headings
1471    ///
1472    /// Returns `true` if there is at least one heading with proper space after `#`.
1473    #[must_use]
1474    pub fn has_valid_headings(&self) -> bool {
1475        self.lines
1476            .iter()
1477            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1478    }
1479}
1480
1481/// Detect footnote definitions and mark their continuation lines.
1482///
1483/// Uses pulldown-cmark to find footnote definition ranges and fenced code
1484/// blocks within them, then:
1485/// 1. Sets `in_footnote_definition = true` on all lines within
1486/// 2. Clears `in_code_block = false` on continuation lines that were
1487///    misidentified as indented code blocks (but preserves real fenced
1488///    code blocks within footnotes)
1489fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1490    use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1491
1492    let options = crate::utils::rumdl_parser_options();
1493    let parser = Parser::new_ext(content, options).into_offset_iter();
1494
1495    // Collect footnote ranges and fenced code block ranges within them
1496    let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1497    let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1498    let mut in_footnote = false;
1499
1500    for (event, range) in parser {
1501        match event {
1502            Event::Start(Tag::FootnoteDefinition(_)) => {
1503                in_footnote = true;
1504                footnote_ranges.push((range.start, range.end));
1505            }
1506            Event::End(TagEnd::FootnoteDefinition) => {
1507                in_footnote = false;
1508            }
1509            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1510                fenced_code_ranges.push((range.start, range.end));
1511            }
1512            _ => {}
1513        }
1514    }
1515
1516    let byte_to_line = |byte_offset: usize| -> usize {
1517        line_offsets
1518            .partition_point(|&offset| offset <= byte_offset)
1519            .saturating_sub(1)
1520    };
1521
1522    // Mark footnote definition lines
1523    for &(start, end) in &footnote_ranges {
1524        let start_line = byte_to_line(start);
1525        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1526
1527        for line in &mut lines[start_line..end_line] {
1528            line.in_footnote_definition = true;
1529            line.in_code_block = false;
1530        }
1531    }
1532
1533    // Restore in_code_block for fenced code blocks within footnotes
1534    for &(start, end) in &fenced_code_ranges {
1535        let start_line = byte_to_line(start);
1536        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1537
1538        for line in &mut lines[start_line..end_line] {
1539            line.in_code_block = true;
1540        }
1541    }
1542}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs