Skip to main content

rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Run a section of work and optionally report how long it took.
///
/// Expands to a block that evaluates `$body` and yields its value. When
/// `$enabled` is true, a `[PROFILE] <label>: <elapsed>` line is written to
/// stderr after the body has finished. This variant is compiled for every
/// target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        // The clock is started unconditionally; only the report is gated.
        let timer = std::time::Instant::now();
        let value = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        value
    }};
}
32
/// WASM variant of `profile_section!`: evaluates the body and nothing else.
///
/// The label and the profiling flag are accepted for call-site compatibility
/// but are never expanded, so the flag does not need to exist on this target.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        $body
    }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub code_block_details: Vec<CodeBlockDetail>, // Per-block metadata (fenced/indented, info string)
61    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, // Pre-computed strong emphasis spans
62    pub line_to_list: crate::utils::code_block_utils::LineToListMap, // Ordered list membership by line
63    pub list_start_values: crate::utils::code_block_utils::ListStartValues, // Start values per list ID
64    pub lines: Vec<LineInfo>,             // Pre-computed line information
65    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
66    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
67    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
68    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
69    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
70    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
71    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
72    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
73    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
74    pub char_frequency: CharFrequency,    // Character frequency analysis
75    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
76    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
77    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
78    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
79    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
80    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
81    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
82    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
83    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
84    pub flavor: MarkdownFlavor,           // Markdown flavor being used
85    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
86    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
87    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
88    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
89    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
90    link_title_ranges: Vec<(usize, usize)>, // Pre-computed sorted link title byte ranges
91    code_span_byte_ranges: Vec<(usize, usize)>, // Pre-computed code span byte ranges from pulldown-cmark
92    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
93    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
94    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, // Lazy-loaded lazy continuation lines
95}
96
97impl<'a> LintContext<'a> {
98    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99        #[cfg(not(target_arch = "wasm32"))]
100        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101
102        let line_offsets = profile_section!("Line offsets", profile, {
103            let mut offsets = vec![0];
104            for (i, c) in content.char_indices() {
105                if c == '\n' {
106                    offsets.push(i + 1);
107                }
108            }
109            offsets
110        });
111
112        // Compute content_lines once for all functions that need it
113        let content_lines: Vec<&str> = content.lines().collect();
114
115        // Detect front matter boundaries once for all functions that need it
116        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
117
118        // Detect code blocks and code spans once and cache them
119        let parse_result = profile_section!(
120            "Code blocks",
121            profile,
122            CodeBlockUtils::detect_code_blocks_and_spans(content)
123        );
124        let mut code_blocks = parse_result.code_blocks;
125        let code_span_ranges = parse_result.code_spans;
126        let code_block_details = parse_result.code_block_details;
127        let strong_spans = parse_result.strong_spans;
128        let line_to_list = parse_result.line_to_list;
129        let list_start_values = parse_result.list_start_values;
130
131        // Pre-compute HTML comment ranges ONCE for all operations
132        let html_comment_ranges = profile_section!(
133            "HTML comment ranges",
134            profile,
135            crate::utils::skip_context::compute_html_comment_ranges(content)
136        );
137
138        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
139        // Detected for all flavors: `:::` blocks are structurally unique and should
140        // never be reflowed as prose, even without MkDocs flavor.
141        let autodoc_ranges = profile_section!(
142            "Autodoc block ranges",
143            profile,
144            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
145        );
146
147        // Pre-compute Quarto div block ranges for Quarto flavor
148        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
149            if flavor == MarkdownFlavor::Quarto {
150                crate::utils::quarto_divs::detect_div_block_ranges(content)
151            } else {
152                Vec::new()
153            }
154        });
155
156        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
157        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
158            if flavor == MarkdownFlavor::MkDocs {
159                crate::utils::pymdown_blocks::detect_block_ranges(content)
160            } else {
161                Vec::new()
162            }
163        });
164
165        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
166        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
167        let skip_ranges = SkipByteRanges {
168            html_comment_ranges: &html_comment_ranges,
169            autodoc_ranges: &autodoc_ranges,
170            quarto_div_ranges: &quarto_div_ranges,
171            pymdown_block_ranges: &pymdown_block_ranges,
172        };
173        let (mut lines, emphasis_spans) = profile_section!(
174            "Basic line info",
175            profile,
176            line_computation::compute_basic_line_info(
177                content,
178                &content_lines,
179                &line_offsets,
180                &code_blocks,
181                flavor,
182                &skip_ranges,
183                front_matter_end,
184            )
185        );
186
187        // Detect HTML blocks BEFORE heading detection
188        profile_section!(
189            "HTML blocks",
190            profile,
191            heading_detection::detect_html_blocks(content, &mut lines)
192        );
193
194        // Detect ESM import/export blocks in MDX files BEFORE heading detection
195        profile_section!(
196            "ESM blocks",
197            profile,
198            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
199        );
200
201        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
202        profile_section!(
203            "JSX block detection",
204            profile,
205            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
206        );
207
208        // Detect JSX expressions and MDX comments in MDX files
209        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
210            "JSX/MDX detection",
211            profile,
212            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
213        );
214
215        // Detect `<div markdown>`-style HTML blocks (grid cards, etc.) regardless of flavor.
216        // The `markdown` attribute is an explicit, author-supplied signal; recognizing it
217        // in all flavors keeps `rumdl fmt` from mangling Material grid cards when the
218        // MkDocs flavor isn't active.
219        profile_section!(
220            "Markdown-in-HTML blocks",
221            profile,
222            flavor_detection::detect_markdown_html_blocks(&content_lines, &mut lines)
223        );
224
225        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
226        profile_section!(
227            "MkDocs constructs",
228            profile,
229            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
230        );
231
232        // Detect footnote definitions and correct false code block detection.
233        // With ENABLE_FOOTNOTES, pulldown-cmark correctly parses multi-line
234        // footnotes, but the code block detector may still mark 4-space-indented
235        // footnote continuation lines as indented code blocks.
236        profile_section!(
237            "Footnote definitions",
238            profile,
239            detect_footnote_definitions(content, &mut lines, &line_offsets)
240        );
241
242        // Filter code_blocks to remove false positives from footnote continuation content.
243        // Same pattern as MkDocs/JSX corrections below.
244        {
245            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
246            for &(start, end) in &code_blocks {
247                let start_line = line_offsets
248                    .partition_point(|&offset| offset <= start)
249                    .saturating_sub(1);
250                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
251
252                let mut sub_start: Option<usize> = None;
253                for (i, &offset) in line_offsets[start_line..end_line]
254                    .iter()
255                    .enumerate()
256                    .map(|(j, o)| (j + start_line, o))
257                {
258                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
259                    if is_real_code && sub_start.is_none() {
260                        let byte_start = if i == start_line { start } else { offset };
261                        sub_start = Some(byte_start);
262                    } else if !is_real_code && sub_start.is_some() {
263                        new_code_blocks.push((sub_start.unwrap(), offset));
264                        sub_start = None;
265                    }
266                }
267                if let Some(s) = sub_start {
268                    new_code_blocks.push((s, end));
269                }
270            }
271            code_blocks = new_code_blocks;
272        }
273
274        // Filter code_blocks to remove false positives from MkDocs admonition/tab content
275        // and `<div markdown>` HTML blocks (grid cards).
276        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
277        // these containers this is regular markdown content. detect_mkdocs_line_info and
278        // detect_markdown_html_blocks already corrected LineInfo.in_code_block for these lines,
279        // but the code_blocks byte ranges are still stale. We split ranges rather than using
280        // all-or-nothing removal, so fenced code blocks within the containers are preserved.
281        let has_markdown_html = lines.iter().any(|l| l.in_mkdocs_html_markdown);
282        if flavor == MarkdownFlavor::MkDocs || has_markdown_html {
283            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
284            for &(start, end) in &code_blocks {
285                let start_line = line_offsets
286                    .partition_point(|&offset| offset <= start)
287                    .saturating_sub(1);
288                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
289
290                // Walk lines in this range, collecting sub-ranges where in_code_block is true
291                let mut sub_start: Option<usize> = None;
292                for (i, &offset) in line_offsets[start_line..end_line]
293                    .iter()
294                    .enumerate()
295                    .map(|(j, o)| (j + start_line, o))
296                {
297                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
298                    if is_real_code && sub_start.is_none() {
299                        let byte_start = if i == start_line { start } else { offset };
300                        sub_start = Some(byte_start);
301                    } else if !is_real_code && sub_start.is_some() {
302                        new_code_blocks.push((sub_start.unwrap(), offset));
303                        sub_start = None;
304                    }
305                }
306                if let Some(s) = sub_start {
307                    new_code_blocks.push((s, end));
308                }
309            }
310            code_blocks = new_code_blocks;
311        }
312
313        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
314        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
315        // inside JSX component blocks, but code_blocks byte ranges need updating too.
316        if flavor.supports_jsx() {
317            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
318            for &(start, end) in &code_blocks {
319                let start_line = line_offsets
320                    .partition_point(|&offset| offset <= start)
321                    .saturating_sub(1);
322                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
323
324                let mut sub_start: Option<usize> = None;
325                for (i, &offset) in line_offsets[start_line..end_line]
326                    .iter()
327                    .enumerate()
328                    .map(|(j, o)| (j + start_line, o))
329                {
330                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
331                    if is_real_code && sub_start.is_none() {
332                        let byte_start = if i == start_line { start } else { offset };
333                        sub_start = Some(byte_start);
334                    } else if !is_real_code && sub_start.is_some() {
335                        new_code_blocks.push((sub_start.unwrap(), offset));
336                        sub_start = None;
337                    }
338                }
339                if let Some(s) = sub_start {
340                    new_code_blocks.push((s, end));
341                }
342            }
343            code_blocks = new_code_blocks;
344        }
345
346        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
347        profile_section!(
348            "Kramdown constructs",
349            profile,
350            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
351        );
352
353        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
354        // so downstream heading detection and collection builders never see them.
355        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
356        // from being populated inside extension blocks.
357        for line in &mut lines {
358            if line.in_kramdown_extension_block {
359                line.list_item = None;
360                line.is_horizontal_rule = false;
361                line.blockquote = None;
362                line.is_kramdown_block_ial = false;
363            }
364        }
365
366        // Detect Obsidian comments (%%...%%) in Obsidian flavor
367        let obsidian_comment_ranges = profile_section!(
368            "Obsidian comments",
369            profile,
370            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
371        );
372
373        // Run pulldown-cmark parse for links, images, and link byte ranges in a single pass.
374        // Link byte ranges are needed for heading detection; links/images are finalized later
375        // after code_spans are available.
376        let pulldown_result = profile_section!(
377            "Links, images & link ranges",
378            profile,
379            link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
380        );
381
382        // Now detect headings and blockquotes
383        profile_section!(
384            "Headings & blockquotes",
385            profile,
386            heading_detection::detect_headings_and_blockquotes(
387                &content_lines,
388                &mut lines,
389                flavor,
390                &html_comment_ranges,
391                &pulldown_result.link_byte_ranges,
392                front_matter_end,
393            )
394        );
395
396        // Clear headings that were detected inside kramdown extension blocks
397        for line in &mut lines {
398            if line.in_kramdown_extension_block {
399                line.heading = None;
400            }
401        }
402
403        // Parse code spans early so we can exclude them from link/image parsing
404        let mut code_spans = profile_section!(
405            "Code spans",
406            profile,
407            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
408        );
409
410        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
411        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
412        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
413        if flavor == MarkdownFlavor::MkDocs {
414            let extra = profile_section!(
415                "MkDocs code spans",
416                profile,
417                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
418            );
419            if !extra.is_empty() {
420                code_spans.extend(extra);
421                code_spans.sort_by_key(|span| span.byte_offset);
422            }
423        }
424
425        // Supplement code spans for MDX JSX component body content that pulldown-cmark missed.
426        // pulldown-cmark treats JSX component opening tags (e.g. `<ParamField>`) as HTML block
427        // starters, so backtick code spans within component bodies are invisible to the initial
428        // parse.
429        if flavor == MarkdownFlavor::MDX {
430            let extra = profile_section!(
431                "MDX JSX code spans",
432                profile,
433                element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
434            );
435            if !extra.is_empty() {
436                code_spans.extend(extra);
437                code_spans.sort_by_key(|span| span.byte_offset);
438            }
439        }
440
441        // Mark lines that are continuations of multi-line code spans
442        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
443        for span in &code_spans {
444            if span.end_line > span.line {
445                // Mark lines after the first line as continuations
446                for line_num in (span.line + 1)..=span.end_line {
447                    if let Some(line_info) = lines.get_mut(line_num - 1) {
448                        line_info.in_code_span_continuation = true;
449                    }
450                }
451            }
452        }
453
454        // Finalize links and images: filter by code_spans and run regex fallbacks
455        let (links, images, broken_links, footnote_refs) = profile_section!(
456            "Links & images finalize",
457            profile,
458            link_parser::finalize_links_and_images(
459                content,
460                &lines,
461                &code_blocks,
462                &code_spans,
463                flavor,
464                &html_comment_ranges,
465                pulldown_result
466            )
467        );
468
469        let reference_defs = profile_section!(
470            "Reference defs",
471            profile,
472            link_parser::parse_reference_defs(content, &lines)
473        );
474
475        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
476
477        // Compute character frequency for fast content analysis
478        let char_frequency = profile_section!(
479            "Char frequency",
480            profile,
481            line_computation::compute_char_frequency(content)
482        );
483
484        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
485        let table_blocks = profile_section!(
486            "Table blocks",
487            profile,
488            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
489                content,
490                &code_blocks,
491                &code_spans,
492                &html_comment_ranges,
493            )
494        );
495
496        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
497        // Rules that iterate these collections automatically skip kramdown content.
498        let links = links
499            .into_iter()
500            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
501            .collect::<Vec<_>>();
502        let images = images
503            .into_iter()
504            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
505            .collect::<Vec<_>>();
506        let broken_links = broken_links
507            .into_iter()
508            .filter(|bl| {
509                // BrokenLinkInfo has span but no line field; find line from byte offset
510                let line_idx = line_offsets
511                    .partition_point(|&offset| offset <= bl.span.start)
512                    .saturating_sub(1);
513                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
514            })
515            .collect::<Vec<_>>();
516        let footnote_refs = footnote_refs
517            .into_iter()
518            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
519            .collect::<Vec<_>>();
520        let reference_defs = reference_defs
521            .into_iter()
522            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
523            .collect::<Vec<_>>();
524        let list_blocks = list_blocks
525            .into_iter()
526            .filter(|block| {
527                !lines
528                    .get(block.start_line - 1)
529                    .is_some_and(|l| l.in_kramdown_extension_block)
530            })
531            .collect::<Vec<_>>();
532        let table_blocks = table_blocks
533            .into_iter()
534            .filter(|block| {
535                // TableBlock.start_line is 0-indexed
536                !lines
537                    .get(block.start_line)
538                    .is_some_and(|l| l.in_kramdown_extension_block)
539            })
540            .collect::<Vec<_>>();
541        let emphasis_spans = emphasis_spans
542            .into_iter()
543            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
544            .collect::<Vec<_>>();
545
546        // Rebuild reference_defs_map after filtering
547        let reference_defs_map: HashMap<String, usize> = reference_defs
548            .iter()
549            .enumerate()
550            .map(|(idx, def)| (def.id.to_lowercase(), idx))
551            .collect();
552
553        // Pre-compute sorted link title byte ranges for binary search
554        let link_title_ranges: Vec<(usize, usize)> = reference_defs
555            .iter()
556            .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
557                (Some(start), Some(end)) => Some((start, end)),
558                _ => None,
559            })
560            .collect();
561
562        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
563        let line_index = profile_section!(
564            "Line index",
565            profile,
566            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
567                content,
568                line_offsets.clone(),
569                &code_blocks,
570            )
571        );
572
573        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
574        let jinja_ranges = profile_section!(
575            "Jinja ranges",
576            profile,
577            crate::utils::jinja_utils::find_jinja_ranges(content)
578        );
579
580        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
581        let citation_ranges = profile_section!("Citation ranges", profile, {
582            if flavor == MarkdownFlavor::Quarto {
583                crate::utils::quarto_divs::find_citation_ranges(content)
584            } else {
585                Vec::new()
586            }
587        });
588
589        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
590        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
591            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
592            let mut ranges = Vec::new();
593            for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
594                ranges.push((mat.start(), mat.end()));
595            }
596            ranges
597        });
598
599        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
600
601        Self {
602            content,
603            content_lines,
604            line_offsets,
605            code_blocks,
606            code_block_details,
607            strong_spans,
608            line_to_list,
609            list_start_values,
610            lines,
611            links,
612            images,
613            broken_links,
614            footnote_refs,
615            reference_defs,
616            reference_defs_map,
617            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
618            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
619            list_blocks,
620            char_frequency,
621            html_tags_cache: OnceLock::new(),
622            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
623            table_rows_cache: OnceLock::new(),
624            bare_urls_cache: OnceLock::new(),
625            has_mixed_list_nesting_cache: OnceLock::new(),
626            html_comment_ranges,
627            table_blocks,
628            line_index,
629            jinja_ranges,
630            flavor,
631            source_file,
632            jsx_expression_ranges,
633            mdx_comment_ranges,
634            citation_ranges,
635            shortcode_ranges,
636            link_title_ranges,
637            code_span_byte_ranges: code_span_ranges,
638            inline_config,
639            obsidian_comment_ranges,
640            lazy_cont_lines_cache: OnceLock::new(),
641        }
642    }
643
644    /// Binary search for whether `pos` falls inside any range in a sorted, non-overlapping
645    /// slice of `(start, end)` byte ranges. O(log n) instead of O(n).
646    #[inline]
647    fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
648        // Find the rightmost range whose start <= pos
649        let idx = ranges.partition_point(|&(start, _)| start <= pos);
650        // If idx == 0, no range starts at or before pos
651        idx > 0 && pos < ranges[idx - 1].1
652    }
653
654    /// Check if a byte position is within a code span. O(log n).
655    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
656        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
657    }
658
659    /// Check if `pos` is inside any link byte range. O(log n).
660    pub fn is_in_link(&self, pos: usize) -> bool {
661        let idx = self.links.partition_point(|link| link.byte_offset <= pos);
662        if idx > 0 && pos < self.links[idx - 1].byte_end {
663            return true;
664        }
665        let idx = self.images.partition_point(|img| img.byte_offset <= pos);
666        if idx > 0 && pos < self.images[idx - 1].byte_end {
667            return true;
668        }
669        self.is_in_reference_def(pos)
670    }
671
672    /// Get parsed inline configuration state.
673    pub fn inline_config(&self) -> &InlineConfig {
674        &self.inline_config
675    }
676
677    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
678    ///
679    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
680    pub fn raw_lines(&self) -> &[&'a str] {
681        &self.content_lines
682    }
683
684    /// Check if a rule is disabled at a specific line number (1-indexed)
685    ///
686    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
687    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
688    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
689        self.inline_config.is_rule_disabled(rule_name, line_number)
690    }
691
692    /// Get code spans - computed lazily on first access
693    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
694        Arc::clone(
695            self.code_spans_cache
696                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
697        )
698    }
699
700    /// Get math spans - computed lazily on first access
701    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
702        Arc::clone(
703            self.math_spans_cache
704                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
705        )
706    }
707
708    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
709    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
710        let math_spans = self.math_spans();
711        // Binary search: find the last span whose byte_offset <= byte_pos
712        let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
713        idx > 0 && byte_pos < math_spans[idx - 1].byte_end
714    }
715
716    /// Get HTML comment ranges - pre-computed during LintContext construction
717    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
718        &self.html_comment_ranges
719    }
720
721    /// Check if a byte position is inside an Obsidian comment
722    ///
723    /// Returns false for non-Obsidian flavors.
724    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
725        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
726    }
727
728    /// Check if a line/column position is inside an Obsidian comment
729    ///
730    /// Line number is 1-indexed, column is 1-indexed.
731    /// Returns false for non-Obsidian flavors.
732    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
733        if self.obsidian_comment_ranges.is_empty() {
734            return false;
735        }
736
737        // Convert line/column (1-indexed, char-based) to byte position
738        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
739        self.is_in_obsidian_comment(byte_pos)
740    }
741
742    /// Get HTML tags - computed lazily on first access
743    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
744        Arc::clone(self.html_tags_cache.get_or_init(|| {
745            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
746            // Filter out HTML tags inside kramdown extension blocks
747            Arc::new(
748                tags.into_iter()
749                    .filter(|tag| {
750                        !self
751                            .lines
752                            .get(tag.line - 1)
753                            .is_some_and(|l| l.in_kramdown_extension_block)
754                    })
755                    .collect(),
756            )
757        }))
758    }
759
760    /// Get emphasis spans - pre-computed during construction
761    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
762        Arc::clone(
763            self.emphasis_spans_cache
764                .get()
765                .expect("emphasis_spans_cache initialized during construction"),
766        )
767    }
768
769    /// Get table rows - computed lazily on first access
770    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
771        Arc::clone(
772            self.table_rows_cache
773                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
774        )
775    }
776
777    /// Get bare URLs - computed lazily on first access
778    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
779        Arc::clone(self.bare_urls_cache.get_or_init(|| {
780            Arc::new(element_parsers::parse_bare_urls(
781                self.content,
782                &self.lines,
783                &self.code_blocks,
784            ))
785        }))
786    }
787
788    /// Get lazy continuation lines - computed lazily on first access
789    pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
790        Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
791            Arc::new(element_parsers::detect_lazy_continuation_lines(
792                self.content,
793                &self.lines,
794                &self.line_offsets,
795            ))
796        }))
797    }
798
799    /// Check if document has mixed ordered/unordered list nesting.
800    /// Result is cached after first computation (document-level invariant).
801    /// This is used by MD007 for smart style auto-detection.
802    pub fn has_mixed_list_nesting(&self) -> bool {
803        *self
804            .has_mixed_list_nesting_cache
805            .get_or_init(|| self.compute_mixed_list_nesting())
806    }
807
808    /// Internal computation for mixed list nesting (only called once per LintContext).
809    fn compute_mixed_list_nesting(&self) -> bool {
810        // Track parent list items by their marker position and type
811        // Using marker_column instead of indent because it works correctly
812        // for blockquoted content where indent doesn't account for the prefix
813        // Stack stores: (marker_column, is_ordered)
814        let mut stack: Vec<(usize, bool)> = Vec::new();
815        let mut last_was_blank = false;
816
817        for line_info in &self.lines {
818            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
819            if line_info.in_code_block
820                || line_info.in_front_matter
821                || line_info.in_mkdocstrings
822                || line_info.in_html_comment
823                || line_info.in_mdx_comment
824                || line_info.in_esm_block
825            {
826                continue;
827            }
828
829            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
830            if line_info.is_blank {
831                last_was_blank = true;
832                continue;
833            }
834
835            if let Some(list_item) = &line_info.list_item {
836                // Normalize column 1 to column 0 (consistent with MD007 check function)
837                let current_pos = if list_item.marker_column == 1 {
838                    0
839                } else {
840                    list_item.marker_column
841                };
842
843                // If there was a blank line and this item is at root level, reset stack
844                if last_was_blank && current_pos == 0 {
845                    stack.clear();
846                }
847                last_was_blank = false;
848
849                // Pop items at same or greater position (they're siblings or deeper, not parents)
850                while let Some(&(pos, _)) = stack.last() {
851                    if pos >= current_pos {
852                        stack.pop();
853                    } else {
854                        break;
855                    }
856                }
857
858                // Check if immediate parent has different type - this is mixed nesting
859                if let Some(&(_, parent_is_ordered)) = stack.last()
860                    && parent_is_ordered != list_item.is_ordered
861                {
862                    return true; // Found mixed nesting - early exit
863                }
864
865                stack.push((current_pos, list_item.is_ordered));
866            } else {
867                // Non-list line (but not blank) - could be paragraph or other content
868                last_was_blank = false;
869            }
870        }
871
872        false
873    }
874
875    /// Map a byte offset to (line, column)
876    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
877        match self.line_offsets.binary_search(&offset) {
878            Ok(line) => (line + 1, 1),
879            Err(line) => {
880                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
881                (line, offset - line_start + 1)
882            }
883        }
884    }
885
886    /// Check if a position is within a code block or code span. O(log n).
887    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
888        // Check code blocks first (already uses binary search internally)
889        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
890            return true;
891        }
892
893        // Check inline code spans via binary search
894        self.is_byte_offset_in_code_span(pos)
895    }
896
897    /// Get line information by line number (1-indexed)
898    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
899        if line_num > 0 {
900            self.lines.get(line_num - 1)
901        } else {
902            None
903        }
904    }
905
906    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
907    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
908        let normalized_id = ref_id.to_lowercase();
909        self.reference_defs_map
910            .get(&normalized_id)
911            .map(|&idx| self.reference_defs[idx].url.as_str())
912    }
913
914    /// Check if a line is part of a list block
915    pub fn is_in_list_block(&self, line_num: usize) -> bool {
916        self.list_blocks
917            .iter()
918            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
919    }
920
921    /// Check if a line is within an HTML block
922    pub fn is_in_html_block(&self, line_num: usize) -> bool {
923        if line_num == 0 || line_num > self.lines.len() {
924            return false;
925        }
926        self.lines[line_num - 1].in_html_block
927    }
928
929    /// Check if a line and column is within a code span
930    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
931        if line_num == 0 || line_num > self.lines.len() {
932            return false;
933        }
934
935        // Use the code spans cache to check
936        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
937        // Convert col to 0-indexed for comparison
938        let col_0indexed = if col > 0 { col - 1 } else { 0 };
939        let code_spans = self.code_spans();
940        code_spans.iter().any(|span| {
941            // Check if line is within the span's line range
942            if line_num < span.line || line_num > span.end_line {
943                return false;
944            }
945
946            if span.line == span.end_line {
947                // Single-line span: check column bounds
948                col_0indexed >= span.start_col && col_0indexed < span.end_col
949            } else if line_num == span.line {
950                // First line of multi-line span: anything after start_col is in span
951                col_0indexed >= span.start_col
952            } else if line_num == span.end_line {
953                // Last line of multi-line span: anything before end_col is in span
954                col_0indexed < span.end_col
955            } else {
956                // Middle line of multi-line span: entire line is in span
957                true
958            }
959        })
960    }
961
962    /// Check if a byte offset is within a code span. O(log n).
963    #[inline]
964    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
965        let code_spans = self.code_spans();
966        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
967        idx > 0 && byte_offset < code_spans[idx - 1].byte_end
968    }
969
970    /// Check if a byte position is within a reference definition. O(log n).
971    #[inline]
972    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
973        let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
974        idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
975    }
976
977    /// Check if a byte position is within an HTML comment. O(log n).
978    #[inline]
979    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
980        let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
981        idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
982    }
983
984    /// Check if a byte position is within an HTML tag (including multiline tags).
985    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines. O(log n).
986    #[inline]
987    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
988        let tags = self.html_tags();
989        let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
990        idx > 0 && byte_pos < tags[idx - 1].byte_end
991    }
992
993    /// Check if a byte position is within a Jinja template ({{ }} or {% %}). O(log n).
994    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
995        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
996    }
997
998    /// Check if a byte position is within a JSX expression (MDX: {expression}). O(log n).
999    #[inline]
1000    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1001        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1002    }
1003
1004    /// Check if a byte position is within an MDX comment ({/* ... */}). O(log n).
1005    #[inline]
1006    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1007        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1008    }
1009
1010    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`).
1011    /// Only active in Quarto flavor. O(log n).
1012    #[inline]
1013    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1014        let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1015        idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1016    }
1017
1018    /// Pre-computed Pandoc/Quarto citation ranges.
1019    #[inline]
1020    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1021        &self.citation_ranges
1022    }
1023
1024    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}}). O(log n).
1025    #[inline]
1026    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1027        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1028    }
1029
1030    /// Pre-computed Hugo/Quarto shortcode ranges.
1031    #[inline]
1032    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1033        &self.shortcode_ranges
1034    }
1035
1036    /// Check if a byte position is within a link reference definition title. O(log n).
1037    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1038        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1039    }
1040
1041    /// Check if content has any instances of a specific character (fast)
1042    pub fn has_char(&self, ch: char) -> bool {
1043        match ch {
1044            '#' => self.char_frequency.hash_count > 0,
1045            '*' => self.char_frequency.asterisk_count > 0,
1046            '_' => self.char_frequency.underscore_count > 0,
1047            '-' => self.char_frequency.hyphen_count > 0,
1048            '+' => self.char_frequency.plus_count > 0,
1049            '>' => self.char_frequency.gt_count > 0,
1050            '|' => self.char_frequency.pipe_count > 0,
1051            '[' => self.char_frequency.bracket_count > 0,
1052            '`' => self.char_frequency.backtick_count > 0,
1053            '<' => self.char_frequency.lt_count > 0,
1054            '!' => self.char_frequency.exclamation_count > 0,
1055            '\n' => self.char_frequency.newline_count > 0,
1056            _ => self.content.contains(ch), // Fallback for other characters
1057        }
1058    }
1059
1060    /// Get count of a specific character (fast)
1061    pub fn char_count(&self, ch: char) -> usize {
1062        match ch {
1063            '#' => self.char_frequency.hash_count,
1064            '*' => self.char_frequency.asterisk_count,
1065            '_' => self.char_frequency.underscore_count,
1066            '-' => self.char_frequency.hyphen_count,
1067            '+' => self.char_frequency.plus_count,
1068            '>' => self.char_frequency.gt_count,
1069            '|' => self.char_frequency.pipe_count,
1070            '[' => self.char_frequency.bracket_count,
1071            '`' => self.char_frequency.backtick_count,
1072            '<' => self.char_frequency.lt_count,
1073            '!' => self.char_frequency.exclamation_count,
1074            '\n' => self.char_frequency.newline_count,
1075            _ => self.content.matches(ch).count(), // Fallback for other characters
1076        }
1077    }
1078
1079    /// Check if content likely contains headings (fast)
1080    pub fn likely_has_headings(&self) -> bool {
1081        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') // Setext H1 underlines use '='
1082    }
1083
1084    /// Check if content likely contains lists (fast)
1085    pub fn likely_has_lists(&self) -> bool {
1086        self.char_frequency.asterisk_count > 0
1087            || self.char_frequency.hyphen_count > 0
1088            || self.char_frequency.plus_count > 0
1089    }
1090
1091    /// Check if content likely contains emphasis (fast)
1092    pub fn likely_has_emphasis(&self) -> bool {
1093        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1094    }
1095
1096    /// Check if content likely contains tables (fast)
1097    pub fn likely_has_tables(&self) -> bool {
1098        self.char_frequency.pipe_count > 2
1099    }
1100
1101    /// Check if content likely contains blockquotes (fast)
1102    pub fn likely_has_blockquotes(&self) -> bool {
1103        self.char_frequency.gt_count > 0
1104    }
1105
1106    /// Check if content likely contains code (fast)
1107    pub fn likely_has_code(&self) -> bool {
1108        self.char_frequency.backtick_count > 0
1109    }
1110
1111    /// Check if content likely contains links or images (fast)
1112    pub fn likely_has_links_or_images(&self) -> bool {
1113        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1114    }
1115
1116    /// Check if content likely contains HTML (fast)
1117    pub fn likely_has_html(&self) -> bool {
1118        self.char_frequency.lt_count > 0
1119    }
1120
1121    /// Get the blockquote prefix for inserting a blank line at the given line index.
1122    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1123    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1124    /// Returns an empty string if the line is not inside a blockquote.
1125    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1126        if let Some(line_info) = self.lines.get(line_idx)
1127            && let Some(ref bq) = line_info.blockquote
1128        {
1129            bq.prefix.trim_end().to_string()
1130        } else {
1131            String::new()
1132        }
1133    }
1134
1135    /// Find the line index for a given byte offset using binary search.
1136    /// Returns (line_index, line_number, column) where:
1137    /// - line_index is the 0-based index in the lines array
1138    /// - line_number is the 1-based line number
1139    /// - column is the byte offset within that line
1140    #[inline]
1141    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1142        // Binary search to find the line containing this byte offset
1143        let idx = match lines.binary_search_by(|line| {
1144            if byte_offset < line.byte_offset {
1145                std::cmp::Ordering::Greater
1146            } else if byte_offset > line.byte_offset + line.byte_len {
1147                std::cmp::Ordering::Less
1148            } else {
1149                std::cmp::Ordering::Equal
1150            }
1151        }) {
1152            Ok(idx) => idx,
1153            Err(idx) => idx.saturating_sub(1),
1154        };
1155
1156        let line = &lines[idx];
1157        let line_num = idx + 1;
1158        let col = byte_offset.saturating_sub(line.byte_offset);
1159
1160        (idx, line_num, col)
1161    }
1162
1163    /// Check if a byte offset is within a code span using binary search
1164    #[inline]
1165    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1166        // Since spans are sorted by byte_offset, use partition_point for binary search
1167        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1168
1169        // Check the span that starts at or before our offset
1170        if idx > 0 {
1171            let span = &code_spans[idx - 1];
1172            if offset >= span.byte_offset && offset < span.byte_end {
1173                return true;
1174            }
1175        }
1176
1177        false
1178    }
1179
1180    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1181    ///
1182    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1183    /// This is the standard iterator for rules that need to process headings.
1184    ///
1185    /// # Examples
1186    ///
1187    /// ```
1188    /// use rumdl_lib::lint_context::LintContext;
1189    /// use rumdl_lib::config::MarkdownFlavor;
1190    ///
1191    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1192    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1193    ///
1194    /// for heading in ctx.valid_headings() {
1195    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1196    /// }
1197    /// // Only prints valid headings, skips `#NoSpace`
1198    /// ```
1199    #[must_use]
1200    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1201        ValidHeadingsIter::new(&self.lines)
1202    }
1203
1204    /// Check if the document contains any valid CommonMark headings
1205    ///
1206    /// Returns `true` if there is at least one heading with proper space after `#`.
1207    #[must_use]
1208    pub fn has_valid_headings(&self) -> bool {
1209        self.lines
1210            .iter()
1211            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1212    }
1213}
1214
1215/// Detect footnote definitions and mark their continuation lines.
1216///
1217/// Uses pulldown-cmark to find footnote definition ranges and fenced code
1218/// blocks within them, then:
1219/// 1. Sets `in_footnote_definition = true` on all lines within
1220/// 2. Clears `in_code_block = false` on continuation lines that were
1221///    misidentified as indented code blocks (but preserves real fenced
1222///    code blocks within footnotes)
1223fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1224    use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1225
1226    let options = crate::utils::rumdl_parser_options();
1227    let parser = Parser::new_ext(content, options).into_offset_iter();
1228
1229    // Collect footnote ranges and fenced code block ranges within them
1230    let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1231    let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1232    let mut in_footnote = false;
1233
1234    for (event, range) in parser {
1235        match event {
1236            Event::Start(Tag::FootnoteDefinition(_)) => {
1237                in_footnote = true;
1238                footnote_ranges.push((range.start, range.end));
1239            }
1240            Event::End(TagEnd::FootnoteDefinition) => {
1241                in_footnote = false;
1242            }
1243            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1244                fenced_code_ranges.push((range.start, range.end));
1245            }
1246            _ => {}
1247        }
1248    }
1249
1250    let byte_to_line = |byte_offset: usize| -> usize {
1251        line_offsets
1252            .partition_point(|&offset| offset <= byte_offset)
1253            .saturating_sub(1)
1254    };
1255
1256    // Mark footnote definition lines
1257    for &(start, end) in &footnote_ranges {
1258        let start_line = byte_to_line(start);
1259        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1260
1261        for line in &mut lines[start_line..end_line] {
1262            line.in_footnote_definition = true;
1263            line.in_code_block = false;
1264        }
1265    }
1266
1267    // Restore in_code_block for fenced code blocks within footnotes
1268    for &(start, end) in &fenced_code_ranges {
1269        let start_line = byte_to_line(start);
1270        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1271
1272        for line in &mut lines[start_line..end_line] {
1273            line.in_code_block = true;
1274        }
1275    }
1276}