rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::{CodeBlockDetail, CodeBlockUtils};
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Times a section of work and optionally reports how long it took.
///
/// Arguments: a display label, a boolean expression that enables reporting,
/// and the expression to evaluate. The macro evaluates to the value of that
/// expression. When reporting is enabled, a `[PROFILE] <label>: <elapsed>`
/// line is written to stderr. This variant is compiled for non-WASM targets.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{
        let timer = std::time::Instant::now();
        let outcome = $body;
        if $enabled {
            eprintln!("[PROFILE] {}: {:?}", $label, timer.elapsed());
        }
        outcome
    }};
}

/// WASM variant of `profile_section!`: evaluates the wrapped expression
/// without any timing or reporting; the label and flag are ignored.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($label:expr, $enabled:expr, $body:expr) => {{ $body }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub code_block_details: Vec<CodeBlockDetail>, // Per-block metadata (fenced/indented, info string)
61    pub strong_spans: Vec<crate::utils::code_block_utils::StrongSpanDetail>, // Pre-computed strong emphasis spans
62    pub line_to_list: crate::utils::code_block_utils::LineToListMap, // Ordered list membership by line
63    pub list_start_values: crate::utils::code_block_utils::ListStartValues, // Start values per list ID
64    pub lines: Vec<LineInfo>,             // Pre-computed line information
65    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
66    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
67    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
68    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
69    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
70    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
71    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
72    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
73    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
74    pub char_frequency: CharFrequency,    // Character frequency analysis
75    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
76    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
77    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
78    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
79    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
80    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
81    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
82    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
83    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
84    pub flavor: MarkdownFlavor,           // Markdown flavor being used
85    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
86    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
87    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
88    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
89    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
90    link_title_ranges: Vec<(usize, usize)>, // Pre-computed sorted link title byte ranges
91    code_span_byte_ranges: Vec<(usize, usize)>, // Pre-computed code span byte ranges from pulldown-cmark
92    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
93    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
94    lazy_cont_lines_cache: OnceLock<Arc<Vec<LazyContLine>>>, // Lazy-loaded lazy continuation lines
95}
96
97impl<'a> LintContext<'a> {
98    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
99        #[cfg(not(target_arch = "wasm32"))]
100        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
101        #[cfg(target_arch = "wasm32")]
102        let profile = false;
103
104        let line_offsets = profile_section!("Line offsets", profile, {
105            let mut offsets = vec![0];
106            for (i, c) in content.char_indices() {
107                if c == '\n' {
108                    offsets.push(i + 1);
109                }
110            }
111            offsets
112        });
113
114        // Compute content_lines once for all functions that need it
115        let content_lines: Vec<&str> = content.lines().collect();
116
117        // Detect front matter boundaries once for all functions that need it
118        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
119
120        // Detect code blocks and code spans once and cache them
121        let parse_result = profile_section!(
122            "Code blocks",
123            profile,
124            CodeBlockUtils::detect_code_blocks_and_spans(content)
125        );
126        let mut code_blocks = parse_result.code_blocks;
127        let code_span_ranges = parse_result.code_spans;
128        let code_block_details = parse_result.code_block_details;
129        let strong_spans = parse_result.strong_spans;
130        let line_to_list = parse_result.line_to_list;
131        let list_start_values = parse_result.list_start_values;
132
133        // Pre-compute HTML comment ranges ONCE for all operations
134        let html_comment_ranges = profile_section!(
135            "HTML comment ranges",
136            profile,
137            crate::utils::skip_context::compute_html_comment_ranges(content)
138        );
139
140        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
141        // Detected for all flavors: `:::` blocks are structurally unique and should
142        // never be reflowed as prose, even without MkDocs flavor.
143        let autodoc_ranges = profile_section!(
144            "Autodoc block ranges",
145            profile,
146            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
147        );
148
149        // Pre-compute Quarto div block ranges for Quarto flavor
150        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
151            if flavor == MarkdownFlavor::Quarto {
152                crate::utils::quarto_divs::detect_div_block_ranges(content)
153            } else {
154                Vec::new()
155            }
156        });
157
158        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
159        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
160            if flavor == MarkdownFlavor::MkDocs {
161                crate::utils::pymdown_blocks::detect_block_ranges(content)
162            } else {
163                Vec::new()
164            }
165        });
166
167        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
168        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
169        let skip_ranges = SkipByteRanges {
170            html_comment_ranges: &html_comment_ranges,
171            autodoc_ranges: &autodoc_ranges,
172            quarto_div_ranges: &quarto_div_ranges,
173            pymdown_block_ranges: &pymdown_block_ranges,
174        };
175        let (mut lines, emphasis_spans) = profile_section!(
176            "Basic line info",
177            profile,
178            line_computation::compute_basic_line_info(
179                content,
180                &content_lines,
181                &line_offsets,
182                &code_blocks,
183                flavor,
184                &skip_ranges,
185                front_matter_end,
186            )
187        );
188
189        // Detect HTML blocks BEFORE heading detection
190        profile_section!(
191            "HTML blocks",
192            profile,
193            heading_detection::detect_html_blocks(content, &mut lines)
194        );
195
196        // Detect ESM import/export blocks in MDX files BEFORE heading detection
197        profile_section!(
198            "ESM blocks",
199            profile,
200            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
201        );
202
203        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
204        profile_section!(
205            "JSX block detection",
206            profile,
207            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
208        );
209
210        // Detect JSX expressions and MDX comments in MDX files
211        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
212            "JSX/MDX detection",
213            profile,
214            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
215        );
216
217        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
218        profile_section!(
219            "MkDocs constructs",
220            profile,
221            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
222        );
223
224        // Detect footnote definitions and correct false code block detection.
225        // With ENABLE_FOOTNOTES, pulldown-cmark correctly parses multi-line
226        // footnotes, but the code block detector may still mark 4-space-indented
227        // footnote continuation lines as indented code blocks.
228        profile_section!(
229            "Footnote definitions",
230            profile,
231            detect_footnote_definitions(content, &mut lines, &line_offsets)
232        );
233
234        // Filter code_blocks to remove false positives from footnote continuation content.
235        // Same pattern as MkDocs/JSX corrections below.
236        {
237            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
238            for &(start, end) in &code_blocks {
239                let start_line = line_offsets
240                    .partition_point(|&offset| offset <= start)
241                    .saturating_sub(1);
242                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
243
244                let mut sub_start: Option<usize> = None;
245                for (i, &offset) in line_offsets[start_line..end_line]
246                    .iter()
247                    .enumerate()
248                    .map(|(j, o)| (j + start_line, o))
249                {
250                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
251                    if is_real_code && sub_start.is_none() {
252                        let byte_start = if i == start_line { start } else { offset };
253                        sub_start = Some(byte_start);
254                    } else if !is_real_code && sub_start.is_some() {
255                        new_code_blocks.push((sub_start.unwrap(), offset));
256                        sub_start = None;
257                    }
258                }
259                if let Some(s) = sub_start {
260                    new_code_blocks.push((s, end));
261                }
262            }
263            code_blocks = new_code_blocks;
264        }
265
266        // Filter code_blocks to remove false positives from MkDocs admonition/tab content.
267        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
268        // MkDocs admonitions and content tabs this is regular markdown content.
269        // detect_mkdocs_line_info already corrected LineInfo.in_code_block for these lines,
270        // but the code_blocks byte ranges are still stale. We split ranges rather than using
271        // all-or-nothing removal, so fenced code blocks within admonitions are preserved.
272        if flavor == MarkdownFlavor::MkDocs {
273            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
274            for &(start, end) in &code_blocks {
275                let start_line = line_offsets
276                    .partition_point(|&offset| offset <= start)
277                    .saturating_sub(1);
278                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
279
280                // Walk lines in this range, collecting sub-ranges where in_code_block is true
281                let mut sub_start: Option<usize> = None;
282                for (i, &offset) in line_offsets[start_line..end_line]
283                    .iter()
284                    .enumerate()
285                    .map(|(j, o)| (j + start_line, o))
286                {
287                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
288                    if is_real_code && sub_start.is_none() {
289                        let byte_start = if i == start_line { start } else { offset };
290                        sub_start = Some(byte_start);
291                    } else if !is_real_code && sub_start.is_some() {
292                        new_code_blocks.push((sub_start.unwrap(), offset));
293                        sub_start = None;
294                    }
295                }
296                if let Some(s) = sub_start {
297                    new_code_blocks.push((s, end));
298                }
299            }
300            code_blocks = new_code_blocks;
301        }
302
303        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
304        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
305        // inside JSX component blocks, but code_blocks byte ranges need updating too.
306        if flavor.supports_jsx() {
307            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
308            for &(start, end) in &code_blocks {
309                let start_line = line_offsets
310                    .partition_point(|&offset| offset <= start)
311                    .saturating_sub(1);
312                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
313
314                let mut sub_start: Option<usize> = None;
315                for (i, &offset) in line_offsets[start_line..end_line]
316                    .iter()
317                    .enumerate()
318                    .map(|(j, o)| (j + start_line, o))
319                {
320                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
321                    if is_real_code && sub_start.is_none() {
322                        let byte_start = if i == start_line { start } else { offset };
323                        sub_start = Some(byte_start);
324                    } else if !is_real_code && sub_start.is_some() {
325                        new_code_blocks.push((sub_start.unwrap(), offset));
326                        sub_start = None;
327                    }
328                }
329                if let Some(s) = sub_start {
330                    new_code_blocks.push((s, end));
331                }
332            }
333            code_blocks = new_code_blocks;
334        }
335
336        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
337        profile_section!(
338            "Kramdown constructs",
339            profile,
340            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
341        );
342
343        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
344        // so downstream heading detection and collection builders never see them.
345        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
346        // from being populated inside extension blocks.
347        for line in &mut lines {
348            if line.in_kramdown_extension_block {
349                line.list_item = None;
350                line.is_horizontal_rule = false;
351                line.blockquote = None;
352                line.is_kramdown_block_ial = false;
353            }
354        }
355
356        // Detect Obsidian comments (%%...%%) in Obsidian flavor
357        let obsidian_comment_ranges = profile_section!(
358            "Obsidian comments",
359            profile,
360            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
361        );
362
363        // Run pulldown-cmark parse for links, images, and link byte ranges in a single pass.
364        // Link byte ranges are needed for heading detection; links/images are finalized later
365        // after code_spans are available.
366        let pulldown_result = profile_section!(
367            "Links, images & link ranges",
368            profile,
369            link_parser::parse_links_images_pulldown(content, &lines, &code_blocks, flavor, &html_comment_ranges)
370        );
371
372        // Now detect headings and blockquotes
373        profile_section!(
374            "Headings & blockquotes",
375            profile,
376            heading_detection::detect_headings_and_blockquotes(
377                &content_lines,
378                &mut lines,
379                flavor,
380                &html_comment_ranges,
381                &pulldown_result.link_byte_ranges,
382                front_matter_end,
383            )
384        );
385
386        // Clear headings that were detected inside kramdown extension blocks
387        for line in &mut lines {
388            if line.in_kramdown_extension_block {
389                line.heading = None;
390            }
391        }
392
393        // Parse code spans early so we can exclude them from link/image parsing
394        let mut code_spans = profile_section!(
395            "Code spans",
396            profile,
397            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
398        );
399
400        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
401        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
402        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
403        if flavor == MarkdownFlavor::MkDocs {
404            let extra = profile_section!(
405                "MkDocs code spans",
406                profile,
407                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
408            );
409            if !extra.is_empty() {
410                code_spans.extend(extra);
411                code_spans.sort_by_key(|span| span.byte_offset);
412            }
413        }
414
415        // Supplement code spans for MDX JSX component body content that pulldown-cmark missed.
416        // pulldown-cmark treats JSX component opening tags (e.g. `<ParamField>`) as HTML block
417        // starters, so backtick code spans within component bodies are invisible to the initial
418        // parse.
419        if flavor == MarkdownFlavor::MDX {
420            let extra = profile_section!(
421                "MDX JSX code spans",
422                profile,
423                element_parsers::scan_jsx_block_code_spans(content, &lines, &code_span_ranges)
424            );
425            if !extra.is_empty() {
426                code_spans.extend(extra);
427                code_spans.sort_by_key(|span| span.byte_offset);
428            }
429        }
430
431        // Mark lines that are continuations of multi-line code spans
432        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
433        for span in &code_spans {
434            if span.end_line > span.line {
435                // Mark lines after the first line as continuations
436                for line_num in (span.line + 1)..=span.end_line {
437                    if let Some(line_info) = lines.get_mut(line_num - 1) {
438                        line_info.in_code_span_continuation = true;
439                    }
440                }
441            }
442        }
443
444        // Finalize links and images: filter by code_spans and run regex fallbacks
445        let (links, images, broken_links, footnote_refs) = profile_section!(
446            "Links & images finalize",
447            profile,
448            link_parser::finalize_links_and_images(
449                content,
450                &lines,
451                &code_blocks,
452                &code_spans,
453                flavor,
454                &html_comment_ranges,
455                pulldown_result
456            )
457        );
458
459        let reference_defs = profile_section!(
460            "Reference defs",
461            profile,
462            link_parser::parse_reference_defs(content, &lines)
463        );
464
465        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
466
467        // Compute character frequency for fast content analysis
468        let char_frequency = profile_section!(
469            "Char frequency",
470            profile,
471            line_computation::compute_char_frequency(content)
472        );
473
474        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
475        let table_blocks = profile_section!(
476            "Table blocks",
477            profile,
478            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
479                content,
480                &code_blocks,
481                &code_spans,
482                &html_comment_ranges,
483            )
484        );
485
486        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
487        // Rules that iterate these collections automatically skip kramdown content.
488        let links = links
489            .into_iter()
490            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
491            .collect::<Vec<_>>();
492        let images = images
493            .into_iter()
494            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
495            .collect::<Vec<_>>();
496        let broken_links = broken_links
497            .into_iter()
498            .filter(|bl| {
499                // BrokenLinkInfo has span but no line field; find line from byte offset
500                let line_idx = line_offsets
501                    .partition_point(|&offset| offset <= bl.span.start)
502                    .saturating_sub(1);
503                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
504            })
505            .collect::<Vec<_>>();
506        let footnote_refs = footnote_refs
507            .into_iter()
508            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
509            .collect::<Vec<_>>();
510        let reference_defs = reference_defs
511            .into_iter()
512            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
513            .collect::<Vec<_>>();
514        let list_blocks = list_blocks
515            .into_iter()
516            .filter(|block| {
517                !lines
518                    .get(block.start_line - 1)
519                    .is_some_and(|l| l.in_kramdown_extension_block)
520            })
521            .collect::<Vec<_>>();
522        let table_blocks = table_blocks
523            .into_iter()
524            .filter(|block| {
525                // TableBlock.start_line is 0-indexed
526                !lines
527                    .get(block.start_line)
528                    .is_some_and(|l| l.in_kramdown_extension_block)
529            })
530            .collect::<Vec<_>>();
531        let emphasis_spans = emphasis_spans
532            .into_iter()
533            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
534            .collect::<Vec<_>>();
535
536        // Rebuild reference_defs_map after filtering
537        let reference_defs_map: HashMap<String, usize> = reference_defs
538            .iter()
539            .enumerate()
540            .map(|(idx, def)| (def.id.to_lowercase(), idx))
541            .collect();
542
543        // Pre-compute sorted link title byte ranges for binary search
544        let link_title_ranges: Vec<(usize, usize)> = reference_defs
545            .iter()
546            .filter_map(|def| match (def.title_byte_start, def.title_byte_end) {
547                (Some(start), Some(end)) => Some((start, end)),
548                _ => None,
549            })
550            .collect();
551
552        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
553        let line_index = profile_section!(
554            "Line index",
555            profile,
556            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
557                content,
558                line_offsets.clone(),
559                &code_blocks,
560            )
561        );
562
563        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
564        let jinja_ranges = profile_section!(
565            "Jinja ranges",
566            profile,
567            crate::utils::jinja_utils::find_jinja_ranges(content)
568        );
569
570        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
571        let citation_ranges = profile_section!("Citation ranges", profile, {
572            if flavor == MarkdownFlavor::Quarto {
573                crate::utils::quarto_divs::find_citation_ranges(content)
574            } else {
575                Vec::new()
576            }
577        });
578
579        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
580        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
581            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
582            let mut ranges = Vec::new();
583            for mat in HUGO_SHORTCODE_REGEX.find_iter(content) {
584                ranges.push((mat.start(), mat.end()));
585            }
586            ranges
587        });
588
589        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
590
591        Self {
592            content,
593            content_lines,
594            line_offsets,
595            code_blocks,
596            code_block_details,
597            strong_spans,
598            line_to_list,
599            list_start_values,
600            lines,
601            links,
602            images,
603            broken_links,
604            footnote_refs,
605            reference_defs,
606            reference_defs_map,
607            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
608            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
609            list_blocks,
610            char_frequency,
611            html_tags_cache: OnceLock::new(),
612            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
613            table_rows_cache: OnceLock::new(),
614            bare_urls_cache: OnceLock::new(),
615            has_mixed_list_nesting_cache: OnceLock::new(),
616            html_comment_ranges,
617            table_blocks,
618            line_index,
619            jinja_ranges,
620            flavor,
621            source_file,
622            jsx_expression_ranges,
623            mdx_comment_ranges,
624            citation_ranges,
625            shortcode_ranges,
626            link_title_ranges,
627            code_span_byte_ranges: code_span_ranges,
628            inline_config,
629            obsidian_comment_ranges,
630            lazy_cont_lines_cache: OnceLock::new(),
631        }
632    }
633
634    /// Binary search for whether `pos` falls inside any range in a sorted, non-overlapping
635    /// slice of `(start, end)` byte ranges. O(log n) instead of O(n).
636    #[inline]
637    fn binary_search_ranges(ranges: &[(usize, usize)], pos: usize) -> bool {
638        // Find the rightmost range whose start <= pos
639        let idx = ranges.partition_point(|&(start, _)| start <= pos);
640        // If idx == 0, no range starts at or before pos
641        idx > 0 && pos < ranges[idx - 1].1
642    }
643
644    /// Check if a byte position is within a code span. O(log n).
645    pub fn is_in_code_span_byte(&self, pos: usize) -> bool {
646        Self::binary_search_ranges(&self.code_span_byte_ranges, pos)
647    }
648
649    /// Check if `pos` is inside any link byte range. O(log n).
650    pub fn is_in_link(&self, pos: usize) -> bool {
651        let idx = self.links.partition_point(|link| link.byte_offset <= pos);
652        if idx > 0 && pos < self.links[idx - 1].byte_end {
653            return true;
654        }
655        let idx = self.images.partition_point(|img| img.byte_offset <= pos);
656        if idx > 0 && pos < self.images[idx - 1].byte_end {
657            return true;
658        }
659        self.is_in_reference_def(pos)
660    }
661
662    /// Get parsed inline configuration state.
663    pub fn inline_config(&self) -> &InlineConfig {
664        &self.inline_config
665    }
666
667    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
668    ///
669    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
670    pub fn raw_lines(&self) -> &[&'a str] {
671        &self.content_lines
672    }
673
674    /// Check if a rule is disabled at a specific line number (1-indexed)
675    ///
676    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
677    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
678    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
679        self.inline_config.is_rule_disabled(rule_name, line_number)
680    }
681
682    /// Get code spans - computed lazily on first access
683    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
684        Arc::clone(
685            self.code_spans_cache
686                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
687        )
688    }
689
690    /// Get math spans - computed lazily on first access
691    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
692        Arc::clone(
693            self.math_spans_cache
694                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
695        )
696    }
697
698    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
699    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
700        let math_spans = self.math_spans();
701        // Binary search: find the last span whose byte_offset <= byte_pos
702        let idx = math_spans.partition_point(|span| span.byte_offset <= byte_pos);
703        idx > 0 && byte_pos < math_spans[idx - 1].byte_end
704    }
705
706    /// Get HTML comment ranges - pre-computed during LintContext construction
707    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
708        &self.html_comment_ranges
709    }
710
711    /// Get Obsidian comment ranges - pre-computed during LintContext construction
712    /// Returns empty slice for non-Obsidian flavors
713    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
714        &self.obsidian_comment_ranges
715    }
716
717    /// Check if a byte position is inside an Obsidian comment
718    ///
719    /// Returns false for non-Obsidian flavors.
720    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
721        Self::binary_search_ranges(&self.obsidian_comment_ranges, byte_pos)
722    }
723
724    /// Check if a line/column position is inside an Obsidian comment
725    ///
726    /// Line number is 1-indexed, column is 1-indexed.
727    /// Returns false for non-Obsidian flavors.
728    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
729        if self.obsidian_comment_ranges.is_empty() {
730            return false;
731        }
732
733        // Convert line/column (1-indexed, char-based) to byte position
734        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
735        self.is_in_obsidian_comment(byte_pos)
736    }
737
738    /// Get HTML tags - computed lazily on first access
739    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
740        Arc::clone(self.html_tags_cache.get_or_init(|| {
741            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
742            // Filter out HTML tags inside kramdown extension blocks
743            Arc::new(
744                tags.into_iter()
745                    .filter(|tag| {
746                        !self
747                            .lines
748                            .get(tag.line - 1)
749                            .is_some_and(|l| l.in_kramdown_extension_block)
750                    })
751                    .collect(),
752            )
753        }))
754    }
755
756    /// Get emphasis spans - pre-computed during construction
757    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
758        Arc::clone(
759            self.emphasis_spans_cache
760                .get()
761                .expect("emphasis_spans_cache initialized during construction"),
762        )
763    }
764
765    /// Get table rows - computed lazily on first access
766    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
767        Arc::clone(
768            self.table_rows_cache
769                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
770        )
771    }
772
773    /// Get bare URLs - computed lazily on first access
774    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
775        Arc::clone(self.bare_urls_cache.get_or_init(|| {
776            Arc::new(element_parsers::parse_bare_urls(
777                self.content,
778                &self.lines,
779                &self.code_blocks,
780            ))
781        }))
782    }
783
784    /// Get lazy continuation lines - computed lazily on first access
785    pub fn lazy_continuation_lines(&self) -> Arc<Vec<LazyContLine>> {
786        Arc::clone(self.lazy_cont_lines_cache.get_or_init(|| {
787            Arc::new(element_parsers::detect_lazy_continuation_lines(
788                self.content,
789                &self.lines,
790                &self.line_offsets,
791            ))
792        }))
793    }
794
795    /// Check if document has mixed ordered/unordered list nesting.
796    /// Result is cached after first computation (document-level invariant).
797    /// This is used by MD007 for smart style auto-detection.
798    pub fn has_mixed_list_nesting(&self) -> bool {
799        *self
800            .has_mixed_list_nesting_cache
801            .get_or_init(|| self.compute_mixed_list_nesting())
802    }
803
804    /// Internal computation for mixed list nesting (only called once per LintContext).
805    fn compute_mixed_list_nesting(&self) -> bool {
806        // Track parent list items by their marker position and type
807        // Using marker_column instead of indent because it works correctly
808        // for blockquoted content where indent doesn't account for the prefix
809        // Stack stores: (marker_column, is_ordered)
810        let mut stack: Vec<(usize, bool)> = Vec::new();
811        let mut last_was_blank = false;
812
813        for line_info in &self.lines {
814            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
815            if line_info.in_code_block
816                || line_info.in_front_matter
817                || line_info.in_mkdocstrings
818                || line_info.in_html_comment
819                || line_info.in_mdx_comment
820                || line_info.in_esm_block
821            {
822                continue;
823            }
824
825            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
826            if line_info.is_blank {
827                last_was_blank = true;
828                continue;
829            }
830
831            if let Some(list_item) = &line_info.list_item {
832                // Normalize column 1 to column 0 (consistent with MD007 check function)
833                let current_pos = if list_item.marker_column == 1 {
834                    0
835                } else {
836                    list_item.marker_column
837                };
838
839                // If there was a blank line and this item is at root level, reset stack
840                if last_was_blank && current_pos == 0 {
841                    stack.clear();
842                }
843                last_was_blank = false;
844
845                // Pop items at same or greater position (they're siblings or deeper, not parents)
846                while let Some(&(pos, _)) = stack.last() {
847                    if pos >= current_pos {
848                        stack.pop();
849                    } else {
850                        break;
851                    }
852                }
853
854                // Check if immediate parent has different type - this is mixed nesting
855                if let Some(&(_, parent_is_ordered)) = stack.last()
856                    && parent_is_ordered != list_item.is_ordered
857                {
858                    return true; // Found mixed nesting - early exit
859                }
860
861                stack.push((current_pos, list_item.is_ordered));
862            } else {
863                // Non-list line (but not blank) - could be paragraph or other content
864                last_was_blank = false;
865            }
866        }
867
868        false
869    }
870
871    /// Map a byte offset to (line, column)
872    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
873        match self.line_offsets.binary_search(&offset) {
874            Ok(line) => (line + 1, 1),
875            Err(line) => {
876                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
877                (line, offset - line_start + 1)
878            }
879        }
880    }
881
882    /// Check if a position is within a code block or code span. O(log n).
883    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
884        // Check code blocks first (already uses binary search internally)
885        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
886            return true;
887        }
888
889        // Check inline code spans via binary search
890        self.is_byte_offset_in_code_span(pos)
891    }
892
893    /// Get line information by line number (1-indexed)
894    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
895        if line_num > 0 {
896            self.lines.get(line_num - 1)
897        } else {
898            None
899        }
900    }
901
902    /// Get byte offset for a line number (1-indexed)
903    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
904        self.line_info(line_num).map(|info| info.byte_offset)
905    }
906
907    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
908    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
909        let normalized_id = ref_id.to_lowercase();
910        self.reference_defs_map
911            .get(&normalized_id)
912            .map(|&idx| self.reference_defs[idx].url.as_str())
913    }
914
915    /// Get a reference definition by its ID (O(1) lookup via HashMap)
916    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
917        let normalized_id = ref_id.to_lowercase();
918        self.reference_defs_map
919            .get(&normalized_id)
920            .map(|&idx| &self.reference_defs[idx])
921    }
922
923    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
924    pub fn has_reference_def(&self, ref_id: &str) -> bool {
925        let normalized_id = ref_id.to_lowercase();
926        self.reference_defs_map.contains_key(&normalized_id)
927    }
928
929    /// Check if a line is part of a list block
930    pub fn is_in_list_block(&self, line_num: usize) -> bool {
931        self.list_blocks
932            .iter()
933            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
934    }
935
936    /// Get the list block containing a specific line
937    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
938        self.list_blocks
939            .iter()
940            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
941    }
942
943    // Compatibility methods for DocumentStructure migration
944
945    /// Check if a line is within a code block
946    pub fn is_in_code_block(&self, line_num: usize) -> bool {
947        if line_num == 0 || line_num > self.lines.len() {
948            return false;
949        }
950        self.lines[line_num - 1].in_code_block
951    }
952
953    /// Check if a line is within front matter
954    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
955        if line_num == 0 || line_num > self.lines.len() {
956            return false;
957        }
958        self.lines[line_num - 1].in_front_matter
959    }
960
961    /// Check if a line is within an HTML block
962    pub fn is_in_html_block(&self, line_num: usize) -> bool {
963        if line_num == 0 || line_num > self.lines.len() {
964            return false;
965        }
966        self.lines[line_num - 1].in_html_block
967    }
968
969    /// Check if a line and column is within a code span
970    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
971        if line_num == 0 || line_num > self.lines.len() {
972            return false;
973        }
974
975        // Use the code spans cache to check
976        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
977        // Convert col to 0-indexed for comparison
978        let col_0indexed = if col > 0 { col - 1 } else { 0 };
979        let code_spans = self.code_spans();
980        code_spans.iter().any(|span| {
981            // Check if line is within the span's line range
982            if line_num < span.line || line_num > span.end_line {
983                return false;
984            }
985
986            if span.line == span.end_line {
987                // Single-line span: check column bounds
988                col_0indexed >= span.start_col && col_0indexed < span.end_col
989            } else if line_num == span.line {
990                // First line of multi-line span: anything after start_col is in span
991                col_0indexed >= span.start_col
992            } else if line_num == span.end_line {
993                // Last line of multi-line span: anything before end_col is in span
994                col_0indexed < span.end_col
995            } else {
996                // Middle line of multi-line span: entire line is in span
997                true
998            }
999        })
1000    }
1001
1002    /// Check if a byte offset is within a code span. O(log n).
1003    #[inline]
1004    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
1005        let code_spans = self.code_spans();
1006        let idx = code_spans.partition_point(|span| span.byte_offset <= byte_offset);
1007        idx > 0 && byte_offset < code_spans[idx - 1].byte_end
1008    }
1009
1010    /// Check if a byte position is within a reference definition. O(log n).
1011    #[inline]
1012    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
1013        let idx = self.reference_defs.partition_point(|rd| rd.byte_offset <= byte_pos);
1014        idx > 0 && byte_pos < self.reference_defs[idx - 1].byte_end
1015    }
1016
1017    /// Check if a byte position is within an HTML comment. O(log n).
1018    #[inline]
1019    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
1020        let idx = self.html_comment_ranges.partition_point(|r| r.start <= byte_pos);
1021        idx > 0 && byte_pos < self.html_comment_ranges[idx - 1].end
1022    }
1023
1024    /// Check if a byte position is within an HTML tag (including multiline tags).
1025    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines. O(log n).
1026    #[inline]
1027    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
1028        let tags = self.html_tags();
1029        let idx = tags.partition_point(|tag| tag.byte_offset <= byte_pos);
1030        idx > 0 && byte_pos < tags[idx - 1].byte_end
1031    }
1032
1033    /// Check if a byte position is within a Jinja template ({{ }} or {% %}). O(log n).
1034    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
1035        Self::binary_search_ranges(&self.jinja_ranges, byte_pos)
1036    }
1037
1038    /// Check if a byte position is within a JSX expression (MDX: {expression}). O(log n).
1039    #[inline]
1040    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
1041        Self::binary_search_ranges(&self.jsx_expression_ranges, byte_pos)
1042    }
1043
1044    /// Check if a byte position is within an MDX comment ({/* ... */}). O(log n).
1045    #[inline]
1046    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
1047        Self::binary_search_ranges(&self.mdx_comment_ranges, byte_pos)
1048    }
1049
1050    /// Get all JSX expression byte ranges
1051    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
1052        &self.jsx_expression_ranges
1053    }
1054
1055    /// Get all MDX comment byte ranges
1056    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
1057        &self.mdx_comment_ranges
1058    }
1059
1060    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`).
1061    /// Only active in Quarto flavor. O(log n).
1062    #[inline]
1063    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
1064        let idx = self.citation_ranges.partition_point(|r| r.start <= byte_pos);
1065        idx > 0 && byte_pos < self.citation_ranges[idx - 1].end
1066    }
1067
1068    /// Get all citation byte ranges (Quarto flavor only)
1069    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
1070        &self.citation_ranges
1071    }
1072
1073    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}}). O(log n).
1074    #[inline]
1075    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
1076        Self::binary_search_ranges(&self.shortcode_ranges, byte_pos)
1077    }
1078
1079    /// Get all shortcode byte ranges
1080    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
1081        &self.shortcode_ranges
1082    }
1083
1084    /// Check if a byte position is within a link reference definition title. O(log n).
1085    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
1086        Self::binary_search_ranges(&self.link_title_ranges, byte_pos)
1087    }
1088
1089    /// Check if content has any instances of a specific character (fast)
1090    pub fn has_char(&self, ch: char) -> bool {
1091        match ch {
1092            '#' => self.char_frequency.hash_count > 0,
1093            '*' => self.char_frequency.asterisk_count > 0,
1094            '_' => self.char_frequency.underscore_count > 0,
1095            '-' => self.char_frequency.hyphen_count > 0,
1096            '+' => self.char_frequency.plus_count > 0,
1097            '>' => self.char_frequency.gt_count > 0,
1098            '|' => self.char_frequency.pipe_count > 0,
1099            '[' => self.char_frequency.bracket_count > 0,
1100            '`' => self.char_frequency.backtick_count > 0,
1101            '<' => self.char_frequency.lt_count > 0,
1102            '!' => self.char_frequency.exclamation_count > 0,
1103            '\n' => self.char_frequency.newline_count > 0,
1104            _ => self.content.contains(ch), // Fallback for other characters
1105        }
1106    }
1107
1108    /// Get count of a specific character (fast)
1109    pub fn char_count(&self, ch: char) -> usize {
1110        match ch {
1111            '#' => self.char_frequency.hash_count,
1112            '*' => self.char_frequency.asterisk_count,
1113            '_' => self.char_frequency.underscore_count,
1114            '-' => self.char_frequency.hyphen_count,
1115            '+' => self.char_frequency.plus_count,
1116            '>' => self.char_frequency.gt_count,
1117            '|' => self.char_frequency.pipe_count,
1118            '[' => self.char_frequency.bracket_count,
1119            '`' => self.char_frequency.backtick_count,
1120            '<' => self.char_frequency.lt_count,
1121            '!' => self.char_frequency.exclamation_count,
1122            '\n' => self.char_frequency.newline_count,
1123            _ => self.content.matches(ch).count(), // Fallback for other characters
1124        }
1125    }
1126
1127    /// Check if content likely contains headings (fast)
1128    pub fn likely_has_headings(&self) -> bool {
1129        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 || self.content.contains('=') // Setext H1 underlines use '='
1130    }
1131
1132    /// Check if content likely contains lists (fast)
1133    pub fn likely_has_lists(&self) -> bool {
1134        self.char_frequency.asterisk_count > 0
1135            || self.char_frequency.hyphen_count > 0
1136            || self.char_frequency.plus_count > 0
1137    }
1138
1139    /// Check if content likely contains emphasis (fast)
1140    pub fn likely_has_emphasis(&self) -> bool {
1141        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1142    }
1143
1144    /// Check if content likely contains tables (fast)
1145    pub fn likely_has_tables(&self) -> bool {
1146        self.char_frequency.pipe_count > 2
1147    }
1148
1149    /// Check if content likely contains blockquotes (fast)
1150    pub fn likely_has_blockquotes(&self) -> bool {
1151        self.char_frequency.gt_count > 0
1152    }
1153
1154    /// Check if content likely contains code (fast)
1155    pub fn likely_has_code(&self) -> bool {
1156        self.char_frequency.backtick_count > 0
1157    }
1158
1159    /// Check if content likely contains links or images (fast)
1160    pub fn likely_has_links_or_images(&self) -> bool {
1161        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1162    }
1163
1164    /// Check if content likely contains HTML (fast)
1165    pub fn likely_has_html(&self) -> bool {
1166        self.char_frequency.lt_count > 0
1167    }
1168
1169    /// Get the blockquote prefix for inserting a blank line at the given line index.
1170    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1171    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1172    /// Returns an empty string if the line is not inside a blockquote.
1173    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1174        if let Some(line_info) = self.lines.get(line_idx)
1175            && let Some(ref bq) = line_info.blockquote
1176        {
1177            bq.prefix.trim_end().to_string()
1178        } else {
1179            String::new()
1180        }
1181    }
1182
1183    /// Get HTML tags on a specific line
1184    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1185        self.html_tags()
1186            .iter()
1187            .filter(|tag| tag.line == line_num)
1188            .cloned()
1189            .collect()
1190    }
1191
1192    /// Get emphasis spans on a specific line
1193    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1194        self.emphasis_spans()
1195            .iter()
1196            .filter(|span| span.line == line_num)
1197            .cloned()
1198            .collect()
1199    }
1200
1201    /// Get table rows on a specific line
1202    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1203        self.table_rows()
1204            .iter()
1205            .filter(|row| row.line == line_num)
1206            .cloned()
1207            .collect()
1208    }
1209
1210    /// Get bare URLs on a specific line
1211    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1212        self.bare_urls()
1213            .iter()
1214            .filter(|url| url.line == line_num)
1215            .cloned()
1216            .collect()
1217    }
1218
1219    /// Find the line index for a given byte offset using binary search.
1220    /// Returns (line_index, line_number, column) where:
1221    /// - line_index is the 0-based index in the lines array
1222    /// - line_number is the 1-based line number
1223    /// - column is the byte offset within that line
1224    #[inline]
1225    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1226        // Binary search to find the line containing this byte offset
1227        let idx = match lines.binary_search_by(|line| {
1228            if byte_offset < line.byte_offset {
1229                std::cmp::Ordering::Greater
1230            } else if byte_offset > line.byte_offset + line.byte_len {
1231                std::cmp::Ordering::Less
1232            } else {
1233                std::cmp::Ordering::Equal
1234            }
1235        }) {
1236            Ok(idx) => idx,
1237            Err(idx) => idx.saturating_sub(1),
1238        };
1239
1240        let line = &lines[idx];
1241        let line_num = idx + 1;
1242        let col = byte_offset.saturating_sub(line.byte_offset);
1243
1244        (idx, line_num, col)
1245    }
1246
1247    /// Check if a byte offset is within a code span using binary search
1248    #[inline]
1249    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1250        // Since spans are sorted by byte_offset, use partition_point for binary search
1251        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1252
1253        // Check the span that starts at or before our offset
1254        if idx > 0 {
1255            let span = &code_spans[idx - 1];
1256            if offset >= span.byte_offset && offset < span.byte_end {
1257                return true;
1258            }
1259        }
1260
1261        false
1262    }
1263
1264    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1265    ///
1266    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1267    /// This is the standard iterator for rules that need to process headings.
1268    ///
1269    /// # Examples
1270    ///
1271    /// ```
1272    /// use rumdl::lint_context::LintContext;
1273    /// use rumdl::config::MarkdownFlavor;
1274    ///
1275    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1276    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1277    ///
1278    /// for heading in ctx.valid_headings() {
1279    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1280    /// }
1281    /// // Only prints valid headings, skips `#NoSpace`
1282    /// ```
1283    #[must_use]
1284    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1285        ValidHeadingsIter::new(&self.lines)
1286    }
1287
1288    /// Check if the document contains any valid CommonMark headings
1289    ///
1290    /// Returns `true` if there is at least one heading with proper space after `#`.
1291    #[must_use]
1292    pub fn has_valid_headings(&self) -> bool {
1293        self.lines
1294            .iter()
1295            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1296    }
1297}
1298
1299/// Detect footnote definitions and mark their continuation lines.
1300///
1301/// Uses pulldown-cmark to find footnote definition ranges and fenced code
1302/// blocks within them, then:
1303/// 1. Sets `in_footnote_definition = true` on all lines within
1304/// 2. Clears `in_code_block = false` on continuation lines that were
1305///    misidentified as indented code blocks (but preserves real fenced
1306///    code blocks within footnotes)
1307fn detect_footnote_definitions(content: &str, lines: &mut [types::LineInfo], line_offsets: &[usize]) {
1308    use pulldown_cmark::{CodeBlockKind, Event, Parser, Tag, TagEnd};
1309
1310    let options = crate::utils::rumdl_parser_options();
1311    let parser = Parser::new_ext(content, options).into_offset_iter();
1312
1313    // Collect footnote ranges and fenced code block ranges within them
1314    let mut footnote_ranges: Vec<(usize, usize)> = Vec::new();
1315    let mut fenced_code_ranges: Vec<(usize, usize)> = Vec::new();
1316    let mut in_footnote = false;
1317
1318    for (event, range) in parser {
1319        match event {
1320            Event::Start(Tag::FootnoteDefinition(_)) => {
1321                in_footnote = true;
1322                footnote_ranges.push((range.start, range.end));
1323            }
1324            Event::End(TagEnd::FootnoteDefinition) => {
1325                in_footnote = false;
1326            }
1327            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(_))) if in_footnote => {
1328                fenced_code_ranges.push((range.start, range.end));
1329            }
1330            _ => {}
1331        }
1332    }
1333
1334    let byte_to_line = |byte_offset: usize| -> usize {
1335        line_offsets
1336            .partition_point(|&offset| offset <= byte_offset)
1337            .saturating_sub(1)
1338    };
1339
1340    // Mark footnote definition lines
1341    for &(start, end) in &footnote_ranges {
1342        let start_line = byte_to_line(start);
1343        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1344
1345        for line in &mut lines[start_line..end_line] {
1346            line.in_footnote_definition = true;
1347            line.in_code_block = false;
1348        }
1349    }
1350
1351    // Restore in_code_block for fenced code blocks within footnotes
1352    for &(start, end) in &fenced_code_ranges {
1353        let start_line = byte_to_line(start);
1354        let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
1355
1356        for line in &mut lines[start_line..end_line] {
1357            line.in_code_block = true;
1358        }
1359    }
1360}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs