rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::CodeBlockUtils;
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluate `$code` and yield its value, optionally reporting how long it took.
///
/// Native (non-WASM) variant: the section is always timed, and when `$profile`
/// evaluates to `true` a `[PROFILE] <name>: <elapsed>` line is written to stderr.
/// `$profile` is evaluated after `$code` has run.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let output = $code;
        if $profile {
            eprintln!("[PROFILE] {}: {:?}", $name, timer.elapsed());
        }
        output
    }};
}

/// WASM variant: no timing and no output; the macro expands to `$code` alone,
/// so `$name` and `$profile` are never evaluated.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{ $code }};
}
37
/// Grouped byte ranges for skip context detection.
///
/// Bundles four borrowed range slices into one value so internal functions
/// take a single argument instead of four (reduces parameter count).
pub(super) struct SkipByteRanges<'a> {
    /// Ranges produced by `compute_html_comment_ranges`.
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Ranges produced by `detect_autodoc_block_ranges` (computed for every flavor).
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Quarto div block ranges; `LintContext::new` leaves this empty unless the flavor is Quarto.
    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// PyMdown block ranges; `LintContext::new` leaves this empty unless the flavor is MkDocs.
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
46
47use std::sync::{Arc, OnceLock};
48
/// Map from line byte offset to list item data.
///
/// The value tuple is, in order:
/// `(is_ordered, marker, marker_column, content_column, number)`.
pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
/// Type alias for byte ranges used in JSX expression and MDX comment detection.
///
/// Each entry is a `(start, end)` pair of byte offsets.
pub(super) type ByteRanges = Vec<(usize, usize)>;
54
/// Pre-computed view of one Markdown document, shared by all lint rules.
///
/// `LintContext::new` parses the borrowed `content` once and stores the results
/// (line info, code blocks, links, images, list/table blocks, flavor-specific
/// ranges, ...). Fields wrapped in `OnceLock` are caches: some are filled on
/// first access, while `code_spans_cache` and `emphasis_spans_cache` are
/// already populated by `new`.
pub struct LintContext<'a> {
    pub content: &'a str,
    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
    pub line_offsets: Vec<usize>,
    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
    pub lines: Vec<LineInfo>,             // Pre-computed line information
    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Inline code spans (pre-populated in `new`)
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
    pub char_frequency: CharFrequency,    // Character frequency analysis
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Emphasis spans (pre-populated in `new`)
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
    pub flavor: MarkdownFlavor,           // Markdown flavor being used
    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
}
89
90impl<'a> LintContext<'a> {
91    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
92        #[cfg(not(target_arch = "wasm32"))]
93        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
94        #[cfg(target_arch = "wasm32")]
95        let profile = false;
96
97        let line_offsets = profile_section!("Line offsets", profile, {
98            let mut offsets = vec![0];
99            for (i, c) in content.char_indices() {
100                if c == '\n' {
101                    offsets.push(i + 1);
102                }
103            }
104            offsets
105        });
106
107        // Compute content_lines once for all functions that need it
108        let content_lines: Vec<&str> = content.lines().collect();
109
110        // Detect front matter boundaries once for all functions that need it
111        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
112
113        // Detect code blocks and code spans once and cache them
114        let (mut code_blocks, code_span_ranges) = profile_section!(
115            "Code blocks",
116            profile,
117            CodeBlockUtils::detect_code_blocks_and_spans(content)
118        );
119
120        // Pre-compute HTML comment ranges ONCE for all operations
121        let html_comment_ranges = profile_section!(
122            "HTML comment ranges",
123            profile,
124            crate::utils::skip_context::compute_html_comment_ranges(content)
125        );
126
127        // Pre-compute autodoc block ranges (avoids O(n^2) scaling)
128        // Detected for all flavors: `:::` blocks are structurally unique and should
129        // never be reflowed as prose, even without MkDocs flavor.
130        let autodoc_ranges = profile_section!(
131            "Autodoc block ranges",
132            profile,
133            crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
134        );
135
136        // Pre-compute Quarto div block ranges for Quarto flavor
137        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
138            if flavor == MarkdownFlavor::Quarto {
139                crate::utils::quarto_divs::detect_div_block_ranges(content)
140            } else {
141                Vec::new()
142            }
143        });
144
145        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
146        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
147            if flavor == MarkdownFlavor::MkDocs {
148                crate::utils::pymdown_blocks::detect_block_ranges(content)
149            } else {
150                Vec::new()
151            }
152        });
153
154        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
155        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
156        let skip_ranges = SkipByteRanges {
157            html_comment_ranges: &html_comment_ranges,
158            autodoc_ranges: &autodoc_ranges,
159            quarto_div_ranges: &quarto_div_ranges,
160            pymdown_block_ranges: &pymdown_block_ranges,
161        };
162        let (mut lines, emphasis_spans) = profile_section!(
163            "Basic line info",
164            profile,
165            line_computation::compute_basic_line_info(
166                content,
167                &content_lines,
168                &line_offsets,
169                &code_blocks,
170                flavor,
171                &skip_ranges,
172                front_matter_end,
173            )
174        );
175
176        // Detect HTML blocks BEFORE heading detection
177        profile_section!(
178            "HTML blocks",
179            profile,
180            heading_detection::detect_html_blocks(content, &mut lines)
181        );
182
183        // Detect ESM import/export blocks in MDX files BEFORE heading detection
184        profile_section!(
185            "ESM blocks",
186            profile,
187            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
188        );
189
190        // Detect JSX component blocks in MDX files (e.g. <Tabs>...</Tabs>)
191        profile_section!(
192            "JSX block detection",
193            profile,
194            flavor_detection::detect_jsx_blocks(content, &mut lines, flavor)
195        );
196
197        // Detect JSX expressions and MDX comments in MDX files
198        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
199            "JSX/MDX detection",
200            profile,
201            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
202        );
203
204        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
205        profile_section!(
206            "MkDocs constructs",
207            profile,
208            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
209        );
210
211        // Filter code_blocks to remove false positives from MkDocs admonition/tab content.
212        // pulldown-cmark treats 4-space-indented content as indented code blocks, but inside
213        // MkDocs admonitions and content tabs this is regular markdown content.
214        // detect_mkdocs_line_info already corrected LineInfo.in_code_block for these lines,
215        // but the code_blocks byte ranges are still stale. We split ranges rather than using
216        // all-or-nothing removal, so fenced code blocks within admonitions are preserved.
217        if flavor == MarkdownFlavor::MkDocs {
218            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
219            for &(start, end) in &code_blocks {
220                let start_line = line_offsets
221                    .partition_point(|&offset| offset <= start)
222                    .saturating_sub(1);
223                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
224
225                // Walk lines in this range, collecting sub-ranges where in_code_block is true
226                let mut sub_start: Option<usize> = None;
227                for (i, &offset) in line_offsets[start_line..end_line]
228                    .iter()
229                    .enumerate()
230                    .map(|(j, o)| (j + start_line, o))
231                {
232                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
233                    if is_real_code && sub_start.is_none() {
234                        let byte_start = if i == start_line { start } else { offset };
235                        sub_start = Some(byte_start);
236                    } else if !is_real_code && sub_start.is_some() {
237                        new_code_blocks.push((sub_start.unwrap(), offset));
238                        sub_start = None;
239                    }
240                }
241                if let Some(s) = sub_start {
242                    new_code_blocks.push((s, end));
243                }
244            }
245            code_blocks = new_code_blocks;
246        }
247
248        // Filter code_blocks for MDX JSX blocks (same pattern as MkDocs above).
249        // detect_jsx_blocks already corrected LineInfo.in_code_block for indented content
250        // inside JSX component blocks, but code_blocks byte ranges need updating too.
251        if flavor.supports_jsx() {
252            let mut new_code_blocks = Vec::with_capacity(code_blocks.len());
253            for &(start, end) in &code_blocks {
254                let start_line = line_offsets
255                    .partition_point(|&offset| offset <= start)
256                    .saturating_sub(1);
257                let end_line = line_offsets.partition_point(|&offset| offset < end).min(lines.len());
258
259                let mut sub_start: Option<usize> = None;
260                for (i, &offset) in line_offsets[start_line..end_line]
261                    .iter()
262                    .enumerate()
263                    .map(|(j, o)| (j + start_line, o))
264                {
265                    let is_real_code = lines.get(i).is_some_and(|info| info.in_code_block);
266                    if is_real_code && sub_start.is_none() {
267                        let byte_start = if i == start_line { start } else { offset };
268                        sub_start = Some(byte_start);
269                    } else if !is_real_code && sub_start.is_some() {
270                        new_code_blocks.push((sub_start.unwrap(), offset));
271                        sub_start = None;
272                    }
273                }
274                if let Some(s) = sub_start {
275                    new_code_blocks.push((s, end));
276                }
277            }
278            code_blocks = new_code_blocks;
279        }
280
281        // Detect kramdown constructs (extension blocks, IALs, ALDs) in kramdown flavor
282        profile_section!(
283            "Kramdown constructs",
284            profile,
285            flavor_detection::detect_kramdown_line_info(content, &mut lines, flavor)
286        );
287
288        // Layer 1: Sanitize content-derived fields inside kramdown extension blocks
289        // so downstream heading detection and collection builders never see them.
290        // This must run BEFORE detect_headings_and_blockquotes to prevent headings
291        // from being populated inside extension blocks.
292        for line in &mut lines {
293            if line.in_kramdown_extension_block {
294                line.list_item = None;
295                line.is_horizontal_rule = false;
296                line.blockquote = None;
297                line.is_kramdown_block_ial = false;
298            }
299        }
300
301        // Detect Obsidian comments (%%...%%) in Obsidian flavor
302        let obsidian_comment_ranges = profile_section!(
303            "Obsidian comments",
304            profile,
305            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
306        );
307
308        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
309        let link_byte_ranges = profile_section!(
310            "Link byte ranges",
311            profile,
312            link_parser::collect_link_byte_ranges(content)
313        );
314
315        // Now detect headings and blockquotes
316        profile_section!(
317            "Headings & blockquotes",
318            profile,
319            heading_detection::detect_headings_and_blockquotes(
320                &content_lines,
321                &mut lines,
322                flavor,
323                &html_comment_ranges,
324                &link_byte_ranges,
325                front_matter_end,
326            )
327        );
328
329        // Clear headings that were detected inside kramdown extension blocks
330        for line in &mut lines {
331            if line.in_kramdown_extension_block {
332                line.heading = None;
333            }
334        }
335
336        // Parse code spans early so we can exclude them from link/image parsing
337        let mut code_spans = profile_section!(
338            "Code spans",
339            profile,
340            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
341        );
342
343        // Supplement code spans for MkDocs container content that pulldown-cmark missed.
344        // pulldown-cmark treats 4-space-indented MkDocs content as indented code blocks,
345        // so backtick code spans within admonitions/tabs/markdown HTML are invisible to it.
346        if flavor == MarkdownFlavor::MkDocs {
347            let extra = profile_section!(
348                "MkDocs code spans",
349                profile,
350                element_parsers::scan_mkdocs_container_code_spans(content, &lines, &code_span_ranges,)
351            );
352            if !extra.is_empty() {
353                code_spans.extend(extra);
354                code_spans.sort_by_key(|span| span.byte_offset);
355            }
356        }
357
358        // Mark lines that are continuations of multi-line code spans
359        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
360        for span in &code_spans {
361            if span.end_line > span.line {
362                // Mark lines after the first line as continuations
363                for line_num in (span.line + 1)..=span.end_line {
364                    if let Some(line_info) = lines.get_mut(line_num - 1) {
365                        line_info.in_code_span_continuation = true;
366                    }
367                }
368            }
369        }
370
371        // Parse links, images, references, and list blocks
372        let (links, broken_links, footnote_refs) = profile_section!(
373            "Links",
374            profile,
375            link_parser::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
376        );
377
378        let images = profile_section!(
379            "Images",
380            profile,
381            link_parser::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
382        );
383
384        let reference_defs = profile_section!(
385            "Reference defs",
386            profile,
387            link_parser::parse_reference_defs(content, &lines)
388        );
389
390        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
391
392        // Compute character frequency for fast content analysis
393        let char_frequency = profile_section!(
394            "Char frequency",
395            profile,
396            line_computation::compute_char_frequency(content)
397        );
398
399        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
400        let table_blocks = profile_section!(
401            "Table blocks",
402            profile,
403            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
404                content,
405                &code_blocks,
406                &code_spans,
407                &html_comment_ranges,
408            )
409        );
410
411        // Layer 2: Filter pre-computed collections to exclude items inside kramdown extension blocks.
412        // Rules that iterate these collections automatically skip kramdown content.
413        let links = links
414            .into_iter()
415            .filter(|link| !lines.get(link.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
416            .collect::<Vec<_>>();
417        let images = images
418            .into_iter()
419            .filter(|img| !lines.get(img.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
420            .collect::<Vec<_>>();
421        let broken_links = broken_links
422            .into_iter()
423            .filter(|bl| {
424                // BrokenLinkInfo has span but no line field; find line from byte offset
425                let line_idx = line_offsets
426                    .partition_point(|&offset| offset <= bl.span.start)
427                    .saturating_sub(1);
428                !lines.get(line_idx).is_some_and(|l| l.in_kramdown_extension_block)
429            })
430            .collect::<Vec<_>>();
431        let footnote_refs = footnote_refs
432            .into_iter()
433            .filter(|fr| !lines.get(fr.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
434            .collect::<Vec<_>>();
435        let reference_defs = reference_defs
436            .into_iter()
437            .filter(|def| !lines.get(def.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
438            .collect::<Vec<_>>();
439        let list_blocks = list_blocks
440            .into_iter()
441            .filter(|block| {
442                !lines
443                    .get(block.start_line - 1)
444                    .is_some_and(|l| l.in_kramdown_extension_block)
445            })
446            .collect::<Vec<_>>();
447        let table_blocks = table_blocks
448            .into_iter()
449            .filter(|block| {
450                // TableBlock.start_line is 0-indexed
451                !lines
452                    .get(block.start_line)
453                    .is_some_and(|l| l.in_kramdown_extension_block)
454            })
455            .collect::<Vec<_>>();
456        let emphasis_spans = emphasis_spans
457            .into_iter()
458            .filter(|span| !lines.get(span.line - 1).is_some_and(|l| l.in_kramdown_extension_block))
459            .collect::<Vec<_>>();
460
461        // Rebuild reference_defs_map after filtering
462        let reference_defs_map: HashMap<String, usize> = reference_defs
463            .iter()
464            .enumerate()
465            .map(|(idx, def)| (def.id.to_lowercase(), idx))
466            .collect();
467
468        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
469        let line_index = profile_section!(
470            "Line index",
471            profile,
472            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
473                content,
474                line_offsets.clone(),
475                &code_blocks,
476            )
477        );
478
479        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
480        let jinja_ranges = profile_section!(
481            "Jinja ranges",
482            profile,
483            crate::utils::jinja_utils::find_jinja_ranges(content)
484        );
485
486        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
487        let citation_ranges = profile_section!("Citation ranges", profile, {
488            if flavor == MarkdownFlavor::Quarto {
489                crate::utils::quarto_divs::find_citation_ranges(content)
490            } else {
491                Vec::new()
492            }
493        });
494
495        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
496        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
497            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
498            let mut ranges = Vec::new();
499            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
500                ranges.push((mat.start(), mat.end()));
501            }
502            ranges
503        });
504
505        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
506
507        Self {
508            content,
509            content_lines,
510            line_offsets,
511            code_blocks,
512            lines,
513            links,
514            images,
515            broken_links,
516            footnote_refs,
517            reference_defs,
518            reference_defs_map,
519            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
520            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
521            list_blocks,
522            char_frequency,
523            html_tags_cache: OnceLock::new(),
524            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
525            table_rows_cache: OnceLock::new(),
526            bare_urls_cache: OnceLock::new(),
527            has_mixed_list_nesting_cache: OnceLock::new(),
528            html_comment_ranges,
529            table_blocks,
530            line_index,
531            jinja_ranges,
532            flavor,
533            source_file,
534            jsx_expression_ranges,
535            mdx_comment_ranges,
536            citation_ranges,
537            shortcode_ranges,
538            inline_config,
539            obsidian_comment_ranges,
540        }
541    }
542
    /// Get parsed inline configuration state.
    ///
    /// The value is built once in `new` from the document content and its
    /// code block ranges.
    pub fn inline_config(&self) -> &InlineConfig {
        &self.inline_config
    }
547
    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
    ///
    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
    /// The slices borrow from the original content and, as produced by
    /// `str::lines`, do not include line terminators.
    pub fn raw_lines(&self) -> &[&'a str] {
        &self.content_lines
    }
554
    /// Check if a rule is disabled at a specific line number (1-indexed)
    ///
    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
    ///
    /// The decision is delegated entirely to the parsed `InlineConfig`.
    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
        self.inline_config.is_rule_disabled(rule_name, line_number)
    }
562
563    /// Get code spans - computed lazily on first access
564    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
565        Arc::clone(
566            self.code_spans_cache
567                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
568        )
569    }
570
571    /// Get math spans - computed lazily on first access
572    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
573        Arc::clone(
574            self.math_spans_cache
575                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
576        )
577    }
578
579    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
580    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
581        let math_spans = self.math_spans();
582        math_spans
583            .iter()
584            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
585    }
586
    /// Get HTML comment ranges - pre-computed during LintContext construction
    ///
    /// Returns a borrowed slice; nothing is recomputed on access.
    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
        &self.html_comment_ranges
    }
591
    /// Get Obsidian comment ranges - pre-computed during LintContext construction
    /// Returns empty slice for non-Obsidian flavors
    ///
    /// Each entry is a `(start, end)` pair of byte offsets.
    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
        &self.obsidian_comment_ranges
    }
597
598    /// Check if a byte position is inside an Obsidian comment
599    ///
600    /// Returns false for non-Obsidian flavors.
601    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
602        self.obsidian_comment_ranges
603            .iter()
604            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
605    }
606
607    /// Check if a line/column position is inside an Obsidian comment
608    ///
609    /// Line number is 1-indexed, column is 1-indexed.
610    /// Returns false for non-Obsidian flavors.
611    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
612        if self.obsidian_comment_ranges.is_empty() {
613            return false;
614        }
615
616        // Convert line/column (1-indexed, char-based) to byte position
617        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
618        self.is_in_obsidian_comment(byte_pos)
619    }
620
621    /// Get HTML tags - computed lazily on first access
622    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
623        Arc::clone(self.html_tags_cache.get_or_init(|| {
624            let tags = element_parsers::parse_html_tags(self.content, &self.lines, &self.code_blocks, self.flavor);
625            // Filter out HTML tags inside kramdown extension blocks
626            Arc::new(
627                tags.into_iter()
628                    .filter(|tag| {
629                        !self
630                            .lines
631                            .get(tag.line - 1)
632                            .is_some_and(|l| l.in_kramdown_extension_block)
633                    })
634                    .collect(),
635            )
636        }))
637    }
638
639    /// Get emphasis spans - pre-computed during construction
640    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
641        Arc::clone(
642            self.emphasis_spans_cache
643                .get()
644                .expect("emphasis_spans_cache initialized during construction"),
645        )
646    }
647
648    /// Get table rows - computed lazily on first access
649    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
650        Arc::clone(
651            self.table_rows_cache
652                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
653        )
654    }
655
656    /// Get bare URLs - computed lazily on first access
657    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
658        Arc::clone(self.bare_urls_cache.get_or_init(|| {
659            Arc::new(element_parsers::parse_bare_urls(
660                self.content,
661                &self.lines,
662                &self.code_blocks,
663            ))
664        }))
665    }
666
667    /// Check if document has mixed ordered/unordered list nesting.
668    /// Result is cached after first computation (document-level invariant).
669    /// This is used by MD007 for smart style auto-detection.
670    pub fn has_mixed_list_nesting(&self) -> bool {
671        *self
672            .has_mixed_list_nesting_cache
673            .get_or_init(|| self.compute_mixed_list_nesting())
674    }
675
676    /// Internal computation for mixed list nesting (only called once per LintContext).
677    fn compute_mixed_list_nesting(&self) -> bool {
678        // Track parent list items by their marker position and type
679        // Using marker_column instead of indent because it works correctly
680        // for blockquoted content where indent doesn't account for the prefix
681        // Stack stores: (marker_column, is_ordered)
682        let mut stack: Vec<(usize, bool)> = Vec::new();
683        let mut last_was_blank = false;
684
685        for line_info in &self.lines {
686            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
687            if line_info.in_code_block
688                || line_info.in_front_matter
689                || line_info.in_mkdocstrings
690                || line_info.in_html_comment
691                || line_info.in_esm_block
692            {
693                continue;
694            }
695
696            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
697            if line_info.is_blank {
698                last_was_blank = true;
699                continue;
700            }
701
702            if let Some(list_item) = &line_info.list_item {
703                // Normalize column 1 to column 0 (consistent with MD007 check function)
704                let current_pos = if list_item.marker_column == 1 {
705                    0
706                } else {
707                    list_item.marker_column
708                };
709
710                // If there was a blank line and this item is at root level, reset stack
711                if last_was_blank && current_pos == 0 {
712                    stack.clear();
713                }
714                last_was_blank = false;
715
716                // Pop items at same or greater position (they're siblings or deeper, not parents)
717                while let Some(&(pos, _)) = stack.last() {
718                    if pos >= current_pos {
719                        stack.pop();
720                    } else {
721                        break;
722                    }
723                }
724
725                // Check if immediate parent has different type - this is mixed nesting
726                if let Some(&(_, parent_is_ordered)) = stack.last()
727                    && parent_is_ordered != list_item.is_ordered
728                {
729                    return true; // Found mixed nesting - early exit
730                }
731
732                stack.push((current_pos, list_item.is_ordered));
733            } else {
734                // Non-list line (but not blank) - could be paragraph or other content
735                last_was_blank = false;
736            }
737        }
738
739        false
740    }
741
742    /// Map a byte offset to (line, column)
743    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
744        match self.line_offsets.binary_search(&offset) {
745            Ok(line) => (line + 1, 1),
746            Err(line) => {
747                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
748                (line, offset - line_start + 1)
749            }
750        }
751    }
752
753    /// Check if a position is within a code block or code span
754    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
755        // Check code blocks first
756        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
757            return true;
758        }
759
760        // Check inline code spans (lazy load if needed)
761        self.code_spans()
762            .iter()
763            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
764    }
765
766    /// Get line information by line number (1-indexed)
767    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
768        if line_num > 0 {
769            self.lines.get(line_num - 1)
770        } else {
771            None
772        }
773    }
774
775    /// Get byte offset for a line number (1-indexed)
776    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
777        self.line_info(line_num).map(|info| info.byte_offset)
778    }
779
780    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
781    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
782        let normalized_id = ref_id.to_lowercase();
783        self.reference_defs_map
784            .get(&normalized_id)
785            .map(|&idx| self.reference_defs[idx].url.as_str())
786    }
787
788    /// Get a reference definition by its ID (O(1) lookup via HashMap)
789    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
790        let normalized_id = ref_id.to_lowercase();
791        self.reference_defs_map
792            .get(&normalized_id)
793            .map(|&idx| &self.reference_defs[idx])
794    }
795
796    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
797    pub fn has_reference_def(&self, ref_id: &str) -> bool {
798        let normalized_id = ref_id.to_lowercase();
799        self.reference_defs_map.contains_key(&normalized_id)
800    }
801
802    /// Check if a line is part of a list block
803    pub fn is_in_list_block(&self, line_num: usize) -> bool {
804        self.list_blocks
805            .iter()
806            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
807    }
808
809    /// Get the list block containing a specific line
810    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
811        self.list_blocks
812            .iter()
813            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
814    }
815
816    // Compatibility methods for DocumentStructure migration
817
818    /// Check if a line is within a code block
819    pub fn is_in_code_block(&self, line_num: usize) -> bool {
820        if line_num == 0 || line_num > self.lines.len() {
821            return false;
822        }
823        self.lines[line_num - 1].in_code_block
824    }
825
826    /// Check if a line is within front matter
827    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
828        if line_num == 0 || line_num > self.lines.len() {
829            return false;
830        }
831        self.lines[line_num - 1].in_front_matter
832    }
833
834    /// Check if a line is within an HTML block
835    pub fn is_in_html_block(&self, line_num: usize) -> bool {
836        if line_num == 0 || line_num > self.lines.len() {
837            return false;
838        }
839        self.lines[line_num - 1].in_html_block
840    }
841
842    /// Check if a line and column is within a code span
843    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
844        if line_num == 0 || line_num > self.lines.len() {
845            return false;
846        }
847
848        // Use the code spans cache to check
849        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
850        // Convert col to 0-indexed for comparison
851        let col_0indexed = if col > 0 { col - 1 } else { 0 };
852        let code_spans = self.code_spans();
853        code_spans.iter().any(|span| {
854            // Check if line is within the span's line range
855            if line_num < span.line || line_num > span.end_line {
856                return false;
857            }
858
859            if span.line == span.end_line {
860                // Single-line span: check column bounds
861                col_0indexed >= span.start_col && col_0indexed < span.end_col
862            } else if line_num == span.line {
863                // First line of multi-line span: anything after start_col is in span
864                col_0indexed >= span.start_col
865            } else if line_num == span.end_line {
866                // Last line of multi-line span: anything before end_col is in span
867                col_0indexed < span.end_col
868            } else {
869                // Middle line of multi-line span: entire line is in span
870                true
871            }
872        })
873    }
874
875    /// Check if a byte offset is within a code span
876    #[inline]
877    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
878        let code_spans = self.code_spans();
879        code_spans
880            .iter()
881            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
882    }
883
884    /// Check if a byte position is within a reference definition
885    #[inline]
886    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
887        self.reference_defs
888            .iter()
889            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
890    }
891
892    /// Check if a byte position is within an HTML comment
893    #[inline]
894    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
895        self.html_comment_ranges
896            .iter()
897            .any(|range| byte_pos >= range.start && byte_pos < range.end)
898    }
899
900    /// Check if a byte position is within an HTML tag (including multiline tags)
901    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
902    #[inline]
903    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
904        self.html_tags()
905            .iter()
906            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
907    }
908
909    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
910    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
911        self.jinja_ranges
912            .iter()
913            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
914    }
915
916    /// Check if a byte position is within a JSX expression (MDX: {expression})
917    #[inline]
918    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
919        self.jsx_expression_ranges
920            .iter()
921            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
922    }
923
924    /// Check if a byte position is within an MDX comment ({/* ... */})
925    #[inline]
926    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
927        self.mdx_comment_ranges
928            .iter()
929            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
930    }
931
932    /// Get all JSX expression byte ranges
933    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
934        &self.jsx_expression_ranges
935    }
936
937    /// Get all MDX comment byte ranges
938    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
939        &self.mdx_comment_ranges
940    }
941
942    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`)
943    /// Only active in Quarto flavor
944    #[inline]
945    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
946        self.citation_ranges
947            .iter()
948            .any(|range| byte_pos >= range.start && byte_pos < range.end)
949    }
950
951    /// Get all citation byte ranges (Quarto flavor only)
952    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
953        &self.citation_ranges
954    }
955
956    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
957    #[inline]
958    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
959        self.shortcode_ranges
960            .iter()
961            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
962    }
963
964    /// Get all shortcode byte ranges
965    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
966        &self.shortcode_ranges
967    }
968
969    /// Check if a byte position is within a link reference definition title
970    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
971        self.reference_defs.iter().any(|def| {
972            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
973                byte_pos >= start && byte_pos < end
974            } else {
975                false
976            }
977        })
978    }
979
980    /// Check if content has any instances of a specific character (fast)
981    pub fn has_char(&self, ch: char) -> bool {
982        match ch {
983            '#' => self.char_frequency.hash_count > 0,
984            '*' => self.char_frequency.asterisk_count > 0,
985            '_' => self.char_frequency.underscore_count > 0,
986            '-' => self.char_frequency.hyphen_count > 0,
987            '+' => self.char_frequency.plus_count > 0,
988            '>' => self.char_frequency.gt_count > 0,
989            '|' => self.char_frequency.pipe_count > 0,
990            '[' => self.char_frequency.bracket_count > 0,
991            '`' => self.char_frequency.backtick_count > 0,
992            '<' => self.char_frequency.lt_count > 0,
993            '!' => self.char_frequency.exclamation_count > 0,
994            '\n' => self.char_frequency.newline_count > 0,
995            _ => self.content.contains(ch), // Fallback for other characters
996        }
997    }
998
999    /// Get count of a specific character (fast)
1000    pub fn char_count(&self, ch: char) -> usize {
1001        match ch {
1002            '#' => self.char_frequency.hash_count,
1003            '*' => self.char_frequency.asterisk_count,
1004            '_' => self.char_frequency.underscore_count,
1005            '-' => self.char_frequency.hyphen_count,
1006            '+' => self.char_frequency.plus_count,
1007            '>' => self.char_frequency.gt_count,
1008            '|' => self.char_frequency.pipe_count,
1009            '[' => self.char_frequency.bracket_count,
1010            '`' => self.char_frequency.backtick_count,
1011            '<' => self.char_frequency.lt_count,
1012            '!' => self.char_frequency.exclamation_count,
1013            '\n' => self.char_frequency.newline_count,
1014            _ => self.content.matches(ch).count(), // Fallback for other characters
1015        }
1016    }
1017
1018    /// Check if content likely contains headings (fast)
1019    pub fn likely_has_headings(&self) -> bool {
1020        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
1021    }
1022
1023    /// Check if content likely contains lists (fast)
1024    pub fn likely_has_lists(&self) -> bool {
1025        self.char_frequency.asterisk_count > 0
1026            || self.char_frequency.hyphen_count > 0
1027            || self.char_frequency.plus_count > 0
1028    }
1029
1030    /// Check if content likely contains emphasis (fast)
1031    pub fn likely_has_emphasis(&self) -> bool {
1032        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
1033    }
1034
1035    /// Check if content likely contains tables (fast)
1036    pub fn likely_has_tables(&self) -> bool {
1037        self.char_frequency.pipe_count > 2
1038    }
1039
1040    /// Check if content likely contains blockquotes (fast)
1041    pub fn likely_has_blockquotes(&self) -> bool {
1042        self.char_frequency.gt_count > 0
1043    }
1044
1045    /// Check if content likely contains code (fast)
1046    pub fn likely_has_code(&self) -> bool {
1047        self.char_frequency.backtick_count > 0
1048    }
1049
1050    /// Check if content likely contains links or images (fast)
1051    pub fn likely_has_links_or_images(&self) -> bool {
1052        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
1053    }
1054
1055    /// Check if content likely contains HTML (fast)
1056    pub fn likely_has_html(&self) -> bool {
1057        self.char_frequency.lt_count > 0
1058    }
1059
1060    /// Get the blockquote prefix for inserting a blank line at the given line index.
1061    /// Returns the prefix without trailing content (e.g., ">" or ">>").
1062    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
1063    /// Returns an empty string if the line is not inside a blockquote.
1064    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
1065        if let Some(line_info) = self.lines.get(line_idx)
1066            && let Some(ref bq) = line_info.blockquote
1067        {
1068            bq.prefix.trim_end().to_string()
1069        } else {
1070            String::new()
1071        }
1072    }
1073
1074    /// Get HTML tags on a specific line
1075    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
1076        self.html_tags()
1077            .iter()
1078            .filter(|tag| tag.line == line_num)
1079            .cloned()
1080            .collect()
1081    }
1082
1083    /// Get emphasis spans on a specific line
1084    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
1085        self.emphasis_spans()
1086            .iter()
1087            .filter(|span| span.line == line_num)
1088            .cloned()
1089            .collect()
1090    }
1091
1092    /// Get table rows on a specific line
1093    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
1094        self.table_rows()
1095            .iter()
1096            .filter(|row| row.line == line_num)
1097            .cloned()
1098            .collect()
1099    }
1100
1101    /// Get bare URLs on a specific line
1102    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
1103        self.bare_urls()
1104            .iter()
1105            .filter(|url| url.line == line_num)
1106            .cloned()
1107            .collect()
1108    }
1109
1110    /// Find the line index for a given byte offset using binary search.
1111    /// Returns (line_index, line_number, column) where:
1112    /// - line_index is the 0-based index in the lines array
1113    /// - line_number is the 1-based line number
1114    /// - column is the byte offset within that line
1115    #[inline]
1116    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
1117        // Binary search to find the line containing this byte offset
1118        let idx = match lines.binary_search_by(|line| {
1119            if byte_offset < line.byte_offset {
1120                std::cmp::Ordering::Greater
1121            } else if byte_offset > line.byte_offset + line.byte_len {
1122                std::cmp::Ordering::Less
1123            } else {
1124                std::cmp::Ordering::Equal
1125            }
1126        }) {
1127            Ok(idx) => idx,
1128            Err(idx) => idx.saturating_sub(1),
1129        };
1130
1131        let line = &lines[idx];
1132        let line_num = idx + 1;
1133        let col = byte_offset.saturating_sub(line.byte_offset);
1134
1135        (idx, line_num, col)
1136    }
1137
1138    /// Check if a byte offset is within a code span using binary search
1139    #[inline]
1140    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
1141        // Since spans are sorted by byte_offset, use partition_point for binary search
1142        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
1143
1144        // Check the span that starts at or before our offset
1145        if idx > 0 {
1146            let span = &code_spans[idx - 1];
1147            if offset >= span.byte_offset && offset < span.byte_end {
1148                return true;
1149            }
1150        }
1151
1152        false
1153    }
1154
1155    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
1156    ///
1157    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
1158    /// This is the standard iterator for rules that need to process headings.
1159    ///
1160    /// # Examples
1161    ///
1162    /// ```
1163    /// use rumdl::lint_context::LintContext;
1164    /// use rumdl::config::MarkdownFlavor;
1165    ///
1166    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
1167    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
1168    ///
1169    /// for heading in ctx.valid_headings() {
1170    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
1171    /// }
1172    /// // Only prints valid headings, skips `#NoSpace`
1173    /// ```
1174    #[must_use]
1175    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1176        ValidHeadingsIter::new(&self.lines)
1177    }
1178
1179    /// Check if the document contains any valid CommonMark headings
1180    ///
1181    /// Returns `true` if there is at least one heading with proper space after `#`.
1182    #[must_use]
1183    pub fn has_valid_headings(&self) -> bool {
1184        self.lines
1185            .iter()
1186            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1187    }
1188}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs