rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::utils::code_block_utils::CodeBlockUtils;
16use std::collections::HashMap;
17use std::path::PathBuf;
18
/// Evaluates `$code` and hands its value back to the caller.
///
/// When `$profile` is true, the time spent evaluating `$code` is printed to
/// stderr under the label `$name`. This variant is compiled for non-WASM targets.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let timer = std::time::Instant::now();
        let value = $code;
        if $profile {
            let elapsed = timer.elapsed();
            eprintln!("[PROFILE] {}: {:?}", $name, elapsed);
        }
        value
    }};
}
31
/// WASM variant of `profile_section!`: no timing is performed, the section
/// body is evaluated and its value returned unchanged.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // Reference the flag so the caller's `profile` binding counts as used
        // on wasm32 builds, where nothing else reads it.
        let _ = $profile;
        $code
    }};
}
36
/// Grouped byte ranges for skip context detection
/// Used to reduce parameter count in internal functions
///
/// All fields borrow slices owned by the caller; `LintContext::new` builds one
/// of these and passes it to `line_computation::compute_basic_line_info`.
pub(super) struct SkipByteRanges<'a> {
    /// HTML comment ranges (from `compute_html_comment_ranges`).
    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Autodoc block ranges; `new` only fills these for the MkDocs flavor.
    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// Quarto div block ranges; `new` only fills these for the Quarto flavor.
    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
    /// PyMdown block ranges (`/// ... ///`); `new` only fills these for the MkDocs flavor.
    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
}
45
46use std::sync::{Arc, OnceLock};
47
48/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
49pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
50
/// Type alias for byte ranges used in JSX expression and MDX comment detection
///
/// Each entry is a `(start, end)` pair of byte offsets; the `is_in_*` helpers on
/// `LintContext` treat such pairs as half-open (`start <= pos < end`).
pub(super) type ByteRanges = Vec<(usize, usize)>;
53
/// Pre-parsed view of a single Markdown document.
///
/// `LintContext::new` fills in every field; the `OnceLock` caches are either
/// seeded there (code spans, emphasis spans) or populated by the matching
/// accessor the first time it is called.
pub struct LintContext<'a> {
    /// The source text this context was built from.
    pub content: &'a str,
    /// Byte offset at which each line starts (the first entry is always 0).
    pub line_offsets: Vec<usize>,
    /// Cached code block ranges (not including inline code spans).
    pub code_blocks: Vec<(usize, usize)>,
    /// Pre-computed line information.
    pub lines: Vec<LineInfo>,
    /// Pre-parsed links.
    pub links: Vec<ParsedLink<'a>>,
    /// Pre-parsed images.
    pub images: Vec<ParsedImage<'a>>,
    /// Broken/undefined references.
    pub broken_links: Vec<BrokenLinkInfo>,
    /// Pre-parsed footnote references.
    pub footnote_refs: Vec<FootnoteRef>,
    /// Reference definitions.
    pub reference_defs: Vec<ReferenceDef>,
    /// O(1) lookup by lowercase ID -> index in `reference_defs`.
    reference_defs_map: HashMap<String, usize>,
    /// Inline code spans; seeded by `new`, so `code_spans()` normally just reads it.
    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>,
    /// Lazy-loaded math spans ($...$ and $$...$$).
    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>,
    /// Pre-parsed list blocks.
    pub list_blocks: Vec<ListBlock>,
    /// Character frequency analysis.
    pub char_frequency: CharFrequency,
    /// Lazy-loaded HTML tags.
    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>,
    /// Emphasis spans; seeded by `new` (collected together with the basic line info).
    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>,
    /// Lazy-loaded table rows.
    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>,
    /// Lazy-loaded bare URLs.
    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>,
    /// Cached result for mixed ordered/unordered list nesting detection.
    has_mixed_list_nesting_cache: OnceLock<bool>,
    /// Pre-computed HTML comment ranges.
    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed table blocks.
    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>,
    /// Pre-computed line index for byte position calculations.
    pub line_index: crate::utils::range_utils::LineIndex<'a>,
    /// Pre-computed Jinja template ranges ({{ }}, {% %}).
    jinja_ranges: Vec<(usize, usize)>,
    /// Markdown flavor being used.
    pub flavor: MarkdownFlavor,
    /// Source file path (for rules that need file context).
    pub source_file: Option<PathBuf>,
    /// Pre-computed JSX expression ranges (MDX: {expression}).
    jsx_expression_ranges: Vec<(usize, usize)>,
    /// Pre-computed MDX comment ranges ({/* ... */}).
    mdx_comment_ranges: Vec<(usize, usize)>,
    /// Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key]).
    citation_ranges: Vec<crate::utils::skip_context::ByteRange>,
    /// Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}}).
    shortcode_ranges: Vec<(usize, usize)>,
    /// Parsed inline configuration comments for rule disabling.
    inline_config: InlineConfig,
    /// Pre-computed Obsidian comment ranges (%%...%%).
    obsidian_comment_ranges: Vec<(usize, usize)>,
}
87
88impl<'a> LintContext<'a> {
89    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
90        #[cfg(not(target_arch = "wasm32"))]
91        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
92        #[cfg(target_arch = "wasm32")]
93        let profile = false;
94
95        let line_offsets = profile_section!("Line offsets", profile, {
96            let mut offsets = vec![0];
97            for (i, c) in content.char_indices() {
98                if c == '\n' {
99                    offsets.push(i + 1);
100                }
101            }
102            offsets
103        });
104
105        // Detect code blocks and code spans once and cache them
106        let (code_blocks, code_span_ranges) = profile_section!(
107            "Code blocks",
108            profile,
109            CodeBlockUtils::detect_code_blocks_and_spans(content)
110        );
111
112        // Pre-compute HTML comment ranges ONCE for all operations
113        let html_comment_ranges = profile_section!(
114            "HTML comment ranges",
115            profile,
116            crate::utils::skip_context::compute_html_comment_ranges(content)
117        );
118
119        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n^2) scaling)
120        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
121            if flavor == MarkdownFlavor::MkDocs {
122                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
123            } else {
124                Vec::new()
125            }
126        });
127
128        // Pre-compute Quarto div block ranges for Quarto flavor
129        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
130            if flavor == MarkdownFlavor::Quarto {
131                crate::utils::quarto_divs::detect_div_block_ranges(content)
132            } else {
133                Vec::new()
134            }
135        });
136
137        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
138        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
139            if flavor == MarkdownFlavor::MkDocs {
140                crate::utils::pymdown_blocks::detect_block_ranges(content)
141            } else {
142                Vec::new()
143            }
144        });
145
146        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
147        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
148        let skip_ranges = SkipByteRanges {
149            html_comment_ranges: &html_comment_ranges,
150            autodoc_ranges: &autodoc_ranges,
151            quarto_div_ranges: &quarto_div_ranges,
152            pymdown_block_ranges: &pymdown_block_ranges,
153        };
154        let (mut lines, emphasis_spans) = profile_section!(
155            "Basic line info",
156            profile,
157            line_computation::compute_basic_line_info(content, &line_offsets, &code_blocks, flavor, &skip_ranges,)
158        );
159
160        // Detect HTML blocks BEFORE heading detection
161        profile_section!(
162            "HTML blocks",
163            profile,
164            heading_detection::detect_html_blocks(content, &mut lines)
165        );
166
167        // Detect ESM import/export blocks in MDX files BEFORE heading detection
168        profile_section!(
169            "ESM blocks",
170            profile,
171            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
172        );
173
174        // Detect JSX expressions and MDX comments in MDX files
175        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
176            "JSX/MDX detection",
177            profile,
178            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
179        );
180
181        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
182        profile_section!(
183            "MkDocs constructs",
184            profile,
185            flavor_detection::detect_mkdocs_line_info(content, &mut lines, flavor)
186        );
187
188        // Detect Obsidian comments (%%...%%) in Obsidian flavor
189        let obsidian_comment_ranges = profile_section!(
190            "Obsidian comments",
191            profile,
192            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
193        );
194
195        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
196        let link_byte_ranges = profile_section!(
197            "Link byte ranges",
198            profile,
199            link_parser::collect_link_byte_ranges(content)
200        );
201
202        // Now detect headings and blockquotes
203        profile_section!(
204            "Headings & blockquotes",
205            profile,
206            heading_detection::detect_headings_and_blockquotes(
207                content,
208                &mut lines,
209                flavor,
210                &html_comment_ranges,
211                &link_byte_ranges
212            )
213        );
214
215        // Parse code spans early so we can exclude them from link/image parsing
216        let code_spans = profile_section!(
217            "Code spans",
218            profile,
219            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
220        );
221
222        // Mark lines that are continuations of multi-line code spans
223        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
224        for span in &code_spans {
225            if span.end_line > span.line {
226                // Mark lines after the first line as continuations
227                for line_num in (span.line + 1)..=span.end_line {
228                    if let Some(line_info) = lines.get_mut(line_num - 1) {
229                        line_info.in_code_span_continuation = true;
230                    }
231                }
232            }
233        }
234
235        // Parse links, images, references, and list blocks
236        let (links, broken_links, footnote_refs) = profile_section!(
237            "Links",
238            profile,
239            link_parser::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
240        );
241
242        let images = profile_section!(
243            "Images",
244            profile,
245            link_parser::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
246        );
247
248        let reference_defs = profile_section!(
249            "Reference defs",
250            profile,
251            link_parser::parse_reference_defs(content, &lines)
252        );
253
254        // Build O(1) lookup map for reference definitions by lowercase ID
255        let reference_defs_map: HashMap<String, usize> = reference_defs
256            .iter()
257            .enumerate()
258            .map(|(idx, def)| (def.id.to_lowercase(), idx))
259            .collect();
260
261        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
262
263        // Compute character frequency for fast content analysis
264        let char_frequency = profile_section!(
265            "Char frequency",
266            profile,
267            line_computation::compute_char_frequency(content)
268        );
269
270        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
271        let table_blocks = profile_section!(
272            "Table blocks",
273            profile,
274            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
275                content,
276                &code_blocks,
277                &code_spans,
278                &html_comment_ranges,
279            )
280        );
281
282        // Pre-compute LineIndex once for all rules (eliminates 46x content cloning)
283        let line_index = profile_section!(
284            "Line index",
285            profile,
286            crate::utils::range_utils::LineIndex::new(content)
287        );
288
289        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
290        let jinja_ranges = profile_section!(
291            "Jinja ranges",
292            profile,
293            crate::utils::jinja_utils::find_jinja_ranges(content)
294        );
295
296        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
297        let citation_ranges = profile_section!("Citation ranges", profile, {
298            if flavor == MarkdownFlavor::Quarto {
299                crate::utils::quarto_divs::find_citation_ranges(content)
300            } else {
301                Vec::new()
302            }
303        });
304
305        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
306        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
307            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
308            let mut ranges = Vec::new();
309            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
310                ranges.push((mat.start(), mat.end()));
311            }
312            ranges
313        });
314
315        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
316
317        Self {
318            content,
319            line_offsets,
320            code_blocks,
321            lines,
322            links,
323            images,
324            broken_links,
325            footnote_refs,
326            reference_defs,
327            reference_defs_map,
328            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
329            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
330            list_blocks,
331            char_frequency,
332            html_tags_cache: OnceLock::new(),
333            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
334            table_rows_cache: OnceLock::new(),
335            bare_urls_cache: OnceLock::new(),
336            has_mixed_list_nesting_cache: OnceLock::new(),
337            html_comment_ranges,
338            table_blocks,
339            line_index,
340            jinja_ranges,
341            flavor,
342            source_file,
343            jsx_expression_ranges,
344            mdx_comment_ranges,
345            citation_ranges,
346            shortcode_ranges,
347            inline_config,
348            obsidian_comment_ranges,
349        }
350    }
351
352    /// Check if a rule is disabled at a specific line number (1-indexed)
353    ///
354    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
355    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
356    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
357        self.inline_config.is_rule_disabled(rule_name, line_number)
358    }
359
360    /// Get code spans - computed lazily on first access
361    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
362        Arc::clone(
363            self.code_spans_cache
364                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
365        )
366    }
367
368    /// Get math spans - computed lazily on first access
369    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
370        Arc::clone(
371            self.math_spans_cache
372                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
373        )
374    }
375
376    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
377    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
378        let math_spans = self.math_spans();
379        math_spans
380            .iter()
381            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
382    }
383
384    /// Get HTML comment ranges - pre-computed during LintContext construction
385    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
386        &self.html_comment_ranges
387    }
388
389    /// Get Obsidian comment ranges - pre-computed during LintContext construction
390    /// Returns empty slice for non-Obsidian flavors
391    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
392        &self.obsidian_comment_ranges
393    }
394
395    /// Check if a byte position is inside an Obsidian comment
396    ///
397    /// Returns false for non-Obsidian flavors.
398    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
399        self.obsidian_comment_ranges
400            .iter()
401            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
402    }
403
404    /// Check if a line/column position is inside an Obsidian comment
405    ///
406    /// Line number is 1-indexed, column is 1-indexed.
407    /// Returns false for non-Obsidian flavors.
408    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
409        if self.obsidian_comment_ranges.is_empty() {
410            return false;
411        }
412
413        // Convert line/column (1-indexed, char-based) to byte position
414        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
415        self.is_in_obsidian_comment(byte_pos)
416    }
417
418    /// Get HTML tags - computed lazily on first access
419    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
420        Arc::clone(self.html_tags_cache.get_or_init(|| {
421            Arc::new(element_parsers::parse_html_tags(
422                self.content,
423                &self.lines,
424                &self.code_blocks,
425                self.flavor,
426            ))
427        }))
428    }
429
430    /// Get emphasis spans - pre-computed during construction
431    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
432        Arc::clone(
433            self.emphasis_spans_cache
434                .get()
435                .expect("emphasis_spans_cache initialized during construction"),
436        )
437    }
438
439    /// Get table rows - computed lazily on first access
440    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
441        Arc::clone(
442            self.table_rows_cache
443                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
444        )
445    }
446
447    /// Get bare URLs - computed lazily on first access
448    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
449        Arc::clone(self.bare_urls_cache.get_or_init(|| {
450            Arc::new(element_parsers::parse_bare_urls(
451                self.content,
452                &self.lines,
453                &self.code_blocks,
454            ))
455        }))
456    }
457
458    /// Check if document has mixed ordered/unordered list nesting.
459    /// Result is cached after first computation (document-level invariant).
460    /// This is used by MD007 for smart style auto-detection.
461    pub fn has_mixed_list_nesting(&self) -> bool {
462        *self
463            .has_mixed_list_nesting_cache
464            .get_or_init(|| self.compute_mixed_list_nesting())
465    }
466
467    /// Internal computation for mixed list nesting (only called once per LintContext).
468    fn compute_mixed_list_nesting(&self) -> bool {
469        // Track parent list items by their marker position and type
470        // Using marker_column instead of indent because it works correctly
471        // for blockquoted content where indent doesn't account for the prefix
472        // Stack stores: (marker_column, is_ordered)
473        let mut stack: Vec<(usize, bool)> = Vec::new();
474        let mut last_was_blank = false;
475
476        for line_info in &self.lines {
477            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
478            if line_info.in_code_block
479                || line_info.in_front_matter
480                || line_info.in_mkdocstrings
481                || line_info.in_html_comment
482                || line_info.in_esm_block
483            {
484                continue;
485            }
486
487            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
488            if line_info.is_blank {
489                last_was_blank = true;
490                continue;
491            }
492
493            if let Some(list_item) = &line_info.list_item {
494                // Normalize column 1 to column 0 (consistent with MD007 check function)
495                let current_pos = if list_item.marker_column == 1 {
496                    0
497                } else {
498                    list_item.marker_column
499                };
500
501                // If there was a blank line and this item is at root level, reset stack
502                if last_was_blank && current_pos == 0 {
503                    stack.clear();
504                }
505                last_was_blank = false;
506
507                // Pop items at same or greater position (they're siblings or deeper, not parents)
508                while let Some(&(pos, _)) = stack.last() {
509                    if pos >= current_pos {
510                        stack.pop();
511                    } else {
512                        break;
513                    }
514                }
515
516                // Check if immediate parent has different type - this is mixed nesting
517                if let Some(&(_, parent_is_ordered)) = stack.last()
518                    && parent_is_ordered != list_item.is_ordered
519                {
520                    return true; // Found mixed nesting - early exit
521                }
522
523                stack.push((current_pos, list_item.is_ordered));
524            } else {
525                // Non-list line (but not blank) - could be paragraph or other content
526                last_was_blank = false;
527            }
528        }
529
530        false
531    }
532
533    /// Map a byte offset to (line, column)
534    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
535        match self.line_offsets.binary_search(&offset) {
536            Ok(line) => (line + 1, 1),
537            Err(line) => {
538                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
539                (line, offset - line_start + 1)
540            }
541        }
542    }
543
544    /// Check if a position is within a code block or code span
545    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
546        // Check code blocks first
547        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
548            return true;
549        }
550
551        // Check inline code spans (lazy load if needed)
552        self.code_spans()
553            .iter()
554            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
555    }
556
557    /// Get line information by line number (1-indexed)
558    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
559        if line_num > 0 {
560            self.lines.get(line_num - 1)
561        } else {
562            None
563        }
564    }
565
566    /// Get byte offset for a line number (1-indexed)
567    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
568        self.line_info(line_num).map(|info| info.byte_offset)
569    }
570
571    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
572    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
573        let normalized_id = ref_id.to_lowercase();
574        self.reference_defs_map
575            .get(&normalized_id)
576            .map(|&idx| self.reference_defs[idx].url.as_str())
577    }
578
579    /// Get a reference definition by its ID (O(1) lookup via HashMap)
580    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
581        let normalized_id = ref_id.to_lowercase();
582        self.reference_defs_map
583            .get(&normalized_id)
584            .map(|&idx| &self.reference_defs[idx])
585    }
586
587    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
588    pub fn has_reference_def(&self, ref_id: &str) -> bool {
589        let normalized_id = ref_id.to_lowercase();
590        self.reference_defs_map.contains_key(&normalized_id)
591    }
592
593    /// Check if a line is part of a list block
594    pub fn is_in_list_block(&self, line_num: usize) -> bool {
595        self.list_blocks
596            .iter()
597            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
598    }
599
600    /// Get the list block containing a specific line
601    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
602        self.list_blocks
603            .iter()
604            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
605    }
606
607    // Compatibility methods for DocumentStructure migration
608
609    /// Check if a line is within a code block
610    pub fn is_in_code_block(&self, line_num: usize) -> bool {
611        if line_num == 0 || line_num > self.lines.len() {
612            return false;
613        }
614        self.lines[line_num - 1].in_code_block
615    }
616
617    /// Check if a line is within front matter
618    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
619        if line_num == 0 || line_num > self.lines.len() {
620            return false;
621        }
622        self.lines[line_num - 1].in_front_matter
623    }
624
625    /// Check if a line is within an HTML block
626    pub fn is_in_html_block(&self, line_num: usize) -> bool {
627        if line_num == 0 || line_num > self.lines.len() {
628            return false;
629        }
630        self.lines[line_num - 1].in_html_block
631    }
632
633    /// Check if a line and column is within a code span
634    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
635        if line_num == 0 || line_num > self.lines.len() {
636            return false;
637        }
638
639        // Use the code spans cache to check
640        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
641        // Convert col to 0-indexed for comparison
642        let col_0indexed = if col > 0 { col - 1 } else { 0 };
643        let code_spans = self.code_spans();
644        code_spans.iter().any(|span| {
645            // Check if line is within the span's line range
646            if line_num < span.line || line_num > span.end_line {
647                return false;
648            }
649
650            if span.line == span.end_line {
651                // Single-line span: check column bounds
652                col_0indexed >= span.start_col && col_0indexed < span.end_col
653            } else if line_num == span.line {
654                // First line of multi-line span: anything after start_col is in span
655                col_0indexed >= span.start_col
656            } else if line_num == span.end_line {
657                // Last line of multi-line span: anything before end_col is in span
658                col_0indexed < span.end_col
659            } else {
660                // Middle line of multi-line span: entire line is in span
661                true
662            }
663        })
664    }
665
666    /// Check if a byte offset is within a code span
667    #[inline]
668    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
669        let code_spans = self.code_spans();
670        code_spans
671            .iter()
672            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
673    }
674
675    /// Check if a byte position is within a reference definition
676    #[inline]
677    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
678        self.reference_defs
679            .iter()
680            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
681    }
682
683    /// Check if a byte position is within an HTML comment
684    #[inline]
685    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
686        self.html_comment_ranges
687            .iter()
688            .any(|range| byte_pos >= range.start && byte_pos < range.end)
689    }
690
691    /// Check if a byte position is within an HTML tag (including multiline tags)
692    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
693    #[inline]
694    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
695        self.html_tags()
696            .iter()
697            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
698    }
699
700    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
701    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
702        self.jinja_ranges
703            .iter()
704            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
705    }
706
707    /// Check if a byte position is within a JSX expression (MDX: {expression})
708    #[inline]
709    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
710        self.jsx_expression_ranges
711            .iter()
712            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
713    }
714
715    /// Check if a byte position is within an MDX comment ({/* ... */})
716    #[inline]
717    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
718        self.mdx_comment_ranges
719            .iter()
720            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
721    }
722
723    /// Get all JSX expression byte ranges
724    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
725        &self.jsx_expression_ranges
726    }
727
728    /// Get all MDX comment byte ranges
729    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
730        &self.mdx_comment_ranges
731    }
732
733    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`)
734    /// Only active in Quarto flavor
735    #[inline]
736    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
737        self.citation_ranges
738            .iter()
739            .any(|range| byte_pos >= range.start && byte_pos < range.end)
740    }
741
742    /// Get all citation byte ranges (Quarto flavor only)
743    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
744        &self.citation_ranges
745    }
746
747    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
748    #[inline]
749    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
750        self.shortcode_ranges
751            .iter()
752            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
753    }
754
755    /// Get all shortcode byte ranges
756    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
757        &self.shortcode_ranges
758    }
759
760    /// Check if a byte position is within a link reference definition title
761    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
762        self.reference_defs.iter().any(|def| {
763            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
764                byte_pos >= start && byte_pos < end
765            } else {
766                false
767            }
768        })
769    }
770
771    /// Check if content has any instances of a specific character (fast)
772    pub fn has_char(&self, ch: char) -> bool {
773        match ch {
774            '#' => self.char_frequency.hash_count > 0,
775            '*' => self.char_frequency.asterisk_count > 0,
776            '_' => self.char_frequency.underscore_count > 0,
777            '-' => self.char_frequency.hyphen_count > 0,
778            '+' => self.char_frequency.plus_count > 0,
779            '>' => self.char_frequency.gt_count > 0,
780            '|' => self.char_frequency.pipe_count > 0,
781            '[' => self.char_frequency.bracket_count > 0,
782            '`' => self.char_frequency.backtick_count > 0,
783            '<' => self.char_frequency.lt_count > 0,
784            '!' => self.char_frequency.exclamation_count > 0,
785            '\n' => self.char_frequency.newline_count > 0,
786            _ => self.content.contains(ch), // Fallback for other characters
787        }
788    }
789
790    /// Get count of a specific character (fast)
791    pub fn char_count(&self, ch: char) -> usize {
792        match ch {
793            '#' => self.char_frequency.hash_count,
794            '*' => self.char_frequency.asterisk_count,
795            '_' => self.char_frequency.underscore_count,
796            '-' => self.char_frequency.hyphen_count,
797            '+' => self.char_frequency.plus_count,
798            '>' => self.char_frequency.gt_count,
799            '|' => self.char_frequency.pipe_count,
800            '[' => self.char_frequency.bracket_count,
801            '`' => self.char_frequency.backtick_count,
802            '<' => self.char_frequency.lt_count,
803            '!' => self.char_frequency.exclamation_count,
804            '\n' => self.char_frequency.newline_count,
805            _ => self.content.matches(ch).count(), // Fallback for other characters
806        }
807    }
808
809    /// Check if content likely contains headings (fast)
810    pub fn likely_has_headings(&self) -> bool {
811        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
812    }
813
814    /// Check if content likely contains lists (fast)
815    pub fn likely_has_lists(&self) -> bool {
816        self.char_frequency.asterisk_count > 0
817            || self.char_frequency.hyphen_count > 0
818            || self.char_frequency.plus_count > 0
819    }
820
821    /// Check if content likely contains emphasis (fast)
822    pub fn likely_has_emphasis(&self) -> bool {
823        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
824    }
825
826    /// Check if content likely contains tables (fast)
827    pub fn likely_has_tables(&self) -> bool {
828        self.char_frequency.pipe_count > 2
829    }
830
831    /// Check if content likely contains blockquotes (fast)
832    pub fn likely_has_blockquotes(&self) -> bool {
833        self.char_frequency.gt_count > 0
834    }
835
836    /// Check if content likely contains code (fast)
837    pub fn likely_has_code(&self) -> bool {
838        self.char_frequency.backtick_count > 0
839    }
840
841    /// Check if content likely contains links or images (fast)
842    pub fn likely_has_links_or_images(&self) -> bool {
843        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
844    }
845
846    /// Check if content likely contains HTML (fast)
847    pub fn likely_has_html(&self) -> bool {
848        self.char_frequency.lt_count > 0
849    }
850
851    /// Get the blockquote prefix for inserting a blank line at the given line index.
852    /// Returns the prefix without trailing content (e.g., ">" or ">>").
853    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
854    /// Returns an empty string if the line is not inside a blockquote.
855    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
856        if let Some(line_info) = self.lines.get(line_idx)
857            && let Some(ref bq) = line_info.blockquote
858        {
859            bq.prefix.trim_end().to_string()
860        } else {
861            String::new()
862        }
863    }
864
865    /// Get HTML tags on a specific line
866    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
867        self.html_tags()
868            .iter()
869            .filter(|tag| tag.line == line_num)
870            .cloned()
871            .collect()
872    }
873
874    /// Get emphasis spans on a specific line
875    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
876        self.emphasis_spans()
877            .iter()
878            .filter(|span| span.line == line_num)
879            .cloned()
880            .collect()
881    }
882
883    /// Get table rows on a specific line
884    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
885        self.table_rows()
886            .iter()
887            .filter(|row| row.line == line_num)
888            .cloned()
889            .collect()
890    }
891
892    /// Get bare URLs on a specific line
893    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
894        self.bare_urls()
895            .iter()
896            .filter(|url| url.line == line_num)
897            .cloned()
898            .collect()
899    }
900
901    /// Find the line index for a given byte offset using binary search.
902    /// Returns (line_index, line_number, column) where:
903    /// - line_index is the 0-based index in the lines array
904    /// - line_number is the 1-based line number
905    /// - column is the byte offset within that line
906    #[inline]
907    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
908        // Binary search to find the line containing this byte offset
909        let idx = match lines.binary_search_by(|line| {
910            if byte_offset < line.byte_offset {
911                std::cmp::Ordering::Greater
912            } else if byte_offset > line.byte_offset + line.byte_len {
913                std::cmp::Ordering::Less
914            } else {
915                std::cmp::Ordering::Equal
916            }
917        }) {
918            Ok(idx) => idx,
919            Err(idx) => idx.saturating_sub(1),
920        };
921
922        let line = &lines[idx];
923        let line_num = idx + 1;
924        let col = byte_offset.saturating_sub(line.byte_offset);
925
926        (idx, line_num, col)
927    }
928
929    /// Check if a byte offset is within a code span using binary search
930    #[inline]
931    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
932        // Since spans are sorted by byte_offset, use partition_point for binary search
933        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
934
935        // Check the span that starts at or before our offset
936        if idx > 0 {
937            let span = &code_spans[idx - 1];
938            if offset >= span.byte_offset && offset < span.byte_end {
939                return true;
940            }
941        }
942
943        false
944    }
945
946    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
947    ///
948    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
949    /// This is the standard iterator for rules that need to process headings.
950    ///
951    /// # Examples
952    ///
953    /// ```
954    /// use rumdl::lint_context::LintContext;
955    /// use rumdl::config::MarkdownFlavor;
956    ///
957    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
958    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
959    ///
960    /// for heading in ctx.valid_headings() {
961    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
962    /// }
963    /// // Only prints valid headings, skips `#NoSpace`
964    /// ```
965    #[must_use]
966    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
967        ValidHeadingsIter::new(&self.lines)
968    }
969
970    /// Check if the document contains any valid CommonMark headings
971    ///
972    /// Returns `true` if there is at least one heading with proper space after `#`.
973    #[must_use]
974    pub fn has_valid_headings(&self) -> bool {
975        self.lines
976            .iter()
977            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
978    }
979}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs