rumdl_lib/lint_context/
mod.rs

1pub mod types;
2pub use types::*;
3
4mod element_parsers;
5mod flavor_detection;
6mod heading_detection;
7mod line_computation;
8mod link_parser;
9mod list_blocks;
10#[cfg(test)]
11mod tests;
12
13use crate::config::MarkdownFlavor;
14use crate::inline_config::InlineConfig;
15use crate::rules::front_matter_utils::FrontMatterUtils;
16use crate::utils::code_block_utils::CodeBlockUtils;
17use std::collections::HashMap;
18use std::path::PathBuf;
19
/// Evaluates `$code` and yields its value, optionally reporting how long it took.
///
/// * `$name` - label printed in the timing report.
/// * `$profile` - boolean switch; evaluated exactly once, before `$code`.
/// * `$code` - expression to run; evaluated exactly once.
///
/// Native (non-WASM) variant: when `$profile` is true, the elapsed time is
/// written to stderr as `[PROFILE] <name>: <duration>`. The clock is only read
/// when profiling is enabled.
#[cfg(not(target_arch = "wasm32"))]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        let profiling_enabled: bool = $profile;
        // `None` when profiling is off, so no timestamp is taken at all.
        let started_at = profiling_enabled.then(std::time::Instant::now);
        let result = $code;
        if let Some(started_at) = started_at {
            eprintln!("[PROFILE] {}: {:?}", $name, started_at.elapsed());
        }
        result
    }};
}

/// WASM variant of `profile_section!`: runs `$code` without any timing.
#[cfg(target_arch = "wasm32")]
macro_rules! profile_section {
    ($name:expr, $profile:expr, $code:expr) => {{
        // Evaluate the flag (as the native variant does) so the caller's
        // binding is not reported as unused.
        let _ = $profile;
        $code
    }};
}
37
38/// Grouped byte ranges for skip context detection
39/// Used to reduce parameter count in internal functions
40pub(super) struct SkipByteRanges<'a> {
41    pub(super) html_comment_ranges: &'a [crate::utils::skip_context::ByteRange],
42    pub(super) autodoc_ranges: &'a [crate::utils::skip_context::ByteRange],
43    pub(super) quarto_div_ranges: &'a [crate::utils::skip_context::ByteRange],
44    pub(super) pymdown_block_ranges: &'a [crate::utils::skip_context::ByteRange],
45}
46
47use std::sync::{Arc, OnceLock};
48
49/// Map from line byte offset to list item data: (is_ordered, marker, marker_column, content_column, number)
50pub(super) type ListItemMap = std::collections::HashMap<usize, (bool, String, usize, usize, Option<usize>)>;
51
52/// Type alias for byte ranges used in JSX expression and MDX comment detection
53pub(super) type ByteRanges = Vec<(usize, usize)>;
54
55pub struct LintContext<'a> {
56    pub content: &'a str,
57    content_lines: Vec<&'a str>, // Pre-split lines from content (avoids repeated allocations)
58    pub line_offsets: Vec<usize>,
59    pub code_blocks: Vec<(usize, usize)>, // Cached code block ranges (not including inline code spans)
60    pub lines: Vec<LineInfo>,             // Pre-computed line information
61    pub links: Vec<ParsedLink<'a>>,       // Pre-parsed links
62    pub images: Vec<ParsedImage<'a>>,     // Pre-parsed images
63    pub broken_links: Vec<BrokenLinkInfo>, // Broken/undefined references
64    pub footnote_refs: Vec<FootnoteRef>,  // Pre-parsed footnote references
65    pub reference_defs: Vec<ReferenceDef>, // Reference definitions
66    reference_defs_map: HashMap<String, usize>, // O(1) lookup by lowercase ID -> index in reference_defs
67    code_spans_cache: OnceLock<Arc<Vec<CodeSpan>>>, // Lazy-loaded inline code spans
68    math_spans_cache: OnceLock<Arc<Vec<MathSpan>>>, // Lazy-loaded math spans ($...$ and $$...$$)
69    pub list_blocks: Vec<ListBlock>,      // Pre-parsed list blocks
70    pub char_frequency: CharFrequency,    // Character frequency analysis
71    html_tags_cache: OnceLock<Arc<Vec<HtmlTag>>>, // Lazy-loaded HTML tags
72    emphasis_spans_cache: OnceLock<Arc<Vec<EmphasisSpan>>>, // Lazy-loaded emphasis spans
73    table_rows_cache: OnceLock<Arc<Vec<TableRow>>>, // Lazy-loaded table rows
74    bare_urls_cache: OnceLock<Arc<Vec<BareUrl>>>, // Lazy-loaded bare URLs
75    has_mixed_list_nesting_cache: OnceLock<bool>, // Cached result for mixed ordered/unordered list nesting detection
76    html_comment_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed HTML comment ranges
77    pub table_blocks: Vec<crate::utils::table_utils::TableBlock>, // Pre-computed table blocks
78    pub line_index: crate::utils::range_utils::LineIndex<'a>, // Pre-computed line index for byte position calculations
79    jinja_ranges: Vec<(usize, usize)>,    // Pre-computed Jinja template ranges ({{ }}, {% %})
80    pub flavor: MarkdownFlavor,           // Markdown flavor being used
81    pub source_file: Option<PathBuf>,     // Source file path (for rules that need file context)
82    jsx_expression_ranges: Vec<(usize, usize)>, // Pre-computed JSX expression ranges (MDX: {expression})
83    mdx_comment_ranges: Vec<(usize, usize)>, // Pre-computed MDX comment ranges ({/* ... */})
84    citation_ranges: Vec<crate::utils::skip_context::ByteRange>, // Pre-computed Pandoc/Quarto citation ranges (Quarto: @key, [@key])
85    shortcode_ranges: Vec<(usize, usize)>, // Pre-computed Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
86    inline_config: InlineConfig,           // Parsed inline configuration comments for rule disabling
87    obsidian_comment_ranges: Vec<(usize, usize)>, // Pre-computed Obsidian comment ranges (%%...%%)
88}
89
90impl<'a> LintContext<'a> {
91    pub fn new(content: &'a str, flavor: MarkdownFlavor, source_file: Option<PathBuf>) -> Self {
92        #[cfg(not(target_arch = "wasm32"))]
93        let profile = std::env::var("RUMDL_PROFILE_QUADRATIC").is_ok();
94        #[cfg(target_arch = "wasm32")]
95        let profile = false;
96
97        let line_offsets = profile_section!("Line offsets", profile, {
98            let mut offsets = vec![0];
99            for (i, c) in content.char_indices() {
100                if c == '\n' {
101                    offsets.push(i + 1);
102                }
103            }
104            offsets
105        });
106
107        // Compute content_lines once for all functions that need it
108        let content_lines: Vec<&str> = content.lines().collect();
109
110        // Detect front matter boundaries once for all functions that need it
111        let front_matter_end = FrontMatterUtils::get_front_matter_end_line(content);
112
113        // Detect code blocks and code spans once and cache them
114        let (code_blocks, code_span_ranges) = profile_section!(
115            "Code blocks",
116            profile,
117            CodeBlockUtils::detect_code_blocks_and_spans(content)
118        );
119
120        // Pre-compute HTML comment ranges ONCE for all operations
121        let html_comment_ranges = profile_section!(
122            "HTML comment ranges",
123            profile,
124            crate::utils::skip_context::compute_html_comment_ranges(content)
125        );
126
127        // Pre-compute autodoc block ranges for MkDocs flavor (avoids O(n^2) scaling)
128        let autodoc_ranges = profile_section!("Autodoc block ranges", profile, {
129            if flavor == MarkdownFlavor::MkDocs {
130                crate::utils::mkdocstrings_refs::detect_autodoc_block_ranges(content)
131            } else {
132                Vec::new()
133            }
134        });
135
136        // Pre-compute Quarto div block ranges for Quarto flavor
137        let quarto_div_ranges = profile_section!("Quarto div ranges", profile, {
138            if flavor == MarkdownFlavor::Quarto {
139                crate::utils::quarto_divs::detect_div_block_ranges(content)
140            } else {
141                Vec::new()
142            }
143        });
144
145        // Pre-compute PyMdown Blocks ranges for MkDocs flavor (/// ... ///)
146        let pymdown_block_ranges = profile_section!("PyMdown block ranges", profile, {
147            if flavor == MarkdownFlavor::MkDocs {
148                crate::utils::pymdown_blocks::detect_block_ranges(content)
149            } else {
150                Vec::new()
151            }
152        });
153
154        // Pre-compute line information AND emphasis spans (without headings/blockquotes yet)
155        // Emphasis spans are captured during the same pulldown-cmark parse as list detection
156        let skip_ranges = SkipByteRanges {
157            html_comment_ranges: &html_comment_ranges,
158            autodoc_ranges: &autodoc_ranges,
159            quarto_div_ranges: &quarto_div_ranges,
160            pymdown_block_ranges: &pymdown_block_ranges,
161        };
162        let (mut lines, emphasis_spans) = profile_section!(
163            "Basic line info",
164            profile,
165            line_computation::compute_basic_line_info(
166                content,
167                &content_lines,
168                &line_offsets,
169                &code_blocks,
170                flavor,
171                &skip_ranges,
172                front_matter_end,
173            )
174        );
175
176        // Detect HTML blocks BEFORE heading detection
177        profile_section!(
178            "HTML blocks",
179            profile,
180            heading_detection::detect_html_blocks(content, &mut lines)
181        );
182
183        // Detect ESM import/export blocks in MDX files BEFORE heading detection
184        profile_section!(
185            "ESM blocks",
186            profile,
187            flavor_detection::detect_esm_blocks(content, &mut lines, flavor)
188        );
189
190        // Detect JSX expressions and MDX comments in MDX files
191        let (jsx_expression_ranges, mdx_comment_ranges) = profile_section!(
192            "JSX/MDX detection",
193            profile,
194            flavor_detection::detect_jsx_and_mdx_comments(content, &mut lines, flavor, &code_blocks)
195        );
196
197        // Detect MkDocs-specific constructs (admonitions, tabs, definition lists)
198        profile_section!(
199            "MkDocs constructs",
200            profile,
201            flavor_detection::detect_mkdocs_line_info(&content_lines, &mut lines, flavor)
202        );
203
204        // Detect Obsidian comments (%%...%%) in Obsidian flavor
205        let obsidian_comment_ranges = profile_section!(
206            "Obsidian comments",
207            profile,
208            flavor_detection::detect_obsidian_comments(content, &mut lines, flavor, &code_span_ranges)
209        );
210
211        // Collect link byte ranges early for heading detection (to skip lines inside link syntax)
212        let link_byte_ranges = profile_section!(
213            "Link byte ranges",
214            profile,
215            link_parser::collect_link_byte_ranges(content)
216        );
217
218        // Now detect headings and blockquotes
219        profile_section!(
220            "Headings & blockquotes",
221            profile,
222            heading_detection::detect_headings_and_blockquotes(
223                &content_lines,
224                &mut lines,
225                flavor,
226                &html_comment_ranges,
227                &link_byte_ranges,
228                front_matter_end,
229            )
230        );
231
232        // Parse code spans early so we can exclude them from link/image parsing
233        let code_spans = profile_section!(
234            "Code spans",
235            profile,
236            element_parsers::build_code_spans_from_ranges(content, &lines, &code_span_ranges)
237        );
238
239        // Mark lines that are continuations of multi-line code spans
240        // This is needed for parse_list_blocks to correctly handle list items with multi-line code spans
241        for span in &code_spans {
242            if span.end_line > span.line {
243                // Mark lines after the first line as continuations
244                for line_num in (span.line + 1)..=span.end_line {
245                    if let Some(line_info) = lines.get_mut(line_num - 1) {
246                        line_info.in_code_span_continuation = true;
247                    }
248                }
249            }
250        }
251
252        // Parse links, images, references, and list blocks
253        let (links, broken_links, footnote_refs) = profile_section!(
254            "Links",
255            profile,
256            link_parser::parse_links(content, &lines, &code_blocks, &code_spans, flavor, &html_comment_ranges)
257        );
258
259        let images = profile_section!(
260            "Images",
261            profile,
262            link_parser::parse_images(content, &lines, &code_blocks, &code_spans, &html_comment_ranges)
263        );
264
265        let reference_defs = profile_section!(
266            "Reference defs",
267            profile,
268            link_parser::parse_reference_defs(content, &lines)
269        );
270
271        // Build O(1) lookup map for reference definitions by lowercase ID
272        let reference_defs_map: HashMap<String, usize> = reference_defs
273            .iter()
274            .enumerate()
275            .map(|(idx, def)| (def.id.to_lowercase(), idx))
276            .collect();
277
278        let list_blocks = profile_section!("List blocks", profile, list_blocks::parse_list_blocks(content, &lines));
279
280        // Compute character frequency for fast content analysis
281        let char_frequency = profile_section!(
282            "Char frequency",
283            profile,
284            line_computation::compute_char_frequency(content)
285        );
286
287        // Pre-compute table blocks for rules that need them (MD013, MD055, MD056, MD058, MD060)
288        let table_blocks = profile_section!(
289            "Table blocks",
290            profile,
291            crate::utils::table_utils::TableUtils::find_table_blocks_with_code_info(
292                content,
293                &code_blocks,
294                &code_spans,
295                &html_comment_ranges,
296            )
297        );
298
299        // Reuse already-computed line_offsets and code_blocks instead of re-detecting
300        let line_index = profile_section!(
301            "Line index",
302            profile,
303            crate::utils::range_utils::LineIndex::with_line_starts_and_code_blocks(
304                content,
305                line_offsets.clone(),
306                &code_blocks,
307            )
308        );
309
310        // Pre-compute Jinja template ranges once for all rules (eliminates O(n*m) in MD011)
311        let jinja_ranges = profile_section!(
312            "Jinja ranges",
313            profile,
314            crate::utils::jinja_utils::find_jinja_ranges(content)
315        );
316
317        // Pre-compute Pandoc/Quarto citation ranges for Quarto flavor
318        let citation_ranges = profile_section!("Citation ranges", profile, {
319            if flavor == MarkdownFlavor::Quarto {
320                crate::utils::quarto_divs::find_citation_ranges(content)
321            } else {
322                Vec::new()
323            }
324        });
325
326        // Pre-compute Hugo/Quarto shortcode ranges ({{< ... >}} and {{% ... %}})
327        let shortcode_ranges = profile_section!("Shortcode ranges", profile, {
328            use crate::utils::regex_cache::HUGO_SHORTCODE_REGEX;
329            let mut ranges = Vec::new();
330            for mat in HUGO_SHORTCODE_REGEX.find_iter(content).flatten() {
331                ranges.push((mat.start(), mat.end()));
332            }
333            ranges
334        });
335
336        let inline_config = InlineConfig::from_content_with_code_blocks(content, &code_blocks);
337
338        Self {
339            content,
340            content_lines,
341            line_offsets,
342            code_blocks,
343            lines,
344            links,
345            images,
346            broken_links,
347            footnote_refs,
348            reference_defs,
349            reference_defs_map,
350            code_spans_cache: OnceLock::from(Arc::new(code_spans)),
351            math_spans_cache: OnceLock::new(), // Lazy-loaded on first access
352            list_blocks,
353            char_frequency,
354            html_tags_cache: OnceLock::new(),
355            emphasis_spans_cache: OnceLock::from(Arc::new(emphasis_spans)),
356            table_rows_cache: OnceLock::new(),
357            bare_urls_cache: OnceLock::new(),
358            has_mixed_list_nesting_cache: OnceLock::new(),
359            html_comment_ranges,
360            table_blocks,
361            line_index,
362            jinja_ranges,
363            flavor,
364            source_file,
365            jsx_expression_ranges,
366            mdx_comment_ranges,
367            citation_ranges,
368            shortcode_ranges,
369            inline_config,
370            obsidian_comment_ranges,
371        }
372    }
373
374    /// Get parsed inline configuration state.
375    pub fn inline_config(&self) -> &InlineConfig {
376        &self.inline_config
377    }
378
379    /// Get pre-split content lines, avoiding repeated `content.lines().collect()` allocations.
380    ///
381    /// Lines are 0-indexed (line 0 corresponds to line number 1 in the document).
382    pub fn raw_lines(&self) -> &[&'a str] {
383        &self.content_lines
384    }
385
386    /// Check if a rule is disabled at a specific line number (1-indexed)
387    ///
388    /// This method checks both persistent disable comments (<!-- rumdl-disable -->)
389    /// and line-specific comments (<!-- rumdl-disable-line -->, <!-- rumdl-disable-next-line -->).
390    pub fn is_rule_disabled(&self, rule_name: &str, line_number: usize) -> bool {
391        self.inline_config.is_rule_disabled(rule_name, line_number)
392    }
393
394    /// Get code spans - computed lazily on first access
395    pub fn code_spans(&self) -> Arc<Vec<CodeSpan>> {
396        Arc::clone(
397            self.code_spans_cache
398                .get_or_init(|| Arc::new(element_parsers::parse_code_spans(self.content, &self.lines))),
399        )
400    }
401
402    /// Get math spans - computed lazily on first access
403    pub fn math_spans(&self) -> Arc<Vec<MathSpan>> {
404        Arc::clone(
405            self.math_spans_cache
406                .get_or_init(|| Arc::new(element_parsers::parse_math_spans(self.content, &self.lines))),
407        )
408    }
409
410    /// Check if a byte position is within a math span (inline $...$ or display $$...$$)
411    pub fn is_in_math_span(&self, byte_pos: usize) -> bool {
412        let math_spans = self.math_spans();
413        math_spans
414            .iter()
415            .any(|span| byte_pos >= span.byte_offset && byte_pos < span.byte_end)
416    }
417
418    /// Get HTML comment ranges - pre-computed during LintContext construction
419    pub fn html_comment_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
420        &self.html_comment_ranges
421    }
422
423    /// Get Obsidian comment ranges - pre-computed during LintContext construction
424    /// Returns empty slice for non-Obsidian flavors
425    pub fn obsidian_comment_ranges(&self) -> &[(usize, usize)] {
426        &self.obsidian_comment_ranges
427    }
428
429    /// Check if a byte position is inside an Obsidian comment
430    ///
431    /// Returns false for non-Obsidian flavors.
432    pub fn is_in_obsidian_comment(&self, byte_pos: usize) -> bool {
433        self.obsidian_comment_ranges
434            .iter()
435            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
436    }
437
438    /// Check if a line/column position is inside an Obsidian comment
439    ///
440    /// Line number is 1-indexed, column is 1-indexed.
441    /// Returns false for non-Obsidian flavors.
442    pub fn is_position_in_obsidian_comment(&self, line_num: usize, col: usize) -> bool {
443        if self.obsidian_comment_ranges.is_empty() {
444            return false;
445        }
446
447        // Convert line/column (1-indexed, char-based) to byte position
448        let byte_pos = self.line_index.line_col_to_byte_range(line_num, col).start;
449        self.is_in_obsidian_comment(byte_pos)
450    }
451
452    /// Get HTML tags - computed lazily on first access
453    pub fn html_tags(&self) -> Arc<Vec<HtmlTag>> {
454        Arc::clone(self.html_tags_cache.get_or_init(|| {
455            Arc::new(element_parsers::parse_html_tags(
456                self.content,
457                &self.lines,
458                &self.code_blocks,
459                self.flavor,
460            ))
461        }))
462    }
463
464    /// Get emphasis spans - pre-computed during construction
465    pub fn emphasis_spans(&self) -> Arc<Vec<EmphasisSpan>> {
466        Arc::clone(
467            self.emphasis_spans_cache
468                .get()
469                .expect("emphasis_spans_cache initialized during construction"),
470        )
471    }
472
473    /// Get table rows - computed lazily on first access
474    pub fn table_rows(&self) -> Arc<Vec<TableRow>> {
475        Arc::clone(
476            self.table_rows_cache
477                .get_or_init(|| Arc::new(element_parsers::parse_table_rows(self.content, &self.lines))),
478        )
479    }
480
481    /// Get bare URLs - computed lazily on first access
482    pub fn bare_urls(&self) -> Arc<Vec<BareUrl>> {
483        Arc::clone(self.bare_urls_cache.get_or_init(|| {
484            Arc::new(element_parsers::parse_bare_urls(
485                self.content,
486                &self.lines,
487                &self.code_blocks,
488            ))
489        }))
490    }
491
492    /// Check if document has mixed ordered/unordered list nesting.
493    /// Result is cached after first computation (document-level invariant).
494    /// This is used by MD007 for smart style auto-detection.
495    pub fn has_mixed_list_nesting(&self) -> bool {
496        *self
497            .has_mixed_list_nesting_cache
498            .get_or_init(|| self.compute_mixed_list_nesting())
499    }
500
501    /// Internal computation for mixed list nesting (only called once per LintContext).
502    fn compute_mixed_list_nesting(&self) -> bool {
503        // Track parent list items by their marker position and type
504        // Using marker_column instead of indent because it works correctly
505        // for blockquoted content where indent doesn't account for the prefix
506        // Stack stores: (marker_column, is_ordered)
507        let mut stack: Vec<(usize, bool)> = Vec::new();
508        let mut last_was_blank = false;
509
510        for line_info in &self.lines {
511            // Skip non-content lines (code blocks, frontmatter, HTML comments, etc.)
512            if line_info.in_code_block
513                || line_info.in_front_matter
514                || line_info.in_mkdocstrings
515                || line_info.in_html_comment
516                || line_info.in_esm_block
517            {
518                continue;
519            }
520
521            // OPTIMIZATION: Use pre-computed is_blank instead of content().trim()
522            if line_info.is_blank {
523                last_was_blank = true;
524                continue;
525            }
526
527            if let Some(list_item) = &line_info.list_item {
528                // Normalize column 1 to column 0 (consistent with MD007 check function)
529                let current_pos = if list_item.marker_column == 1 {
530                    0
531                } else {
532                    list_item.marker_column
533                };
534
535                // If there was a blank line and this item is at root level, reset stack
536                if last_was_blank && current_pos == 0 {
537                    stack.clear();
538                }
539                last_was_blank = false;
540
541                // Pop items at same or greater position (they're siblings or deeper, not parents)
542                while let Some(&(pos, _)) = stack.last() {
543                    if pos >= current_pos {
544                        stack.pop();
545                    } else {
546                        break;
547                    }
548                }
549
550                // Check if immediate parent has different type - this is mixed nesting
551                if let Some(&(_, parent_is_ordered)) = stack.last()
552                    && parent_is_ordered != list_item.is_ordered
553                {
554                    return true; // Found mixed nesting - early exit
555                }
556
557                stack.push((current_pos, list_item.is_ordered));
558            } else {
559                // Non-list line (but not blank) - could be paragraph or other content
560                last_was_blank = false;
561            }
562        }
563
564        false
565    }
566
567    /// Map a byte offset to (line, column)
568    pub fn offset_to_line_col(&self, offset: usize) -> (usize, usize) {
569        match self.line_offsets.binary_search(&offset) {
570            Ok(line) => (line + 1, 1),
571            Err(line) => {
572                let line_start = self.line_offsets.get(line.wrapping_sub(1)).copied().unwrap_or(0);
573                (line, offset - line_start + 1)
574            }
575        }
576    }
577
578    /// Check if a position is within a code block or code span
579    pub fn is_in_code_block_or_span(&self, pos: usize) -> bool {
580        // Check code blocks first
581        if CodeBlockUtils::is_in_code_block_or_span(&self.code_blocks, pos) {
582            return true;
583        }
584
585        // Check inline code spans (lazy load if needed)
586        self.code_spans()
587            .iter()
588            .any(|span| pos >= span.byte_offset && pos < span.byte_end)
589    }
590
591    /// Get line information by line number (1-indexed)
592    pub fn line_info(&self, line_num: usize) -> Option<&LineInfo> {
593        if line_num > 0 {
594            self.lines.get(line_num - 1)
595        } else {
596            None
597        }
598    }
599
600    /// Get byte offset for a line number (1-indexed)
601    pub fn line_to_byte_offset(&self, line_num: usize) -> Option<usize> {
602        self.line_info(line_num).map(|info| info.byte_offset)
603    }
604
605    /// Get URL for a reference link/image by its ID (O(1) lookup via HashMap)
606    pub fn get_reference_url(&self, ref_id: &str) -> Option<&str> {
607        let normalized_id = ref_id.to_lowercase();
608        self.reference_defs_map
609            .get(&normalized_id)
610            .map(|&idx| self.reference_defs[idx].url.as_str())
611    }
612
613    /// Get a reference definition by its ID (O(1) lookup via HashMap)
614    pub fn get_reference_def(&self, ref_id: &str) -> Option<&ReferenceDef> {
615        let normalized_id = ref_id.to_lowercase();
616        self.reference_defs_map
617            .get(&normalized_id)
618            .map(|&idx| &self.reference_defs[idx])
619    }
620
621    /// Check if a reference definition exists by ID (O(1) lookup via HashMap)
622    pub fn has_reference_def(&self, ref_id: &str) -> bool {
623        let normalized_id = ref_id.to_lowercase();
624        self.reference_defs_map.contains_key(&normalized_id)
625    }
626
627    /// Check if a line is part of a list block
628    pub fn is_in_list_block(&self, line_num: usize) -> bool {
629        self.list_blocks
630            .iter()
631            .any(|block| line_num >= block.start_line && line_num <= block.end_line)
632    }
633
634    /// Get the list block containing a specific line
635    pub fn list_block_for_line(&self, line_num: usize) -> Option<&ListBlock> {
636        self.list_blocks
637            .iter()
638            .find(|block| line_num >= block.start_line && line_num <= block.end_line)
639    }
640
641    // Compatibility methods for DocumentStructure migration
642
643    /// Check if a line is within a code block
644    pub fn is_in_code_block(&self, line_num: usize) -> bool {
645        if line_num == 0 || line_num > self.lines.len() {
646            return false;
647        }
648        self.lines[line_num - 1].in_code_block
649    }
650
651    /// Check if a line is within front matter
652    pub fn is_in_front_matter(&self, line_num: usize) -> bool {
653        if line_num == 0 || line_num > self.lines.len() {
654            return false;
655        }
656        self.lines[line_num - 1].in_front_matter
657    }
658
659    /// Check if a line is within an HTML block
660    pub fn is_in_html_block(&self, line_num: usize) -> bool {
661        if line_num == 0 || line_num > self.lines.len() {
662            return false;
663        }
664        self.lines[line_num - 1].in_html_block
665    }
666
667    /// Check if a line and column is within a code span
668    pub fn is_in_code_span(&self, line_num: usize, col: usize) -> bool {
669        if line_num == 0 || line_num > self.lines.len() {
670            return false;
671        }
672
673        // Use the code spans cache to check
674        // Note: col is 1-indexed from caller, but span.start_col and span.end_col are 0-indexed
675        // Convert col to 0-indexed for comparison
676        let col_0indexed = if col > 0 { col - 1 } else { 0 };
677        let code_spans = self.code_spans();
678        code_spans.iter().any(|span| {
679            // Check if line is within the span's line range
680            if line_num < span.line || line_num > span.end_line {
681                return false;
682            }
683
684            if span.line == span.end_line {
685                // Single-line span: check column bounds
686                col_0indexed >= span.start_col && col_0indexed < span.end_col
687            } else if line_num == span.line {
688                // First line of multi-line span: anything after start_col is in span
689                col_0indexed >= span.start_col
690            } else if line_num == span.end_line {
691                // Last line of multi-line span: anything before end_col is in span
692                col_0indexed < span.end_col
693            } else {
694                // Middle line of multi-line span: entire line is in span
695                true
696            }
697        })
698    }
699
700    /// Check if a byte offset is within a code span
701    #[inline]
702    pub fn is_byte_offset_in_code_span(&self, byte_offset: usize) -> bool {
703        let code_spans = self.code_spans();
704        code_spans
705            .iter()
706            .any(|span| byte_offset >= span.byte_offset && byte_offset < span.byte_end)
707    }
708
709    /// Check if a byte position is within a reference definition
710    #[inline]
711    pub fn is_in_reference_def(&self, byte_pos: usize) -> bool {
712        self.reference_defs
713            .iter()
714            .any(|ref_def| byte_pos >= ref_def.byte_offset && byte_pos < ref_def.byte_end)
715    }
716
717    /// Check if a byte position is within an HTML comment
718    #[inline]
719    pub fn is_in_html_comment(&self, byte_pos: usize) -> bool {
720        self.html_comment_ranges
721            .iter()
722            .any(|range| byte_pos >= range.start && byte_pos < range.end)
723    }
724
725    /// Check if a byte position is within an HTML tag (including multiline tags)
726    /// Uses the pre-parsed html_tags which correctly handles tags spanning multiple lines
727    #[inline]
728    pub fn is_in_html_tag(&self, byte_pos: usize) -> bool {
729        self.html_tags()
730            .iter()
731            .any(|tag| byte_pos >= tag.byte_offset && byte_pos < tag.byte_end)
732    }
733
734    /// Check if a byte position is within a Jinja template ({{ }} or {% %})
735    pub fn is_in_jinja_range(&self, byte_pos: usize) -> bool {
736        self.jinja_ranges
737            .iter()
738            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
739    }
740
741    /// Check if a byte position is within a JSX expression (MDX: {expression})
742    #[inline]
743    pub fn is_in_jsx_expression(&self, byte_pos: usize) -> bool {
744        self.jsx_expression_ranges
745            .iter()
746            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
747    }
748
749    /// Check if a byte position is within an MDX comment ({/* ... */})
750    #[inline]
751    pub fn is_in_mdx_comment(&self, byte_pos: usize) -> bool {
752        self.mdx_comment_ranges
753            .iter()
754            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
755    }
756
757    /// Get all JSX expression byte ranges
758    pub fn jsx_expression_ranges(&self) -> &[(usize, usize)] {
759        &self.jsx_expression_ranges
760    }
761
762    /// Get all MDX comment byte ranges
763    pub fn mdx_comment_ranges(&self) -> &[(usize, usize)] {
764        &self.mdx_comment_ranges
765    }
766
767    /// Check if a byte position is within a Pandoc/Quarto citation (`@key` or `[@key]`)
768    /// Only active in Quarto flavor
769    #[inline]
770    pub fn is_in_citation(&self, byte_pos: usize) -> bool {
771        self.citation_ranges
772            .iter()
773            .any(|range| byte_pos >= range.start && byte_pos < range.end)
774    }
775
776    /// Get all citation byte ranges (Quarto flavor only)
777    pub fn citation_ranges(&self) -> &[crate::utils::skip_context::ByteRange] {
778        &self.citation_ranges
779    }
780
781    /// Check if a byte position is within a Hugo/Quarto shortcode ({{< ... >}} or {{% ... %}})
782    #[inline]
783    pub fn is_in_shortcode(&self, byte_pos: usize) -> bool {
784        self.shortcode_ranges
785            .iter()
786            .any(|(start, end)| byte_pos >= *start && byte_pos < *end)
787    }
788
789    /// Get all shortcode byte ranges
790    pub fn shortcode_ranges(&self) -> &[(usize, usize)] {
791        &self.shortcode_ranges
792    }
793
794    /// Check if a byte position is within a link reference definition title
795    pub fn is_in_link_title(&self, byte_pos: usize) -> bool {
796        self.reference_defs.iter().any(|def| {
797            if let (Some(start), Some(end)) = (def.title_byte_start, def.title_byte_end) {
798                byte_pos >= start && byte_pos < end
799            } else {
800                false
801            }
802        })
803    }
804
805    /// Check if content has any instances of a specific character (fast)
806    pub fn has_char(&self, ch: char) -> bool {
807        match ch {
808            '#' => self.char_frequency.hash_count > 0,
809            '*' => self.char_frequency.asterisk_count > 0,
810            '_' => self.char_frequency.underscore_count > 0,
811            '-' => self.char_frequency.hyphen_count > 0,
812            '+' => self.char_frequency.plus_count > 0,
813            '>' => self.char_frequency.gt_count > 0,
814            '|' => self.char_frequency.pipe_count > 0,
815            '[' => self.char_frequency.bracket_count > 0,
816            '`' => self.char_frequency.backtick_count > 0,
817            '<' => self.char_frequency.lt_count > 0,
818            '!' => self.char_frequency.exclamation_count > 0,
819            '\n' => self.char_frequency.newline_count > 0,
820            _ => self.content.contains(ch), // Fallback for other characters
821        }
822    }
823
824    /// Get count of a specific character (fast)
825    pub fn char_count(&self, ch: char) -> usize {
826        match ch {
827            '#' => self.char_frequency.hash_count,
828            '*' => self.char_frequency.asterisk_count,
829            '_' => self.char_frequency.underscore_count,
830            '-' => self.char_frequency.hyphen_count,
831            '+' => self.char_frequency.plus_count,
832            '>' => self.char_frequency.gt_count,
833            '|' => self.char_frequency.pipe_count,
834            '[' => self.char_frequency.bracket_count,
835            '`' => self.char_frequency.backtick_count,
836            '<' => self.char_frequency.lt_count,
837            '!' => self.char_frequency.exclamation_count,
838            '\n' => self.char_frequency.newline_count,
839            _ => self.content.matches(ch).count(), // Fallback for other characters
840        }
841    }
842
843    /// Check if content likely contains headings (fast)
844    pub fn likely_has_headings(&self) -> bool {
845        self.char_frequency.hash_count > 0 || self.char_frequency.hyphen_count > 2 // Potential setext underlines
846    }
847
848    /// Check if content likely contains lists (fast)
849    pub fn likely_has_lists(&self) -> bool {
850        self.char_frequency.asterisk_count > 0
851            || self.char_frequency.hyphen_count > 0
852            || self.char_frequency.plus_count > 0
853    }
854
855    /// Check if content likely contains emphasis (fast)
856    pub fn likely_has_emphasis(&self) -> bool {
857        self.char_frequency.asterisk_count > 1 || self.char_frequency.underscore_count > 1
858    }
859
860    /// Check if content likely contains tables (fast)
861    pub fn likely_has_tables(&self) -> bool {
862        self.char_frequency.pipe_count > 2
863    }
864
865    /// Check if content likely contains blockquotes (fast)
866    pub fn likely_has_blockquotes(&self) -> bool {
867        self.char_frequency.gt_count > 0
868    }
869
870    /// Check if content likely contains code (fast)
871    pub fn likely_has_code(&self) -> bool {
872        self.char_frequency.backtick_count > 0
873    }
874
875    /// Check if content likely contains links or images (fast)
876    pub fn likely_has_links_or_images(&self) -> bool {
877        self.char_frequency.bracket_count > 0 || self.char_frequency.exclamation_count > 0
878    }
879
880    /// Check if content likely contains HTML (fast)
881    pub fn likely_has_html(&self) -> bool {
882        self.char_frequency.lt_count > 0
883    }
884
885    /// Get the blockquote prefix for inserting a blank line at the given line index.
886    /// Returns the prefix without trailing content (e.g., ">" or ">>").
887    /// This is needed because blank lines inside blockquotes must preserve the blockquote structure.
888    /// Returns an empty string if the line is not inside a blockquote.
889    pub fn blockquote_prefix_for_blank_line(&self, line_idx: usize) -> String {
890        if let Some(line_info) = self.lines.get(line_idx)
891            && let Some(ref bq) = line_info.blockquote
892        {
893            bq.prefix.trim_end().to_string()
894        } else {
895            String::new()
896        }
897    }
898
899    /// Get HTML tags on a specific line
900    pub fn html_tags_on_line(&self, line_num: usize) -> Vec<HtmlTag> {
901        self.html_tags()
902            .iter()
903            .filter(|tag| tag.line == line_num)
904            .cloned()
905            .collect()
906    }
907
908    /// Get emphasis spans on a specific line
909    pub fn emphasis_spans_on_line(&self, line_num: usize) -> Vec<EmphasisSpan> {
910        self.emphasis_spans()
911            .iter()
912            .filter(|span| span.line == line_num)
913            .cloned()
914            .collect()
915    }
916
917    /// Get table rows on a specific line
918    pub fn table_rows_on_line(&self, line_num: usize) -> Vec<TableRow> {
919        self.table_rows()
920            .iter()
921            .filter(|row| row.line == line_num)
922            .cloned()
923            .collect()
924    }
925
926    /// Get bare URLs on a specific line
927    pub fn bare_urls_on_line(&self, line_num: usize) -> Vec<BareUrl> {
928        self.bare_urls()
929            .iter()
930            .filter(|url| url.line == line_num)
931            .cloned()
932            .collect()
933    }
934
935    /// Find the line index for a given byte offset using binary search.
936    /// Returns (line_index, line_number, column) where:
937    /// - line_index is the 0-based index in the lines array
938    /// - line_number is the 1-based line number
939    /// - column is the byte offset within that line
940    #[inline]
941    fn find_line_for_offset(lines: &[LineInfo], byte_offset: usize) -> (usize, usize, usize) {
942        // Binary search to find the line containing this byte offset
943        let idx = match lines.binary_search_by(|line| {
944            if byte_offset < line.byte_offset {
945                std::cmp::Ordering::Greater
946            } else if byte_offset > line.byte_offset + line.byte_len {
947                std::cmp::Ordering::Less
948            } else {
949                std::cmp::Ordering::Equal
950            }
951        }) {
952            Ok(idx) => idx,
953            Err(idx) => idx.saturating_sub(1),
954        };
955
956        let line = &lines[idx];
957        let line_num = idx + 1;
958        let col = byte_offset.saturating_sub(line.byte_offset);
959
960        (idx, line_num, col)
961    }
962
963    /// Check if a byte offset is within a code span using binary search
964    #[inline]
965    fn is_offset_in_code_span(code_spans: &[CodeSpan], offset: usize) -> bool {
966        // Since spans are sorted by byte_offset, use partition_point for binary search
967        let idx = code_spans.partition_point(|span| span.byte_offset <= offset);
968
969        // Check the span that starts at or before our offset
970        if idx > 0 {
971            let span = &code_spans[idx - 1];
972            if offset >= span.byte_offset && offset < span.byte_end {
973                return true;
974            }
975        }
976
977        false
978    }
979
980    /// Get an iterator over valid headings (skipping invalid ones like `#NoSpace`)
981    ///
982    /// Valid headings have proper spacing after the `#` markers (or are level > 1).
983    /// This is the standard iterator for rules that need to process headings.
984    ///
985    /// # Examples
986    ///
987    /// ```
988    /// use rumdl::lint_context::LintContext;
989    /// use rumdl::config::MarkdownFlavor;
990    ///
991    /// let content = "# Valid Heading\n#NoSpace\n## Another Valid";
992    /// let ctx = LintContext::new(content, MarkdownFlavor::Standard, None);
993    ///
994    /// for heading in ctx.valid_headings() {
995    ///     println!("Line {}: {} (level {})", heading.line_num, heading.heading.text, heading.heading.level);
996    /// }
997    /// // Only prints valid headings, skips `#NoSpace`
998    /// ```
999    #[must_use]
1000    pub fn valid_headings(&self) -> ValidHeadingsIter<'_> {
1001        ValidHeadingsIter::new(&self.lines)
1002    }
1003
1004    /// Check if the document contains any valid CommonMark headings
1005    ///
1006    /// Returns `true` if there is at least one heading with proper space after `#`.
1007    #[must_use]
1008    pub fn has_valid_headings(&self) -> bool {
1009        self.lines
1010            .iter()
1011            .any(|line| line.heading.as_ref().is_some_and(|h| h.is_valid))
1012    }
1013}
rumdl_lib/lint_context/mod.rs

rumdl_lib/lint_context/
mod.rs