rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_extensions;
12use crate::utils::mkdocs_footnotes;
13use crate::utils::mkdocs_icons;
14use crate::utils::mkdocs_snippets;
15use crate::utils::mkdocs_tabs;
16use crate::utils::mkdocstrings_refs;
17use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
18use regex::Regex;
19use std::sync::LazyLock;
20
/// Enhanced inline math pattern that handles both single `$` and double `$$` delimiters.
///
/// Matches:
/// - Display math: `$$...$$` (zero or more non-`$` characters, newlines allowed)
/// - Inline math: `$...$` (zero or more non-`$`, non-newline characters)
///
/// The display math alternative is listed first so that `$$content$$` is matched
/// as one display span instead of being split into inline spans.
/// Critically, both alternatives allow ZERO characters between delimiters,
/// so empty math like `$$` or `$ $` is consumed and won't pair with other `$` signs.
///
/// The pattern is a fixed literal, so the `unwrap()` can only fail if the
/// literal itself is edited into an invalid regex.
static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
30
/// Range representing a span of bytes (start inclusive, end exclusive)
#[derive(Debug, Clone, Copy)]
pub struct ByteRange {
    /// Byte offset of the first byte covered by the span (inclusive).
    pub start: usize,
    /// Byte offset one past the last byte covered by the span (exclusive).
    pub end: usize,
}
37
38/// Pre-compute all HTML comment ranges in the content
39/// Returns a sorted vector of byte ranges for efficient lookup
40pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
41    HTML_COMMENT_PATTERN
42        .find_iter(content)
43        .map(|m| ByteRange {
44            start: m.start(),
45            end: m.end(),
46        })
47        .collect()
48}
49
50/// Check if a byte position is within any of the pre-computed HTML comment ranges
51/// Uses binary search for O(log n) complexity
52pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
53    // Binary search to find a range that might contain byte_pos
54    ranges
55        .binary_search_by(|range| {
56            if byte_pos < range.start {
57                std::cmp::Ordering::Greater
58            } else if byte_pos >= range.end {
59                std::cmp::Ordering::Less
60            } else {
61                std::cmp::Ordering::Equal
62            }
63        })
64        .is_ok()
65}
66
67/// Check if a line is ENTIRELY within a single HTML comment
68/// Returns true only if both the line start AND end are within the same comment range
69pub fn is_line_entirely_in_html_comment(ranges: &[ByteRange], line_start: usize, line_end: usize) -> bool {
70    for range in ranges {
71        // If line start is within this range, check if line end is also within it
72        if line_start >= range.start && line_start < range.end {
73            return line_end <= range.end;
74        }
75    }
76    false
77}
78
/// Report whether the zero-based line `line_num` belongs to a front matter block.
///
/// Front matter is recognized only when the very first line of `content` is
/// exactly `---` (YAML) or `+++` (TOML) and a later line repeats that same
/// delimiter. Every line up to and including the closing delimiter counts as
/// front matter. Without a matching closing delimiter nothing is front matter.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut remaining = content.lines();

    let Some(opener) = remaining.next() else {
        return false;
    };
    if opener != "---" && opener != "+++" {
        return false;
    }

    // `position` counts from the line after the opener, so the closing
    // delimiter's line index in the whole document is `offset + 1`.
    remaining
        .position(|candidate| candidate == opener)
        .is_some_and(|offset| line_num <= offset + 1)
}
103
104/// Check if a byte position is within any context that should be skipped
105pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
106    // Check standard code contexts
107    if ctx.is_in_code_block_or_span(byte_pos) {
108        return true;
109    }
110
111    // Check HTML comments
112    if is_in_html_comment(ctx.content, byte_pos) {
113        return true;
114    }
115
116    // Check math contexts
117    if is_in_math_context(ctx, byte_pos) {
118        return true;
119    }
120
121    // Check if in HTML tag
122    if is_in_html_tag(ctx, byte_pos) {
123        return true;
124    }
125
126    // Check MDX-specific contexts
127    if ctx.flavor == MarkdownFlavor::MDX {
128        // Check JSX expressions
129        if ctx.is_in_jsx_expression(byte_pos) {
130            return true;
131        }
132        // Check MDX comments
133        if ctx.is_in_mdx_comment(byte_pos) {
134            return true;
135        }
136    }
137
138    // Check MkDocs snippet sections and multi-line blocks
139    if ctx.flavor == MarkdownFlavor::MkDocs {
140        if mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
141            return true;
142        }
143        if mkdocs_snippets::is_within_snippet_block(ctx.content, byte_pos) {
144            return true;
145        }
146    }
147
148    // Check MkDocs admonition blocks
149    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
150        return true;
151    }
152
153    // Check MkDocs footnote definitions
154    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
155        return true;
156    }
157
158    // Check MkDocs content tabs
159    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
160        return true;
161    }
162
163    // Check MkDocstrings autodoc blocks
164    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
165        return true;
166    }
167
168    // Check MkDocs Critic Markup
169    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
170        return true;
171    }
172
173    false
174}
175
176/// Check if a byte position is within a JSX expression (MDX: {expression})
177#[inline]
178pub fn is_in_jsx_expression(ctx: &LintContext, byte_pos: usize) -> bool {
179    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_jsx_expression(byte_pos)
180}
181
182/// Check if a byte position is within an MDX comment ({/* ... */})
183#[inline]
184pub fn is_in_mdx_comment(ctx: &LintContext, byte_pos: usize) -> bool {
185    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_mdx_comment(byte_pos)
186}
187
188/// Check if a line should be skipped due to MkDocs snippet syntax
189pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
190    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
191}
192
193/// Check if a line is a MkDocs admonition marker
194pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
195    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
196}
197
198/// Check if a line is a MkDocs footnote definition
199pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
200    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
201}
202
203/// Check if a line is a MkDocs tab marker
204pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
205    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
206}
207
208/// Check if a line is a MkDocstrings autodoc marker
209pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
210    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
211}
212
213/// Check if a line contains MkDocs Critic Markup
214pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
215    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
216}
217
218/// Check if a byte position is within an HTML comment
219pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
220    for m in HTML_COMMENT_PATTERN.find_iter(content) {
221        if m.start() <= byte_pos && byte_pos < m.end() {
222            return true;
223        }
224    }
225    false
226}
227
228/// Check if a byte position is within an HTML tag
229pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
230    for html_tag in ctx.html_tags().iter() {
231        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
232            return true;
233        }
234    }
235    false
236}
237
238/// Check if a byte position is within a math context (block or inline)
239pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
240    let content = ctx.content;
241
242    // Check if we're in a math block
243    if is_in_math_block(content, byte_pos) {
244        return true;
245    }
246
247    // Check if we're in inline math
248    if is_in_inline_math(content, byte_pos) {
249        return true;
250    }
251
252    false
253}
254
255/// Check if a byte position is within a math block ($$...$$)
256pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
257    let mut in_math_block = false;
258    let mut current_pos = 0;
259
260    for line in content.lines() {
261        let line_start = current_pos;
262        let line_end = current_pos + line.len();
263
264        // Check if this line is a math block delimiter
265        if is_math_block_delimiter(line) {
266            if byte_pos >= line_start && byte_pos <= line_end {
267                // Position is on the delimiter line itself
268                return true;
269            }
270            in_math_block = !in_math_block;
271        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
272            // Position is inside a math block
273            return true;
274        }
275
276        current_pos = line_end + 1; // +1 for newline
277    }
278
279    false
280}
281
282/// Check if a byte position is within inline math ($...$)
283pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
284    // Find all inline math spans
285    for m in INLINE_MATH_REGEX.find_iter(content) {
286        if m.start() <= byte_pos && byte_pos < m.end() {
287            return true;
288        }
289    }
290    false
291}
292
293/// Check if a position is within a table cell
294pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
295    // Check if this line is part of a table
296    for table_row in ctx.table_rows().iter() {
297        if table_row.line == line_num {
298            // This line is part of a table
299            // For now, we'll skip the entire table row
300            // Future enhancement: check specific column boundaries
301            return true;
302        }
303    }
304    false
305}
306
/// Report whether `line` looks like part of a pipe table.
///
/// After trimming, a line qualifies when either:
/// - it is a separator row: only `|`, `-`, `:` and whitespace, with at least
///   one `|` and one `-`; or
/// - it is a content row: it starts or ends with `|` and has at least two `|`.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both forms need at least one pipe, so bail out early without one.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let is_separator_row = row.contains('-')
        && row
            .chars()
            .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());
    if is_separator_row {
        return true;
    }

    pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'))
}
328
/// Check if a byte position is within an MkDocs icon shortcode
/// Icon shortcodes use format like `:material-check:`, `:octicons-mark-github-16:`
///
/// The `_flavor` argument is accepted but currently ignored: the check is
/// delegated to `mkdocs_icons::is_in_any_shortcode` for every flavor.
pub fn is_in_icon_shortcode(line: &str, position: usize, _flavor: MarkdownFlavor) -> bool {
    // The flavor is deliberately not consulted here: the pattern is checked
    // for all flavors since emoji shortcodes are universal
    mkdocs_icons::is_in_any_shortcode(line, position)
}
336
337/// Check if a byte position is within PyMdown extension markup
338/// Includes: Keys (++ctrl+alt++), Caret (^text^), Insert (^^text^^), Mark (==text==)
339pub fn is_in_pymdown_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
340    if flavor != MarkdownFlavor::MkDocs {
341        return false;
342    }
343    mkdocs_extensions::is_in_pymdown_markup(line, position)
344}
345
346/// Check if a byte position is within any MkDocs-specific markup
347/// Combines icon shortcodes and PyMdown extensions
348pub fn is_in_mkdocs_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
349    if is_in_icon_shortcode(line, position, flavor) {
350        return true;
351    }
352    if is_in_pymdown_markup(line, position, flavor) {
353        return true;
354    }
355    false
356}
357
/// Unit tests for the skip-context helpers that can be exercised with plain
/// strings (no `LintContext` required).
#[cfg(test)]
mod tests {
    use super::*;

    /// Positions before, inside, and after a single inline HTML comment.
    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    /// A line is "entirely in a comment" only when both its start and its end
    /// fall inside the same comment range.
    #[test]
    fn test_is_line_entirely_in_html_comment() {
        // Test 1: Multi-line comment with content after closing
        let content = "<!--\ncomment\n--> Content after comment";
        let ranges = compute_html_comment_ranges(content);
        // Line 0: "<!--" (bytes 0-4) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 0, 4));
        // Line 1: "comment" (bytes 5-12) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 5, 12));
        // Line 2: "--> Content after comment" (bytes 13-38) - NOT entirely in comment
        assert!(!is_line_entirely_in_html_comment(&ranges, 13, 38));

        // Test 2: Single-line comment with content after
        let content2 = "<!-- comment --> Not a comment";
        let ranges2 = compute_html_comment_ranges(content2);
        // The entire line is NOT entirely in the comment
        assert!(!is_line_entirely_in_html_comment(&ranges2, 0, 30));

        // Test 3: Single-line comment alone
        let content3 = "<!-- comment -->";
        let ranges3 = compute_html_comment_ranges(content3);
        // The entire line IS entirely in the comment
        assert!(is_line_entirely_in_html_comment(&ranges3, 0, 16));

        // Test 4: Content before comment
        let content4 = "Text before <!-- comment -->";
        let ranges4 = compute_html_comment_ranges(content4);
        // Line start is NOT in the comment range
        assert!(!is_line_entirely_in_html_comment(&ranges4, 0, 28));
    }

    /// Positions relative to a `$$ ... $$` block with LF line endings.
    /// Layout: "Text" 0-3, "$$" 5-6, "math content" 8-19, "$$" 21-22, "more text" 24-32.
    #[test]
    fn test_math_block_detection() {
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content" (inside the block)
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    /// Both `$...$` and `$$...$$` spans on one line are treated as inline math.
    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    /// Header, separator, and body rows are table lines; prose with a single
    /// pipe is not.
    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    /// Front matter detection takes zero-based line indices; the assertion
    /// messages use one-based line numbers.
    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }

    /// The shortcode span is half-open: byte 6 (opening colon) through byte 21
    /// (closing colon) are inside, byte 22 is not.
    #[test]
    fn test_is_in_icon_shortcode() {
        let line = "Click :material-check: to confirm";
        // Position 0-5 is "Click"
        assert!(!is_in_icon_shortcode(line, 0, MarkdownFlavor::MkDocs));
        // Position 6-22 is ":material-check:"
        assert!(is_in_icon_shortcode(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 15, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 21, MarkdownFlavor::MkDocs));
        // Position 22+ is " to confirm"
        assert!(!is_in_icon_shortcode(line, 22, MarkdownFlavor::MkDocs));
    }

    /// PyMdown markup is only recognized for the MkDocs flavor.
    #[test]
    fn test_is_in_pymdown_markup() {
        // Test Keys notation
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line, 17, MarkdownFlavor::MkDocs));

        // Test Mark notation
        let line2 = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line2, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 8, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 15, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line2, 23, MarkdownFlavor::MkDocs));

        // Should not match for Standard flavor
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Standard));
    }

    /// The combined check covers both icon shortcodes and PyMdown markup.
    #[test]
    fn test_is_in_mkdocs_markup() {
        // Should combine both icon and pymdown
        let line = ":material-check: and ++ctrl++";
        assert!(is_in_mkdocs_markup(line, 5, MarkdownFlavor::MkDocs)); // In icon
        assert!(is_in_mkdocs_markup(line, 23, MarkdownFlavor::MkDocs)); // In keys
        assert!(!is_in_mkdocs_markup(line, 17, MarkdownFlavor::MkDocs)); // In " and "
    }
}
544}