Skip to main content

rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_extensions;
12use crate::utils::mkdocs_footnotes;
13use crate::utils::mkdocs_icons;
14use crate::utils::mkdocs_snippets;
15use crate::utils::mkdocs_tabs;
16use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
17use regex::Regex;
18use std::sync::LazyLock;
19
20/// Enhanced inline math pattern that handles both single $ and double $$ delimiters.
21/// Matches:
22/// - Display math: $$...$$ (zero or more non-$ characters)
23/// - Inline math: $...$ (zero or more non-$ non-newline characters)
24///
25/// The display math pattern is tried first to correctly handle $$content$$.
26/// Critically, both patterns allow ZERO characters between delimiters,
27/// so empty math like $$ or $ $ is consumed and won't pair with other $ signs.
28static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
29
/// Half-open span of bytes: `start` is inclusive, `end` is exclusive.
///
/// Derives `PartialEq`/`Eq` so spans can be compared directly (for example in
/// `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteRange {
    /// Offset of the first byte inside the span.
    pub start: usize,
    /// Offset one past the last byte inside the span.
    pub end: usize,
}
36
37/// Pre-compute all HTML comment ranges in the content
38/// Returns a sorted vector of byte ranges for efficient lookup
39pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
40    HTML_COMMENT_PATTERN
41        .find_iter(content)
42        .map(|m| ByteRange {
43            start: m.start(),
44            end: m.end(),
45        })
46        .collect()
47}
48
49/// Check if a byte position is within any of the pre-computed HTML comment ranges
50/// Uses binary search for O(log n) complexity
51pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
52    // Binary search to find a range that might contain byte_pos
53    ranges
54        .binary_search_by(|range| {
55            if byte_pos < range.start {
56                std::cmp::Ordering::Greater
57            } else if byte_pos >= range.end {
58                std::cmp::Ordering::Less
59            } else {
60                std::cmp::Ordering::Equal
61            }
62        })
63        .is_ok()
64}
65
66/// Check if a line is ENTIRELY within a single HTML comment
67/// Returns true only if both the line start AND end are within the same comment range
68pub fn is_line_entirely_in_html_comment(ranges: &[ByteRange], line_start: usize, line_end: usize) -> bool {
69    for range in ranges {
70        // If line start is within this range, check if line end is also within it
71        if line_start >= range.start && line_start < range.end {
72            return line_end <= range.end;
73        }
74    }
75    false
76}
77
/// Report whether the zero-based `line_num` lies inside leading front matter.
///
/// Front matter is recognised only when the very first line is exactly `---`
/// (YAML) or `+++` (TOML) and a later line repeats that same fence. Both fence
/// lines count as part of the front matter. An unclosed block yields `false`.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut remaining = content.lines();

    // The opening fence must be the first line and decides which closer to look for.
    let fence = match remaining.next() {
        Some(first @ ("---" | "+++")) => first,
        _ => return false,
    };

    // `position` counts from the line after the opening fence, hence the `+ 1`
    // to get the closing fence's zero-based line index.
    remaining
        .position(|text| text == fence)
        .is_some_and(|offset| line_num <= offset + 1)
}
102
103/// Check if a byte position is within any context that should be skipped
104
105/// Check if a byte position is within a JSX expression (MDX: {expression})
106#[inline]
107pub fn is_in_jsx_expression(ctx: &LintContext, byte_pos: usize) -> bool {
108    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_jsx_expression(byte_pos)
109}
110
111/// Check if a byte position is within an MDX comment ({/* ... */})
112#[inline]
113pub fn is_in_mdx_comment(ctx: &LintContext, byte_pos: usize) -> bool {
114    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_mdx_comment(byte_pos)
115}
116
117/// Check if a line should be skipped due to MkDocs snippet syntax
118pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
119    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
120}
121
122/// Check if a line is a MkDocs admonition marker
123pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
124    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
125}
126
127/// Check if a line is a MkDocs footnote definition
128pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
129    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
130}
131
132/// Check if a line is a MkDocs tab marker
133pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
134    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
135}
136
137/// Check if a line contains MkDocs Critic Markup
138pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
139    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
140}
141
142/// Check if a byte position is within an HTML comment
143pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
144    for m in HTML_COMMENT_PATTERN.find_iter(content) {
145        if m.start() <= byte_pos && byte_pos < m.end() {
146            return true;
147        }
148    }
149    false
150}
151
152/// Check if a byte position is within an HTML tag
153pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
154    for html_tag in ctx.html_tags().iter() {
155        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
156            return true;
157        }
158    }
159    false
160}
161
162/// Check if a byte position is within a math context (block or inline)
163pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
164    let content = ctx.content;
165
166    // Check if we're in a math block
167    if is_in_math_block(content, byte_pos) {
168        return true;
169    }
170
171    // Check if we're in inline math
172    if is_in_inline_math(content, byte_pos) {
173        return true;
174    }
175
176    false
177}
178
179/// Check if a byte position is within a math block ($$...$$)
180pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
181    let mut in_math_block = false;
182    let mut current_pos = 0;
183
184    for line in content.lines() {
185        let line_start = current_pos;
186        let line_end = current_pos + line.len();
187
188        // Check if this line is a math block delimiter
189        if is_math_block_delimiter(line) {
190            if byte_pos >= line_start && byte_pos <= line_end {
191                // Position is on the delimiter line itself
192                return true;
193            }
194            in_math_block = !in_math_block;
195        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
196            // Position is inside a math block
197            return true;
198        }
199
200        current_pos = line_end + 1; // +1 for newline
201    }
202
203    false
204}
205
206/// Check if a byte position is within inline math ($...$)
207pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
208    // Find all inline math spans
209    for m in INLINE_MATH_REGEX.find_iter(content) {
210        if m.start() <= byte_pos && byte_pos < m.end() {
211            return true;
212        }
213    }
214    false
215}
216
217/// Check if a position is within a table cell
218pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
219    // Check if this line is part of a table
220    for table_row in ctx.table_rows().iter() {
221        if table_row.line == line_num {
222            // This line is part of a table
223            // For now, we'll skip the entire table row
224            // Future enhancement: check specific column boundaries
225            return true;
226        }
227    }
228    false
229}
230
/// Check if a line contains table syntax
///
/// After trimming surrounding whitespace, a line is a table line when it is
/// either:
/// - a separator row: made up only of `|`, `-`, `:` and whitespace, with at
///   least one `|` and one `-`; or
/// - a content row: starts and/or ends with `|` and has at least two `|`.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();
    let pipe_count = row.matches('|').count();

    let is_separator = pipe_count > 0
        && row.contains('-')
        && row
            .chars()
            .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());

    let is_content = pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'));

    is_separator || is_content
}
252
/// Check if a byte position is within an icon or emoji shortcode
/// Icon shortcodes use format like `:material-check:`, `:octicons-mark-github-16:`
///
/// The `_flavor` argument is not consulted: the result comes solely from
/// `mkdocs_icons::is_in_any_shortcode(line, position)`.
pub fn is_in_icon_shortcode(line: &str, position: usize, _flavor: MarkdownFlavor) -> bool {
    // Deliberately flavor-independent: the shortcode pattern is checked for
    // all flavors since emoji shortcodes are universal
    mkdocs_icons::is_in_any_shortcode(line, position)
}
260
261/// Check if a byte position is within PyMdown extension markup
262/// Includes: Keys (++ctrl+alt++), Caret (^text^), Insert (^^text^^), Mark (==text==)
263///
264/// For MkDocs flavor: supports all PyMdown extensions
265/// For Obsidian flavor: only supports Mark (==highlight==) syntax
266pub fn is_in_pymdown_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
267    match flavor {
268        MarkdownFlavor::MkDocs => mkdocs_extensions::is_in_pymdown_markup(line, position),
269        MarkdownFlavor::Obsidian => {
270            // Obsidian supports ==highlight== syntax (same as PyMdown Mark)
271            mkdocs_extensions::is_in_mark(line, position)
272        }
273        _ => false,
274    }
275}
276
277/// Check if a byte position is within flavor-specific markup
278/// For MkDocs: icon shortcodes and PyMdown extensions
279/// For Obsidian: highlight syntax (==text==)
280pub fn is_in_mkdocs_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
281    if is_in_icon_shortcode(line, position, flavor) {
282        return true;
283    }
284    if is_in_pymdown_markup(line, position, flavor) {
285        return true;
286    }
287    false
288}
289
// Unit tests for the skip-context helpers. Positions passed to the helpers are
// zero-based byte offsets into the sample string (or zero-based line indices
// for the front matter checks).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_is_line_entirely_in_html_comment() {
        // Test 1: Multi-line comment with content after closing
        let content = "<!--\ncomment\n--> Content after comment";
        let ranges = compute_html_comment_ranges(content);
        // Line 0: "<!--" (bytes 0-4) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 0, 4));
        // Line 1: "comment" (bytes 5-12) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 5, 12));
        // Line 2: "--> Content after comment" (bytes 13-38) - NOT entirely in comment
        assert!(!is_line_entirely_in_html_comment(&ranges, 13, 38));

        // Test 2: Single-line comment with content after
        let content2 = "<!-- comment --> Not a comment";
        let ranges2 = compute_html_comment_ranges(content2);
        // The entire line is NOT entirely in the comment
        assert!(!is_line_entirely_in_html_comment(&ranges2, 0, 30));

        // Test 3: Single-line comment alone
        let content3 = "<!-- comment -->";
        let ranges3 = compute_html_comment_ranges(content3);
        // The entire line IS entirely in the comment
        assert!(is_line_entirely_in_html_comment(&ranges3, 0, 16));

        // Test 4: Content before comment
        let content4 = "Text before <!-- comment -->";
        let ranges4 = compute_html_comment_ranges(content4);
        // Line start is NOT in the comment range
        assert!(!is_line_entirely_in_html_comment(&ranges4, 0, 28));
    }

    #[test]
    fn test_math_block_detection() {
        // Byte layout: "Text\n" 0-4, "$$\n" 5-7, "math content\n" 8-20,
        // "$$\n" 21-23, "more text" 24-32
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content" (opening $$ is bytes 5-6)
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math (one past the last byte)
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }

    #[test]
    fn test_is_in_icon_shortcode() {
        let line = "Click :material-check: to confirm";
        // Bytes 0..5 are "Click"
        assert!(!is_in_icon_shortcode(line, 0, MarkdownFlavor::MkDocs));
        // Bytes 6..22 (end exclusive) are ":material-check:"
        assert!(is_in_icon_shortcode(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 15, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 21, MarkdownFlavor::MkDocs));
        // Byte 22 onward is " to confirm"
        assert!(!is_in_icon_shortcode(line, 22, MarkdownFlavor::MkDocs));
    }

    #[test]
    fn test_is_in_pymdown_markup() {
        // Test Keys notation
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line, 17, MarkdownFlavor::MkDocs));

        // Test Mark notation
        let line2 = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line2, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 8, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 15, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line2, 23, MarkdownFlavor::MkDocs));

        // Should not match for Standard flavor
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_is_in_mkdocs_markup() {
        // Should combine both icon and pymdown
        let line = ":material-check: and ++ctrl++";
        assert!(is_in_mkdocs_markup(line, 5, MarkdownFlavor::MkDocs)); // In icon
        assert!(is_in_mkdocs_markup(line, 23, MarkdownFlavor::MkDocs)); // In keys
        assert!(!is_in_mkdocs_markup(line, 17, MarkdownFlavor::MkDocs)); // In " and "
    }

    // ==================== Obsidian highlight tests ====================

    #[test]
    fn test_obsidian_highlight_basic() {
        // Obsidian flavor should recognize ==highlight== syntax
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::Obsidian)); // "T"
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // First "="
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "h"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "i"
        assert!(is_in_pymdown_markup(line, 22, MarkdownFlavor::Obsidian)); // Last "="
        assert!(!is_in_pymdown_markup(line, 23, MarkdownFlavor::Obsidian)); // " "
    }

    #[test]
    fn test_obsidian_highlight_multiple() {
        // Multiple highlights on one line
        let line = "Both ==one== and ==two== here";
        assert!(is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // In first
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // "n"
        assert!(!is_in_pymdown_markup(line, 12, MarkdownFlavor::Obsidian)); // Space after
        assert!(is_in_pymdown_markup(line, 17, MarkdownFlavor::Obsidian)); // In second
    }

    #[test]
    fn test_obsidian_highlight_not_standard_flavor() {
        // Standard flavor should NOT recognize ==highlight== as special
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 8, MarkdownFlavor::Standard));
        assert!(!is_in_pymdown_markup(line, 15, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_obsidian_highlight_with_spaces_inside() {
        // Highlights can have spaces inside the content
        let line = "This is ==text with spaces== here";
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "t"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "w"
        assert!(is_in_pymdown_markup(line, 27, MarkdownFlavor::Obsidian)); // "="
    }

    #[test]
    fn test_obsidian_does_not_support_keys_notation() {
        // Obsidian flavor should NOT recognize ++keys++ syntax (that's MkDocs-specific)
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_mkdocs_markup_function() {
        // is_in_mkdocs_markup should also work for Obsidian highlights
        let line = "This is ==highlighted== text";
        assert!(is_in_mkdocs_markup(line, 10, MarkdownFlavor::Obsidian)); // In highlight
        assert!(!is_in_mkdocs_markup(line, 0, MarkdownFlavor::Obsidian)); // Not in highlight
    }

    #[test]
    fn test_obsidian_highlight_edge_cases() {
        // Empty highlight (====) should not match
        let line = "Test ==== here";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // Position at first =
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));

        // Single character highlight
        let line2 = "Test ==a== here";
        assert!(is_in_pymdown_markup(line2, 5, MarkdownFlavor::Obsidian));
        assert!(is_in_pymdown_markup(line2, 7, MarkdownFlavor::Obsidian)); // "a"
        assert!(is_in_pymdown_markup(line2, 9, MarkdownFlavor::Obsidian)); // last =

        // Triple equals (===) should not create highlight
        let line3 = "a === b";
        assert!(!is_in_pymdown_markup(line3, 3, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_highlight_unclosed() {
        // Unclosed highlight should not match
        let line = "This ==starts but never ends";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }
}