Skip to main content

rumdl_lib/utils/
skip_context.rs

//! Utilities for deciding whether a position in a Markdown document should be skipped by lint rules.
//!
//! This module centralizes context detection for Markdown constructs (HTML comments, front
//! matter, math, tables, and flavor-specific markup) that rules typically skip when processing.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_extensions;
12use crate::utils::mkdocs_footnotes;
13use crate::utils::mkdocs_icons;
14use crate::utils::mkdocs_snippets;
15use crate::utils::mkdocs_tabs;
16use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
17use regex::Regex;
18use std::sync::LazyLock;
19
20/// Enhanced inline math pattern that handles both single $ and double $$ delimiters.
21/// Matches:
22/// - Display math: $$...$$ (zero or more non-$ characters)
23/// - Inline math: $...$ (zero or more non-$ non-newline characters)
24///
25/// The display math pattern is tried first to correctly handle $$content$$.
26/// Critically, both patterns allow ZERO characters between delimiters,
27/// so empty math like $$ or $ $ is consumed and won't pair with other $ signs.
28static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\$[^$]*\$\$|\$[^$\n]*\$").unwrap());
29
/// A half-open span of bytes: `start` is inclusive, `end` is exclusive.
///
/// Derives `PartialEq`/`Eq` so ranges can be compared directly (for example with
/// `assert_eq!` in tests) in addition to being `Debug`, `Clone` and `Copy`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteRange {
    /// Offset of the first byte in the span.
    pub start: usize,
    /// Offset one past the last byte in the span.
    pub end: usize,
}
36
37/// Pre-compute all HTML comment ranges in the content
38/// Returns a sorted vector of byte ranges for efficient lookup
39pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
40    HTML_COMMENT_PATTERN
41        .find_iter(content)
42        .map(|m| ByteRange {
43            start: m.start(),
44            end: m.end(),
45        })
46        .collect()
47}
48
49/// Check if a byte position is within any of the pre-computed HTML comment ranges
50/// Uses binary search for O(log n) complexity
51pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
52    // Binary search to find a range that might contain byte_pos
53    ranges
54        .binary_search_by(|range| {
55            if byte_pos < range.start {
56                std::cmp::Ordering::Greater
57            } else if byte_pos >= range.end {
58                std::cmp::Ordering::Less
59            } else {
60                std::cmp::Ordering::Equal
61            }
62        })
63        .is_ok()
64}
65
66/// Check if a line is ENTIRELY within a single HTML comment
67/// Returns true only if both the line start AND end are within the same comment range
68pub fn is_line_entirely_in_html_comment(ranges: &[ByteRange], line_start: usize, line_end: usize) -> bool {
69    for range in ranges {
70        // If line start is within this range, check if line end is also within it
71        if line_start >= range.start && line_start < range.end {
72            return line_end <= range.end;
73        }
74    }
75    false
76}
77
/// Check whether the zero-based line `line_num` lies inside front matter.
///
/// Front matter is recognised only when the very first line of `content` is exactly
/// `---` (YAML) or `+++` (TOML) and a later line repeats that same fence. Both fence
/// lines count as part of the front matter. Returns `false` when the document does not
/// open with a fence or the opening fence is never closed.
///
/// The lines are streamed rather than collected, so the scan allocates nothing and
/// stops at the closing fence.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // The opening fence must be the first line and also selects the closing fence.
    let fence = match lines.next() {
        Some(first @ ("---" | "+++")) => first,
        _ => return false,
    };

    // `position` counts from the line after the opening fence, so the closing fence's
    // line index in the document is `offset + 1`.
    lines
        .position(|line| line == fence)
        .is_some_and(|offset| line_num <= offset + 1)
}
102
103/// Check if a byte position is within a JSX expression (MDX: {expression})
104#[inline]
105pub fn is_in_jsx_expression(ctx: &LintContext, byte_pos: usize) -> bool {
106    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_jsx_expression(byte_pos)
107}
108
109/// Check if a byte position is within an MDX comment ({/* ... */})
110#[inline]
111pub fn is_in_mdx_comment(ctx: &LintContext, byte_pos: usize) -> bool {
112    ctx.flavor == MarkdownFlavor::MDX && ctx.is_in_mdx_comment(byte_pos)
113}
114
115/// Check if a line should be skipped due to MkDocs snippet syntax
116pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
117    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
118}
119
120/// Check if a line is a MkDocs admonition marker
121pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
122    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
123}
124
125/// Check if a line is a MkDocs footnote definition
126pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
127    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
128}
129
130/// Check if a line is a MkDocs tab marker
131pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
132    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
133}
134
135/// Check if a line contains MkDocs Critic Markup
136pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
137    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
138}
139
140/// Check if a byte position is within an HTML comment
141pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
142    for m in HTML_COMMENT_PATTERN.find_iter(content) {
143        if m.start() <= byte_pos && byte_pos < m.end() {
144            return true;
145        }
146    }
147    false
148}
149
150/// Check if a byte position is within an HTML tag
151pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
152    for html_tag in ctx.html_tags().iter() {
153        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
154            return true;
155        }
156    }
157    false
158}
159
160/// Check if a byte position is within a math context (block or inline)
161pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
162    let content = ctx.content;
163
164    // Check if we're in a math block
165    if is_in_math_block(content, byte_pos) {
166        return true;
167    }
168
169    // Check if we're in inline math
170    if is_in_inline_math(content, byte_pos) {
171        return true;
172    }
173
174    false
175}
176
177/// Check if a byte position is within a math block ($$...$$)
178pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
179    let mut in_math_block = false;
180    let mut current_pos = 0;
181
182    for line in content.lines() {
183        let line_start = current_pos;
184        let line_end = current_pos + line.len();
185
186        // Check if this line is a math block delimiter
187        if is_math_block_delimiter(line) {
188            if byte_pos >= line_start && byte_pos <= line_end {
189                // Position is on the delimiter line itself
190                return true;
191            }
192            in_math_block = !in_math_block;
193        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
194            // Position is inside a math block
195            return true;
196        }
197
198        current_pos = line_end + 1; // +1 for newline
199    }
200
201    false
202}
203
204/// Check if a byte position is within inline math ($...$)
205pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
206    // Find all inline math spans
207    for m in INLINE_MATH_REGEX.find_iter(content) {
208        if m.start() <= byte_pos && byte_pos < m.end() {
209            return true;
210        }
211    }
212    false
213}
214
215/// Check if a position is within a table cell
216pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
217    // Check if this line is part of a table
218    for table_row in ctx.table_rows().iter() {
219        if table_row.line == line_num {
220            // This line is part of a table
221            // For now, we'll skip the entire table row
222            // Future enhancement: check specific column boundaries
223            return true;
224        }
225    }
226    false
227}
228
/// Report whether `line` looks like table syntax.
///
/// After trimming, a line qualifies in either of two ways:
/// - separator row: consists solely of `|`, `-`, `:` and whitespace, with at least
///   one `|` and one `-`;
/// - content row: begins or ends with `|` and holds two or more `|` characters.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    let only_separator_chars = row
        .chars()
        .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());
    let is_separator = only_separator_chars && row.contains('|') && row.contains('-');
    if is_separator {
        return true;
    }

    let has_edge_pipe = row.starts_with('|') || row.ends_with('|');
    has_edge_pipe && row.matches('|').count() >= 2
}
250
251/// Check if a byte position is within an MkDocs icon shortcode
252/// Icon shortcodes use format like `:material-check:`, `:octicons-mark-github-16:`
253pub fn is_in_icon_shortcode(line: &str, position: usize, _flavor: MarkdownFlavor) -> bool {
254    // Only skip for MkDocs flavor, but check pattern for all flavors
255    // since emoji shortcodes are universal
256    mkdocs_icons::is_in_any_shortcode(line, position)
257}
258
259/// Check if a byte position is within PyMdown extension markup
260/// Includes: Keys (++ctrl+alt++), Caret (^text^), Insert (^^text^^), Mark (==text==)
261///
262/// For MkDocs flavor: supports all PyMdown extensions
263/// For Obsidian flavor: only supports Mark (==highlight==) syntax
264pub fn is_in_pymdown_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
265    match flavor {
266        MarkdownFlavor::MkDocs => mkdocs_extensions::is_in_pymdown_markup(line, position),
267        MarkdownFlavor::Obsidian => {
268            // Obsidian supports ==highlight== syntax (same as PyMdown Mark)
269            mkdocs_extensions::is_in_mark(line, position)
270        }
271        _ => false,
272    }
273}
274
275/// Check if a byte position is within flavor-specific markup
276/// For MkDocs: icon shortcodes and PyMdown extensions
277/// For Obsidian: highlight syntax (==text==)
278pub fn is_in_mkdocs_markup(line: &str, position: usize, flavor: MarkdownFlavor) -> bool {
279    if is_in_icon_shortcode(line, position, flavor) {
280        return true;
281    }
282    if is_in_pymdown_markup(line, position, flavor) {
283        return true;
284    }
285    false
286}
287
// Unit tests for the skip-context helpers. Positions passed to the helpers are
// zero-based byte offsets into the test string (or zero-based line indices for
// the front matter checks).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        // The comment occupies bytes 5..21 of the string.
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_is_line_entirely_in_html_comment() {
        // Test 1: Multi-line comment with content after closing
        let content = "<!--\ncomment\n--> Content after comment";
        let ranges = compute_html_comment_ranges(content);
        // Line 0: "<!--" (bytes 0-4) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 0, 4));
        // Line 1: "comment" (bytes 5-12) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 5, 12));
        // Line 2: "--> Content after comment" (bytes 13-38) - NOT entirely in comment
        assert!(!is_line_entirely_in_html_comment(&ranges, 13, 38));

        // Test 2: Single-line comment with content after
        let content2 = "<!-- comment --> Not a comment";
        let ranges2 = compute_html_comment_ranges(content2);
        // The entire line is NOT entirely in the comment
        assert!(!is_line_entirely_in_html_comment(&ranges2, 0, 30));

        // Test 3: Single-line comment alone
        let content3 = "<!-- comment -->";
        let ranges3 = compute_html_comment_ranges(content3);
        // The entire line IS entirely in the comment
        assert!(is_line_entirely_in_html_comment(&ranges3, 0, 16));

        // Test 4: Content before comment
        let content4 = "Text before <!-- comment -->";
        let ranges4 = compute_html_comment_ranges(content4);
        // Line start is NOT in the comment range
        assert!(!is_line_entirely_in_html_comment(&ranges4, 0, 28));
    }

    #[test]
    fn test_math_block_detection() {
        // Byte layout: "Text" 0-3, opening "$$" 5-6, "math content" 8-19,
        // closing "$$" 21-22, "more text" 24-32.
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content" (not the opening $$)
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_inline_math_detection() {
        // "$x + y$" spans bytes 5-11 and "$$a^2 + b^2$$" spans bytes 17-29.
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }

    #[test]
    fn test_is_in_icon_shortcode() {
        let line = "Click :material-check: to confirm";
        // Positions 0-4 are "Click"
        assert!(!is_in_icon_shortcode(line, 0, MarkdownFlavor::MkDocs));
        // Positions 6-21 are ":material-check:"
        assert!(is_in_icon_shortcode(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 15, MarkdownFlavor::MkDocs));
        assert!(is_in_icon_shortcode(line, 21, MarkdownFlavor::MkDocs));
        // Position 22+ is " to confirm"
        assert!(!is_in_icon_shortcode(line, 22, MarkdownFlavor::MkDocs));
    }

    #[test]
    fn test_is_in_pymdown_markup() {
        // Test Keys notation
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 6, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line, 17, MarkdownFlavor::MkDocs));

        // Test Mark notation
        let line2 = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line2, 0, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 8, MarkdownFlavor::MkDocs));
        assert!(is_in_pymdown_markup(line2, 15, MarkdownFlavor::MkDocs));
        assert!(!is_in_pymdown_markup(line2, 23, MarkdownFlavor::MkDocs));

        // Should not match for Standard flavor
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_is_in_mkdocs_markup() {
        // Should combine both icon and pymdown
        let line = ":material-check: and ++ctrl++";
        assert!(is_in_mkdocs_markup(line, 5, MarkdownFlavor::MkDocs)); // In icon
        assert!(is_in_mkdocs_markup(line, 23, MarkdownFlavor::MkDocs)); // In keys
        assert!(!is_in_mkdocs_markup(line, 17, MarkdownFlavor::MkDocs)); // In " and "
    }

    // ==================== Obsidian highlight tests ====================

    #[test]
    fn test_obsidian_highlight_basic() {
        // Obsidian flavor should recognize ==highlight== syntax
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 0, MarkdownFlavor::Obsidian)); // "T"
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // First "="
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "h"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "i"
        assert!(is_in_pymdown_markup(line, 22, MarkdownFlavor::Obsidian)); // Last "="
        assert!(!is_in_pymdown_markup(line, 23, MarkdownFlavor::Obsidian)); // " "
    }

    #[test]
    fn test_obsidian_highlight_multiple() {
        // Multiple highlights on one line
        let line = "Both ==one== and ==two== here";
        assert!(is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // In first
        assert!(is_in_pymdown_markup(line, 8, MarkdownFlavor::Obsidian)); // "n"
        assert!(!is_in_pymdown_markup(line, 12, MarkdownFlavor::Obsidian)); // Space after
        assert!(is_in_pymdown_markup(line, 17, MarkdownFlavor::Obsidian)); // In second
    }

    #[test]
    fn test_obsidian_highlight_not_standard_flavor() {
        // Standard flavor should NOT recognize ==highlight== as special
        let line = "This is ==highlighted== text";
        assert!(!is_in_pymdown_markup(line, 8, MarkdownFlavor::Standard));
        assert!(!is_in_pymdown_markup(line, 15, MarkdownFlavor::Standard));
    }

    #[test]
    fn test_obsidian_highlight_with_spaces_inside() {
        // Highlights can have spaces inside the content
        let line = "This is ==text with spaces== here";
        assert!(is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian)); // "t"
        assert!(is_in_pymdown_markup(line, 15, MarkdownFlavor::Obsidian)); // "w"
        assert!(is_in_pymdown_markup(line, 27, MarkdownFlavor::Obsidian)); // "="
    }

    #[test]
    fn test_obsidian_does_not_support_keys_notation() {
        // Obsidian flavor should NOT recognize ++keys++ syntax (that's MkDocs-specific)
        let line = "Press ++ctrl+c++ to copy";
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_mkdocs_markup_function() {
        // is_in_mkdocs_markup should also work for Obsidian highlights
        let line = "This is ==highlighted== text";
        assert!(is_in_mkdocs_markup(line, 10, MarkdownFlavor::Obsidian)); // In highlight
        assert!(!is_in_mkdocs_markup(line, 0, MarkdownFlavor::Obsidian)); // Not in highlight
    }

    #[test]
    fn test_obsidian_highlight_edge_cases() {
        // Empty highlight (====) should not match
        let line = "Test ==== here";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian)); // Position at first =
        assert!(!is_in_pymdown_markup(line, 6, MarkdownFlavor::Obsidian));

        // Single character highlight
        let line2 = "Test ==a== here";
        assert!(is_in_pymdown_markup(line2, 5, MarkdownFlavor::Obsidian));
        assert!(is_in_pymdown_markup(line2, 7, MarkdownFlavor::Obsidian)); // "a"
        assert!(is_in_pymdown_markup(line2, 9, MarkdownFlavor::Obsidian)); // last =

        // Triple equals (===) should not create highlight
        let line3 = "a === b";
        assert!(!is_in_pymdown_markup(line3, 3, MarkdownFlavor::Obsidian));
    }

    #[test]
    fn test_obsidian_highlight_unclosed() {
        // Unclosed highlight should not match
        let line = "This ==starts but never ends";
        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian));
        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
    }
}
553        let line = "This ==starts but never ends";
554        assert!(!is_in_pymdown_markup(line, 5, MarkdownFlavor::Obsidian));
555        assert!(!is_in_pymdown_markup(line, 10, MarkdownFlavor::Obsidian));
556    }
557}