rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_snippets;
13use crate::utils::mkdocs_tabs;
14use crate::utils::mkdocstrings_refs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use regex::Regex;
17use std::sync::LazyLock;
18
/// Inline math pattern covering both single `$` and double `$$` delimiters.
///
/// A match is an opening `$` (optionally doubled), one or more characters that
/// are not `$`, and a closing `$` (optionally doubled). The opening and closing
/// delimiters are matched independently, so their lengths need not agree, and
/// because the body is a negated character class it may include newlines.
static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap());
21
/// Half-open span of byte offsets: `start` is inclusive, `end` is exclusive.
///
/// Derives `PartialEq`/`Eq` so ranges can be compared directly (for example in
/// `assert_eq!`), in addition to being cheaply copyable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteRange {
    /// Offset of the first byte covered by the range.
    pub start: usize,
    /// Offset one past the last byte covered by the range.
    pub end: usize,
}
28
29/// Pre-compute all HTML comment ranges in the content
30/// Returns a sorted vector of byte ranges for efficient lookup
31pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
32    HTML_COMMENT_PATTERN
33        .find_iter(content)
34        .map(|m| ByteRange {
35            start: m.start(),
36            end: m.end(),
37        })
38        .collect()
39}
40
41/// Check if a byte position is within any of the pre-computed HTML comment ranges
42/// Uses binary search for O(log n) complexity
43pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
44    // Binary search to find a range that might contain byte_pos
45    ranges
46        .binary_search_by(|range| {
47            if byte_pos < range.start {
48                std::cmp::Ordering::Greater
49            } else if byte_pos >= range.end {
50                std::cmp::Ordering::Less
51            } else {
52                std::cmp::Ordering::Equal
53            }
54        })
55        .is_ok()
56}
57
58/// Check if a line is ENTIRELY within a single HTML comment
59/// Returns true only if both the line start AND end are within the same comment range
60pub fn is_line_entirely_in_html_comment(ranges: &[ByteRange], line_start: usize, line_end: usize) -> bool {
61    for range in ranges {
62        // If line start is within this range, check if line end is also within it
63        if line_start >= range.start && line_start < range.end {
64            return line_end <= range.end;
65        }
66    }
67    false
68}
69
/// Check if a line is within front matter (YAML `---` or TOML `+++`).
///
/// `line_num` is a 0-based line index. Front matter is recognized only when
/// the very first line is exactly `---` or `+++` and a later line is exactly
/// the same delimiter; both delimiter lines count as part of the front matter.
/// An unterminated opening delimiter yields `false` for every line.
///
/// The lines are scanned lazily and the scan stops at the closing delimiter,
/// so no per-call vector of all document lines is allocated.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // Front matter must open on the first line with an exact delimiter.
    let Some(delimiter) = lines.next().filter(|first| matches!(*first, "---" | "+++")) else {
        return false;
    };

    // `position` counts from the second line, so the closing delimiter sits at
    // line index `offset + 1`.
    lines
        .position(|line| line == delimiter)
        .is_some_and(|offset| line_num <= offset + 1)
}
94
95/// Check if a byte position is within any context that should be skipped
96pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
97    // Check standard code contexts
98    if ctx.is_in_code_block_or_span(byte_pos) {
99        return true;
100    }
101
102    // Check HTML comments
103    if is_in_html_comment(ctx.content, byte_pos) {
104        return true;
105    }
106
107    // Check math contexts
108    if is_in_math_context(ctx, byte_pos) {
109        return true;
110    }
111
112    // Check if in HTML tag
113    if is_in_html_tag(ctx, byte_pos) {
114        return true;
115    }
116
117    // Check MkDocs snippet sections and multi-line blocks
118    if ctx.flavor == MarkdownFlavor::MkDocs {
119        if mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
120            return true;
121        }
122        if mkdocs_snippets::is_within_snippet_block(ctx.content, byte_pos) {
123            return true;
124        }
125    }
126
127    // Check MkDocs admonition blocks
128    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
129        return true;
130    }
131
132    // Check MkDocs footnote definitions
133    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
134        return true;
135    }
136
137    // Check MkDocs content tabs
138    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
139        return true;
140    }
141
142    // Check MkDocstrings autodoc blocks
143    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
144        return true;
145    }
146
147    // Check MkDocs Critic Markup
148    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
149        return true;
150    }
151
152    false
153}
154
155/// Check if a line should be skipped due to MkDocs snippet syntax
156pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
157    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
158}
159
160/// Check if a line is a MkDocs admonition marker
161pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
162    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
163}
164
165/// Check if a line is a MkDocs footnote definition
166pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
167    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
168}
169
170/// Check if a line is a MkDocs tab marker
171pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
172    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
173}
174
175/// Check if a line is a MkDocstrings autodoc marker
176pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
177    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
178}
179
180/// Check if a line contains MkDocs Critic Markup
181pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
182    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
183}
184
185/// Check if a byte position is within an HTML comment
186pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
187    for m in HTML_COMMENT_PATTERN.find_iter(content) {
188        if m.start() <= byte_pos && byte_pos < m.end() {
189            return true;
190        }
191    }
192    false
193}
194
195/// Check if a byte position is within an HTML tag
196pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
197    for html_tag in ctx.html_tags().iter() {
198        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
199            return true;
200        }
201    }
202    false
203}
204
205/// Check if a byte position is within a math context (block or inline)
206pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
207    let content = ctx.content;
208
209    // Check if we're in a math block
210    if is_in_math_block(content, byte_pos) {
211        return true;
212    }
213
214    // Check if we're in inline math
215    if is_in_inline_math(content, byte_pos) {
216        return true;
217    }
218
219    false
220}
221
222/// Check if a byte position is within a math block ($$...$$)
223pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
224    let mut in_math_block = false;
225    let mut current_pos = 0;
226
227    for line in content.lines() {
228        let line_start = current_pos;
229        let line_end = current_pos + line.len();
230
231        // Check if this line is a math block delimiter
232        if is_math_block_delimiter(line) {
233            if byte_pos >= line_start && byte_pos <= line_end {
234                // Position is on the delimiter line itself
235                return true;
236            }
237            in_math_block = !in_math_block;
238        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
239            // Position is inside a math block
240            return true;
241        }
242
243        current_pos = line_end + 1; // +1 for newline
244    }
245
246    false
247}
248
249/// Check if a byte position is within inline math ($...$)
250pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
251    // Find all inline math spans
252    for m in INLINE_MATH_REGEX.find_iter(content) {
253        if m.start() <= byte_pos && byte_pos < m.end() {
254            return true;
255        }
256    }
257    false
258}
259
260/// Check if a position is within a table cell
261pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
262    // Check if this line is part of a table
263    for table_row in ctx.table_rows().iter() {
264        if table_row.line == line_num {
265            // This line is part of a table
266            // For now, we'll skip the entire table row
267            // Future enhancement: check specific column boundaries
268            return true;
269        }
270    }
271    false
272}
273
/// Heuristically decide whether `line` looks like part of a Markdown table.
///
/// After trimming, the line qualifies when it is either:
/// - a separator row: only `|`, `-`, `:` and whitespace, with at least one `|`
///   and one `-`; or
/// - a content row: starts and/or ends with `|` and has at least two `|`.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both shapes need at least one pipe.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let only_separator_chars = row
        .chars()
        .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());
    let looks_like_separator = only_separator_chars && row.contains('-');

    let has_edge_pipe = row.starts_with('|') || row.ends_with('|');
    let looks_like_content = has_edge_pipe && pipe_count >= 2;

    looks_like_separator || looks_like_content
}
295
// Unit tests for the byte-offset and line-based context helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;

    /// Positions before, inside, and after a single-line HTML comment.
    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    /// A line counts as "entirely in a comment" only when both its start and
    /// its end fall inside the same pre-computed comment range.
    #[test]
    fn test_is_line_entirely_in_html_comment() {
        // Test 1: Multi-line comment with content after closing
        let content = "<!--\ncomment\n--> Content after comment";
        let ranges = compute_html_comment_ranges(content);
        // Line 0: "<!--" (bytes 0-4) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 0, 4));
        // Line 1: "comment" (bytes 5-12) - entirely in comment
        assert!(is_line_entirely_in_html_comment(&ranges, 5, 12));
        // Line 2: "--> Content after comment" (bytes 13-38) - NOT entirely in comment
        assert!(!is_line_entirely_in_html_comment(&ranges, 13, 38));

        // Test 2: Single-line comment with content after
        let content2 = "<!-- comment --> Not a comment";
        let ranges2 = compute_html_comment_ranges(content2);
        // The entire line is NOT entirely in the comment
        assert!(!is_line_entirely_in_html_comment(&ranges2, 0, 30));

        // Test 3: Single-line comment alone
        let content3 = "<!-- comment -->";
        let ranges3 = compute_html_comment_ranges(content3);
        // The entire line IS entirely in the comment
        assert!(is_line_entirely_in_html_comment(&ranges3, 0, 16));

        // Test 4: Content before comment
        let content4 = "Text before <!-- comment -->";
        let ranges4 = compute_html_comment_ranges(content4);
        // Line start is NOT in the comment range
        assert!(!is_line_entirely_in_html_comment(&ranges4, 0, 28));
    }

    /// Byte layout of the fixture: "Text" 0-3, opening "$$" 5-6,
    /// "math content" 8-19, closing "$$" 21-22, "more text" 24-32.
    #[test]
    fn test_math_block_detection() {
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content" (just after the opening $$ line)
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    /// Both `$...$` and `$$...$$` spans on a single line are detected.
    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    /// Header, separator, and body rows are table lines; prose with a single
    /// interior pipe is not.
    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    /// Line numbers passed to `is_in_front_matter` are 0-based; the assertion
    /// messages use 1-based line numbers.
    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }
}