rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_snippets;
13use crate::utils::mkdocs_tabs;
14use crate::utils::mkdocstrings_refs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use regex::Regex;
17use std::sync::LazyLock;
18
19/// Enhanced inline math pattern that handles both single $ and double $$ delimiters
20static INLINE_MATH_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap());
21
/// A half-open span of bytes: `start` is inclusive, `end` is exclusive.
///
/// Derives `PartialEq`/`Eq` so ranges can be compared directly (for example
/// with `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteRange {
    /// Byte offset of the first byte in the span.
    pub start: usize,
    /// Byte offset one past the last byte in the span.
    pub end: usize,
}
28
29/// Pre-compute all HTML comment ranges in the content
30/// Returns a sorted vector of byte ranges for efficient lookup
31pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
32    HTML_COMMENT_PATTERN
33        .find_iter(content)
34        .map(|m| ByteRange {
35            start: m.start(),
36            end: m.end(),
37        })
38        .collect()
39}
40
41/// Check if a byte position is within any of the pre-computed HTML comment ranges
42/// Uses binary search for O(log n) complexity
43pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
44    // Binary search to find a range that might contain byte_pos
45    ranges
46        .binary_search_by(|range| {
47            if byte_pos < range.start {
48                std::cmp::Ordering::Greater
49            } else if byte_pos >= range.end {
50                std::cmp::Ordering::Less
51            } else {
52                std::cmp::Ordering::Equal
53            }
54        })
55        .is_ok()
56}
57
/// Report whether the zero-based line `line_num` belongs to a front matter block.
///
/// Front matter is recognized only when the very first line is exactly `---`
/// (YAML) or `+++` (TOML). The block extends through the first later line that
/// is exactly the same fence; both fence lines count as front matter. If the
/// opening fence is never closed, nothing is treated as front matter.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut rest = content.lines();

    // Only a fence on the first line can open front matter.
    let fence = match rest.next() {
        Some(opening @ ("---" | "+++")) => opening,
        _ => return false,
    };

    // `position` counts from the second line, so the closing fence sits at
    // line index `offset + 1`.
    rest.position(|candidate| candidate == fence)
        .is_some_and(|offset| line_num <= offset + 1)
}
82
83/// Check if a byte position is within any context that should be skipped
84pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
85    // Check standard code contexts
86    if ctx.is_in_code_block_or_span(byte_pos) {
87        return true;
88    }
89
90    // Check HTML comments
91    if is_in_html_comment(ctx.content, byte_pos) {
92        return true;
93    }
94
95    // Check math contexts
96    if is_in_math_context(ctx, byte_pos) {
97        return true;
98    }
99
100    // Check if in HTML tag
101    if is_in_html_tag(ctx, byte_pos) {
102        return true;
103    }
104
105    // Check MkDocs snippet sections and multi-line blocks
106    if ctx.flavor == MarkdownFlavor::MkDocs {
107        if mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
108            return true;
109        }
110        if mkdocs_snippets::is_within_snippet_block(ctx.content, byte_pos) {
111            return true;
112        }
113    }
114
115    // Check MkDocs admonition blocks
116    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
117        return true;
118    }
119
120    // Check MkDocs footnote definitions
121    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
122        return true;
123    }
124
125    // Check MkDocs content tabs
126    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
127        return true;
128    }
129
130    // Check MkDocstrings autodoc blocks
131    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
132        return true;
133    }
134
135    // Check MkDocs Critic Markup
136    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
137        return true;
138    }
139
140    false
141}
142
143/// Check if a line should be skipped due to MkDocs snippet syntax
144pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
145    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
146}
147
148/// Check if a line is a MkDocs admonition marker
149pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
150    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
151}
152
153/// Check if a line is a MkDocs footnote definition
154pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
155    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
156}
157
158/// Check if a line is a MkDocs tab marker
159pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
160    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
161}
162
163/// Check if a line is a MkDocstrings autodoc marker
164pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
165    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
166}
167
168/// Check if a line contains MkDocs Critic Markup
169pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
170    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
171}
172
173/// Check if a byte position is within an HTML comment
174pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
175    for m in HTML_COMMENT_PATTERN.find_iter(content) {
176        if m.start() <= byte_pos && byte_pos < m.end() {
177            return true;
178        }
179    }
180    false
181}
182
183/// Check if a byte position is within an HTML tag
184pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
185    for html_tag in ctx.html_tags().iter() {
186        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
187            return true;
188        }
189    }
190    false
191}
192
193/// Check if a byte position is within a math context (block or inline)
194pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
195    let content = ctx.content;
196
197    // Check if we're in a math block
198    if is_in_math_block(content, byte_pos) {
199        return true;
200    }
201
202    // Check if we're in inline math
203    if is_in_inline_math(content, byte_pos) {
204        return true;
205    }
206
207    false
208}
209
210/// Check if a byte position is within a math block ($$...$$)
211pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
212    let mut in_math_block = false;
213    let mut current_pos = 0;
214
215    for line in content.lines() {
216        let line_start = current_pos;
217        let line_end = current_pos + line.len();
218
219        // Check if this line is a math block delimiter
220        if is_math_block_delimiter(line) {
221            if byte_pos >= line_start && byte_pos <= line_end {
222                // Position is on the delimiter line itself
223                return true;
224            }
225            in_math_block = !in_math_block;
226        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
227            // Position is inside a math block
228            return true;
229        }
230
231        current_pos = line_end + 1; // +1 for newline
232    }
233
234    false
235}
236
237/// Check if a byte position is within inline math ($...$)
238pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
239    // Find all inline math spans
240    for m in INLINE_MATH_REGEX.find_iter(content) {
241        if m.start() <= byte_pos && byte_pos < m.end() {
242            return true;
243        }
244    }
245    false
246}
247
248/// Check if a position is within a table cell
249pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
250    // Check if this line is part of a table
251    for table_row in ctx.table_rows().iter() {
252        if table_row.line == line_num {
253            // This line is part of a table
254            // For now, we'll skip the entire table row
255            // Future enhancement: check specific column boundaries
256            return true;
257        }
258    }
259    false
260}
261
/// Heuristically decide whether `line` looks like part of a markdown table.
///
/// After trimming surrounding whitespace, a line qualifies when either:
/// - it is a separator row: only `|`, `-`, `:` and whitespace, with at least
///   one `|` and one `-`; or
/// - it is a content row: it starts or ends with `|` and contains at least
///   two `|` characters.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();
    let pipe_count = row.matches('|').count();

    let only_separator_chars = row
        .chars()
        .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());
    let is_separator_row = only_separator_chars && pipe_count > 0 && row.contains('-');

    let has_edge_pipe = row.starts_with('|') || row.ends_with('|');
    let is_content_row = has_edge_pipe && pipe_count >= 2;

    is_separator_row || is_content_row
}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Positions inside, before, and after a single HTML comment.
    #[test]
    fn test_html_comment_detection() {
        // The comment occupies bytes 5..21.
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    /// Positions around a `$$` block, including both delimiter lines.
    #[test]
    fn test_math_block_detection() {
        // Line starts: "Text" 0, "$$" 5, "math content" 8, "$$" 21, "more text" 24.
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 5)); // On opening $$
        assert!(is_in_math_block(content, 8)); // First byte of the math content
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(is_in_math_block(content, 21)); // On closing $$
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    /// Single-dollar and double-dollar inline spans.
    #[test]
    fn test_inline_math_detection() {
        // `$x + y$` spans bytes 5..12, `$$a^2 + b^2$$` spans bytes 17..30.
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    /// Header, separator, and cell rows versus ordinary text.
    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    /// YAML and TOML front matter; `line_num` is zero-based, the messages use
    /// one-based line numbers.
    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }
}