rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_footnotes;
11use crate::utils::mkdocs_snippets;
12use crate::utils::mkdocs_tabs;
13use crate::utils::mkdocstrings_refs;
14use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
15use lazy_static::lazy_static;
16use regex::Regex;
17
18lazy_static! {
19    /// Enhanced inline math pattern that handles both single $ and double $$ delimiters
20    static ref INLINE_MATH_REGEX: Regex = Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap();
21}
22
/// Check if a line is within front matter (both YAML and TOML)
///
/// Front matter is recognized only when the very first line of `content` is exactly
/// `---` (YAML) or `+++` (TOML) and a later line is exactly the same fence. Both fence
/// lines count as part of the front matter.
///
/// # Arguments
///
/// * `content` - The full document text.
/// * `line_num` - Zero-based line index to test.
///
/// # Returns
///
/// `true` when `line_num` lies between the opening fence and its closing fence
/// (inclusive). `false` when the document has no opening fence on its first line or
/// the fence is never closed.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // The opening fence decides which closing fence we look for; a YAML block is
    // never closed by `+++` and vice versa.
    let fence = match lines.next() {
        Some("---") => "---",
        Some("+++") => "+++",
        _ => return false,
    };

    // `lines` now starts at the second line, so a hit at offset `n` is line `n + 1`.
    // Scanning lazily stops at the closing fence instead of materializing every line.
    lines
        .position(|line| line == fence)
        .map_or(false, |offset| line_num <= offset + 1)
}
47
48/// Check if a byte position is within any context that should be skipped
49pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
50    // Check standard code contexts
51    if ctx.is_in_code_block_or_span(byte_pos) {
52        return true;
53    }
54
55    // Check HTML comments
56    if is_in_html_comment(ctx.content, byte_pos) {
57        return true;
58    }
59
60    // Check math contexts
61    if is_in_math_context(ctx, byte_pos) {
62        return true;
63    }
64
65    // Check if in HTML tag
66    if is_in_html_tag(ctx, byte_pos) {
67        return true;
68    }
69
70    // Check MkDocs snippet sections
71    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
72        return true;
73    }
74
75    // Check MkDocs admonition blocks
76    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
77        return true;
78    }
79
80    // Check MkDocs footnote definitions
81    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
82        return true;
83    }
84
85    // Check MkDocs content tabs
86    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
87        return true;
88    }
89
90    // Check MkDocstrings autodoc blocks
91    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
92        return true;
93    }
94
95    false
96}
97
98/// Check if a line should be skipped due to MkDocs snippet syntax
99pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
100    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
101}
102
103/// Check if a line is a MkDocs admonition marker
104pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
105    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
106}
107
108/// Check if a line is a MkDocs footnote definition
109pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
110    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
111}
112
113/// Check if a line is a MkDocs tab marker
114pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
115    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
116}
117
118/// Check if a line is a MkDocstrings autodoc marker
119pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
120    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
121}
122
123/// Check if a byte position is within an HTML comment
124pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
125    for m in HTML_COMMENT_PATTERN.find_iter(content) {
126        if m.start() <= byte_pos && byte_pos < m.end() {
127            return true;
128        }
129    }
130    false
131}
132
133/// Check if a byte position is within an HTML tag
134pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
135    for html_tag in ctx.html_tags().iter() {
136        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
137            return true;
138        }
139    }
140    false
141}
142
143/// Check if a byte position is within a math context (block or inline)
144pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
145    let content = ctx.content;
146
147    // Check if we're in a math block
148    if is_in_math_block(content, byte_pos) {
149        return true;
150    }
151
152    // Check if we're in inline math
153    if is_in_inline_math(content, byte_pos) {
154        return true;
155    }
156
157    false
158}
159
160/// Check if a byte position is within a math block ($$...$$)
161pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
162    let mut in_math_block = false;
163    let mut current_pos = 0;
164
165    for line in content.lines() {
166        let line_start = current_pos;
167        let line_end = current_pos + line.len();
168
169        // Check if this line is a math block delimiter
170        if is_math_block_delimiter(line) {
171            if byte_pos >= line_start && byte_pos <= line_end {
172                // Position is on the delimiter line itself
173                return true;
174            }
175            in_math_block = !in_math_block;
176        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
177            // Position is inside a math block
178            return true;
179        }
180
181        current_pos = line_end + 1; // +1 for newline
182    }
183
184    false
185}
186
187/// Check if a byte position is within inline math ($...$)
188pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
189    // Find all inline math spans
190    for m in INLINE_MATH_REGEX.find_iter(content) {
191        if m.start() <= byte_pos && byte_pos < m.end() {
192            return true;
193        }
194    }
195    false
196}
197
198/// Check if a position is within a table cell
199pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
200    // Check if this line is part of a table
201    for table_row in ctx.table_rows().iter() {
202        if table_row.line == line_num {
203            // This line is part of a table
204            // For now, we'll skip the entire table row
205            // Future enhancement: check specific column boundaries
206            return true;
207        }
208    }
209    false
210}
211
/// Heuristically decide whether `line` looks like part of a pipe table.
///
/// After trimming surrounding whitespace, a line qualifies when it is either
///
/// * a separator row: made up solely of `|`, `-`, `:` and whitespace, with at least one
///   `|` and at least one `-`; or
/// * a content row: it starts or ends with `|` and contains two or more `|` characters.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both shapes need at least one pipe, so pipe-free lines can be rejected up front.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let looks_like_separator = row.contains('-')
        && row
            .chars()
            .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());

    let looks_like_content_row = pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'));

    looks_like_separator || looks_like_content_row
}
233
/// Unit tests for the string-based helpers in this module (those that do not need a
/// `LintContext`).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_math_block_detection() {
        // Byte layout: "Text" 0..4, opening "$$" 5..7, "math content" 8..20,
        // closing "$$" 21..23, "more text" 24..33.
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 5)); // On opening $$
        assert!(is_in_math_block(content, 8)); // First byte of math content
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(is_in_math_block(content, 21)); // On closing $$
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_front_matter() {
        // `is_in_front_matter` takes zero-based line indices; the assertion messages
        // use one-based line numbers.

        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }
}