rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_snippets;
13use crate::utils::mkdocs_tabs;
14use crate::utils::mkdocstrings_refs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use lazy_static::lazy_static;
17use regex::Regex;
18
lazy_static! {
    /// Enhanced inline math pattern that handles both single $ and double $$ delimiters
    ///
    /// The pattern matches an opening `$` or `$$`, then one or more characters that are
    /// not `$`, then a closing `$` or `$$`. The opening and closing delimiters are
    /// matched independently of each other, so the pattern itself does not require them
    /// to have the same length. The `[^$]` class also matches newline characters, so a
    /// single match may extend across several lines.
    ///
    /// The pattern is a fixed literal; `unwrap` would only panic if it failed to compile.
    static ref INLINE_MATH_REGEX: Regex = Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap();
}
23
/// Check if a line is within front matter (both YAML and TOML)
///
/// Front matter is recognised only when the very first line of `content` is exactly
/// `---` (YAML) or `+++` (TOML) and a later line repeats that same fence exactly.
/// Fences with surrounding whitespace are not recognised.
///
/// # Arguments
///
/// * `content` - the full document text
/// * `line_num` - zero-based index of the line to test
///
/// # Returns
///
/// `true` when `line_num` is at or before the closing fence line (both fence lines
/// count as front matter). `false` when the document does not open with a fence, or
/// when the opening fence is never closed.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // The opening fence decides which closing fence we have to look for.
    let fence = match lines.next() {
        Some(first @ ("---" | "+++")) => first,
        _ => return false,
    };

    // Scan lazily instead of collecting every line of the document: the search stops at
    // the first closing fence. `position` counts from the second line of the document,
    // so the closing fence sits at line index `offset + 1`.
    match lines.position(|line| line == fence) {
        Some(offset) => line_num <= offset + 1,
        None => false,
    }
}
48
49/// Check if a byte position is within any context that should be skipped
50pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
51    // Check standard code contexts
52    if ctx.is_in_code_block_or_span(byte_pos) {
53        return true;
54    }
55
56    // Check HTML comments
57    if is_in_html_comment(ctx.content, byte_pos) {
58        return true;
59    }
60
61    // Check math contexts
62    if is_in_math_context(ctx, byte_pos) {
63        return true;
64    }
65
66    // Check if in HTML tag
67    if is_in_html_tag(ctx, byte_pos) {
68        return true;
69    }
70
71    // Check MkDocs snippet sections and multi-line blocks
72    if ctx.flavor == MarkdownFlavor::MkDocs {
73        if mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
74            return true;
75        }
76        if mkdocs_snippets::is_within_snippet_block(ctx.content, byte_pos) {
77            return true;
78        }
79    }
80
81    // Check MkDocs admonition blocks
82    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
83        return true;
84    }
85
86    // Check MkDocs footnote definitions
87    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
88        return true;
89    }
90
91    // Check MkDocs content tabs
92    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
93        return true;
94    }
95
96    // Check MkDocstrings autodoc blocks
97    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
98        return true;
99    }
100
101    // Check MkDocs Critic Markup
102    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
103        return true;
104    }
105
106    false
107}
108
109/// Check if a line should be skipped due to MkDocs snippet syntax
110pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
111    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
112}
113
114/// Check if a line is a MkDocs admonition marker
115pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
116    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
117}
118
119/// Check if a line is a MkDocs footnote definition
120pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
121    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
122}
123
124/// Check if a line is a MkDocs tab marker
125pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
126    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
127}
128
129/// Check if a line is a MkDocstrings autodoc marker
130pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
131    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
132}
133
134/// Check if a line contains MkDocs Critic Markup
135pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
136    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
137}
138
139/// Check if a byte position is within an HTML comment
140pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
141    for m in HTML_COMMENT_PATTERN.find_iter(content) {
142        if m.start() <= byte_pos && byte_pos < m.end() {
143            return true;
144        }
145    }
146    false
147}
148
149/// Check if a byte position is within an HTML tag
150pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
151    for html_tag in ctx.html_tags().iter() {
152        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
153            return true;
154        }
155    }
156    false
157}
158
159/// Check if a byte position is within a math context (block or inline)
160pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
161    let content = ctx.content;
162
163    // Check if we're in a math block
164    if is_in_math_block(content, byte_pos) {
165        return true;
166    }
167
168    // Check if we're in inline math
169    if is_in_inline_math(content, byte_pos) {
170        return true;
171    }
172
173    false
174}
175
176/// Check if a byte position is within a math block ($$...$$)
177pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
178    let mut in_math_block = false;
179    let mut current_pos = 0;
180
181    for line in content.lines() {
182        let line_start = current_pos;
183        let line_end = current_pos + line.len();
184
185        // Check if this line is a math block delimiter
186        if is_math_block_delimiter(line) {
187            if byte_pos >= line_start && byte_pos <= line_end {
188                // Position is on the delimiter line itself
189                return true;
190            }
191            in_math_block = !in_math_block;
192        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
193            // Position is inside a math block
194            return true;
195        }
196
197        current_pos = line_end + 1; // +1 for newline
198    }
199
200    false
201}
202
203/// Check if a byte position is within inline math ($...$)
204pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
205    // Find all inline math spans
206    for m in INLINE_MATH_REGEX.find_iter(content) {
207        if m.start() <= byte_pos && byte_pos < m.end() {
208            return true;
209        }
210    }
211    false
212}
213
214/// Check if a position is within a table cell
215pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
216    // Check if this line is part of a table
217    for table_row in ctx.table_rows().iter() {
218        if table_row.line == line_num {
219            // This line is part of a table
220            // For now, we'll skip the entire table row
221            // Future enhancement: check specific column boundaries
222            return true;
223        }
224    }
225    false
226}
227
/// Check if a line contains table syntax
///
/// After trimming surrounding whitespace, a line is reported as table syntax when it is
/// either:
/// * a separator row: made up only of `|`, `-`, `:` and whitespace, with at least one
///   `|` and at least one `-`; or
/// * a content row: starts or ends with `|` and contains at least two `|` characters.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both shapes need at least one pipe, so a pipe-free line can be rejected up front.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let looks_like_separator = row.contains('-')
        && row
            .chars()
            .all(|c| matches!(c, '|' | '-' | ':') || c.is_whitespace());

    let looks_like_content_row =
        pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'));

    looks_like_separator || looks_like_content_row
}
249
#[cfg(test)]
mod tests {
    use super::*;

    /// Offsets inside `<!-- ... -->` are reported; offsets on either side are not.
    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        // (byte offset, expected result): inside, before, and after the comment.
        let cases = [(10, true), (0, false), (25, false)];
        for (byte_pos, expected) in cases {
            assert_eq!(is_in_html_comment(content, byte_pos), expected);
        }
    }

    /// Offsets within a `$$` fenced block are reported; surrounding text is not.
    #[test]
    fn test_math_block_detection() {
        let content = "Text\n$$\nmath content\n$$\nmore text";
        // Offset 8 is the first byte of the block body, 15 is further into the body,
        // 0 precedes the block and 30 follows it.
        let cases = [(8, true), (15, true), (0, false), (30, false)];
        for (byte_pos, expected) in cases {
            assert_eq!(is_in_math_block(content, byte_pos), expected);
        }
    }

    /// Both `$...$` and `$$...$$` spans on a single line are detected.
    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        // Inside the first span, inside the second span, before any math, past the end.
        let cases = [(7, true), (20, true), (0, false), (35, false)];
        for (byte_pos, expected) in cases {
            assert_eq!(is_in_inline_math(content, byte_pos), expected);
        }
    }

    /// Header, separator and body rows are table lines; prose with a stray pipe is not.
    #[test]
    fn test_table_line_detection() {
        let table_lines = [
            "| Header | Column |",
            "|--------|--------|",
            "| Cell 1 | Cell 2 |",
        ];
        for line in table_lines {
            assert!(is_table_line(line));
        }

        let other_lines = ["Regular text", "Just a pipe | here"];
        for line in other_lines {
            assert!(!is_table_line(line));
        }
    }

    /// Front matter is detected for YAML and TOML fences at the top of the document only.
    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content =
            "---\ntitle: \"My Post\"\ntags: [\"test\", \"example\"]\n---\n\n# Content";

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content =
            "+++\ntitle = \"My Post\"\ntags = [\"test\", \"example\"]\n+++\n\n# Content";

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content =
            "# Content\n\n+++\ntitle = \"Not frontmatter\"\n+++\n\nMore content";

        for line_num in 2..=4 {
            assert!(
                !is_in_front_matter(mixed_content, line_num),
                "TOML block not at beginning should NOT be front matter"
            );
        }
    }
}