rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_snippets;
13use crate::utils::mkdocs_tabs;
14use crate::utils::mkdocstrings_refs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use lazy_static::lazy_static;
17use regex::Regex;
18
lazy_static! {
    /// Pattern used to locate inline math spans delimited by `$` or `$$`.
    ///
    /// A match is an opening `$` (optionally doubled), one or more characters
    /// other than `$`, and a closing `$` (optionally doubled). The opening and
    /// closing delimiters are not required to have the same width, so `$x$$`
    /// also matches. The negated class `[^$]` accepts newlines as well, which
    /// means a single match may span several lines.
    ///
    /// The pattern is a constant literal, so the `unwrap` can only fail if the
    /// literal itself is edited into an invalid expression.
    static ref INLINE_MATH_REGEX: Regex = Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap();
}
23
/// Check if a line is within front matter (YAML `---` or TOML `+++`).
///
/// Front matter is only recognised when the very first line of `content` is
/// exactly `---` or `+++` and a later line repeats that same delimiter.
/// Both delimiter lines count as part of the front matter.
///
/// # Arguments
///
/// * `content` - The full document text.
/// * `line_num` - Zero-based index of the line to test.
///
/// # Returns
///
/// `true` when `line_num` lies between the opening and closing delimiter
/// (inclusive). `false` when the document does not start with a delimiter,
/// when the front matter is never closed, or when the line comes after the
/// closing delimiter.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // The opening delimiter must be the first line, matched exactly.
    let delimiter = match lines.next() {
        Some(first) if first == "---" || first == "+++" => first,
        _ => return false,
    };

    // `position` counts from the line after the opener, hence the `+ 1` to
    // convert it back into an index into the whole document. Scanning stops
    // at the first closing delimiter, so no line buffer is ever allocated.
    lines
        .position(|line| line == delimiter)
        .map_or(false, |offset| line_num <= offset + 1)
}
48
49/// Check if a byte position is within any context that should be skipped
50pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
51    // Check standard code contexts
52    if ctx.is_in_code_block_or_span(byte_pos) {
53        return true;
54    }
55
56    // Check HTML comments
57    if is_in_html_comment(ctx.content, byte_pos) {
58        return true;
59    }
60
61    // Check math contexts
62    if is_in_math_context(ctx, byte_pos) {
63        return true;
64    }
65
66    // Check if in HTML tag
67    if is_in_html_tag(ctx, byte_pos) {
68        return true;
69    }
70
71    // Check MkDocs snippet sections
72    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
73        return true;
74    }
75
76    // Check MkDocs admonition blocks
77    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
78        return true;
79    }
80
81    // Check MkDocs footnote definitions
82    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
83        return true;
84    }
85
86    // Check MkDocs content tabs
87    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
88        return true;
89    }
90
91    // Check MkDocstrings autodoc blocks
92    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
93        return true;
94    }
95
96    // Check MkDocs Critic Markup
97    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
98        return true;
99    }
100
101    false
102}
103
104/// Check if a line should be skipped due to MkDocs snippet syntax
105pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
106    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
107}
108
109/// Check if a line is a MkDocs admonition marker
110pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
111    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
112}
113
114/// Check if a line is a MkDocs footnote definition
115pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
116    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
117}
118
119/// Check if a line is a MkDocs tab marker
120pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
121    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
122}
123
124/// Check if a line is a MkDocstrings autodoc marker
125pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
126    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
127}
128
129/// Check if a line contains MkDocs Critic Markup
130pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
131    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
132}
133
134/// Check if a byte position is within an HTML comment
135pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
136    for m in HTML_COMMENT_PATTERN.find_iter(content) {
137        if m.start() <= byte_pos && byte_pos < m.end() {
138            return true;
139        }
140    }
141    false
142}
143
144/// Check if a byte position is within an HTML tag
145pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
146    for html_tag in ctx.html_tags().iter() {
147        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
148            return true;
149        }
150    }
151    false
152}
153
154/// Check if a byte position is within a math context (block or inline)
155pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
156    let content = ctx.content;
157
158    // Check if we're in a math block
159    if is_in_math_block(content, byte_pos) {
160        return true;
161    }
162
163    // Check if we're in inline math
164    if is_in_inline_math(content, byte_pos) {
165        return true;
166    }
167
168    false
169}
170
171/// Check if a byte position is within a math block ($$...$$)
172pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
173    let mut in_math_block = false;
174    let mut current_pos = 0;
175
176    for line in content.lines() {
177        let line_start = current_pos;
178        let line_end = current_pos + line.len();
179
180        // Check if this line is a math block delimiter
181        if is_math_block_delimiter(line) {
182            if byte_pos >= line_start && byte_pos <= line_end {
183                // Position is on the delimiter line itself
184                return true;
185            }
186            in_math_block = !in_math_block;
187        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
188            // Position is inside a math block
189            return true;
190        }
191
192        current_pos = line_end + 1; // +1 for newline
193    }
194
195    false
196}
197
198/// Check if a byte position is within inline math ($...$)
199pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
200    // Find all inline math spans
201    for m in INLINE_MATH_REGEX.find_iter(content) {
202        if m.start() <= byte_pos && byte_pos < m.end() {
203            return true;
204        }
205    }
206    false
207}
208
209/// Check if a position is within a table cell
210pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
211    // Check if this line is part of a table
212    for table_row in ctx.table_rows().iter() {
213        if table_row.line == line_num {
214            // This line is part of a table
215            // For now, we'll skip the entire table row
216            // Future enhancement: check specific column boundaries
217            return true;
218        }
219    }
220    false
221}
222
/// Check if a line contains table syntax.
///
/// After trimming surrounding whitespace, a line is considered table syntax
/// when it is either:
///
/// * a separator row: made up only of `|`, `-`, `:` and whitespace, with at
///   least one `|` and at least one `-`; or
/// * a content row: starts and/or ends with `|` and contains at least two
///   `|` characters.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    // Both shapes need at least one pipe, so bail out early without one.
    let pipe_count = row.matches('|').count();
    if pipe_count == 0 {
        return false;
    }

    let is_separator = row.contains('-')
        && row
            .chars()
            .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());

    let is_content_row = pipe_count >= 2 && (row.starts_with('|') || row.ends_with('|'));

    is_separator || is_content_row
}
244
// Unit tests for the string-only helpers in this module. The helpers that
// need a `LintContext` are not exercised here.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        // Byte offsets: "Text " 0-4, "<!-- comment -->" 5-20, " more text" 21-30
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_math_block_detection() {
        // Byte offsets: "Text" 0-3, opening "$$" 5-6, "math content" 8-19,
        // closing "$$" 21-22, "more text" 24-32 (newlines at 4, 7, 20, 23)
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 8)); // First byte of "math content", just after the opening $$ line
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_inline_math_detection() {
        // Byte offsets: "$x + y$" 5-11, "$$a^2 + b^2$$" 17-29, total length 35
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math (one past the last byte)
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        // A single pipe that is neither leading nor trailing is not a table row
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_front_matter() {
        // Note: `line_num` is zero-based, while the assertion messages below
        // count lines from 1 (so index 0 is "Line 1").

        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        // The closing delimiter line itself still counts as front matter
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        // The closing delimiter line itself still counts as front matter
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }
}