rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::lint_context::LintContext;
7use crate::utils::kramdown_utils::is_math_block_delimiter;
8use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
9use lazy_static::lazy_static;
10use regex::Regex;
11
lazy_static! {
    /// Pattern used to locate inline math spans delimited by `$...$` or `$$...$$`.
    ///
    /// A match is an opening `$` (optionally doubled), one or more characters that are
    /// not `$`, and a closing `$` (optionally doubled). Two consequences of that shape:
    /// the negated class `[^$]` also matches newlines, so a span may cross line
    /// boundaries, and the opening and closing delimiters are not required to have the
    /// same width (for example `$x$$` matches as a whole).
    static ref INLINE_MATH_REGEX: Regex = Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap();
}
16
/// Check if a line is within front matter (both YAML and TOML)
///
/// Front matter is only recognised when the very first line of `content` is exactly
/// `---` (YAML) or `+++` (TOML) and a later line is exactly the same delimiter.
///
/// # Arguments
///
/// * `content` - The full document text.
/// * `line_num` - Zero-based index of the line to test.
///
/// # Returns
///
/// `true` when `line_num` is at or before the closing delimiter line (both delimiter
/// lines count as front matter). `false` when the document does not open with a
/// delimiter, or when the opening delimiter is never closed.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    let mut lines = content.lines();

    // The opening line decides which closing delimiter we are looking for.
    let delimiter = match lines.next() {
        Some("---") => "---",
        Some("+++") => "+++",
        _ => return false,
    };

    // `position` counts from the line after the opener, so the closing delimiter's
    // zero-based line index is `offset + 1`.
    lines
        .position(|line| line == delimiter)
        .map_or(false, |offset| line_num <= offset + 1)
}
41
42/// Check if a byte position is within any context that should be skipped
43pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
44    // Check standard code contexts
45    if ctx.is_in_code_block_or_span(byte_pos) {
46        return true;
47    }
48
49    // Check HTML comments
50    if is_in_html_comment(ctx.content, byte_pos) {
51        return true;
52    }
53
54    // Check math contexts
55    if is_in_math_context(ctx, byte_pos) {
56        return true;
57    }
58
59    // Check if in HTML tag
60    if is_in_html_tag(ctx, byte_pos) {
61        return true;
62    }
63
64    false
65}
66
67/// Check if a byte position is within an HTML comment
68pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
69    for m in HTML_COMMENT_PATTERN.find_iter(content) {
70        if m.start() <= byte_pos && byte_pos < m.end() {
71            return true;
72        }
73    }
74    false
75}
76
77/// Check if a byte position is within an HTML tag
78pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
79    for html_tag in ctx.html_tags().iter() {
80        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
81            return true;
82        }
83    }
84    false
85}
86
87/// Check if a byte position is within a math context (block or inline)
88pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
89    let content = ctx.content;
90
91    // Check if we're in a math block
92    if is_in_math_block(content, byte_pos) {
93        return true;
94    }
95
96    // Check if we're in inline math
97    if is_in_inline_math(content, byte_pos) {
98        return true;
99    }
100
101    false
102}
103
104/// Check if a byte position is within a math block ($$...$$)
105pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
106    let mut in_math_block = false;
107    let mut current_pos = 0;
108
109    for line in content.lines() {
110        let line_start = current_pos;
111        let line_end = current_pos + line.len();
112
113        // Check if this line is a math block delimiter
114        if is_math_block_delimiter(line) {
115            if byte_pos >= line_start && byte_pos <= line_end {
116                // Position is on the delimiter line itself
117                return true;
118            }
119            in_math_block = !in_math_block;
120        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
121            // Position is inside a math block
122            return true;
123        }
124
125        current_pos = line_end + 1; // +1 for newline
126    }
127
128    false
129}
130
131/// Check if a byte position is within inline math ($...$)
132pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
133    // Find all inline math spans
134    for m in INLINE_MATH_REGEX.find_iter(content) {
135        if m.start() <= byte_pos && byte_pos < m.end() {
136            return true;
137        }
138    }
139    false
140}
141
142/// Check if a position is within a table cell
143pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
144    // Check if this line is part of a table
145    for table_row in ctx.table_rows().iter() {
146        if table_row.line == line_num {
147            // This line is part of a table
148            // For now, we'll skip the entire table row
149            // Future enhancement: check specific column boundaries
150            return true;
151        }
152    }
153    false
154}
155
/// Report whether `line` looks like table syntax.
///
/// After trimming surrounding whitespace, a line qualifies in either of two ways:
///
/// * separator row: it contains at least one `|` and one `-`, and consists solely of
///   `|`, `-`, `:` and whitespace;
/// * content row: it starts or ends with `|` and contains at least two `|` in total.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    let only_separator_chars = row
        .chars()
        .all(|ch| matches!(ch, '|' | '-' | ':') || ch.is_whitespace());
    let is_separator_row = only_separator_chars && row.contains('|') && row.contains('-');

    let has_edge_pipe = row.starts_with('|') || row.ends_with('|');
    let is_content_row = has_edge_pipe && row.matches('|').count() >= 2;

    is_separator_row || is_content_row
}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Positions inside, before and after a single HTML comment.
    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    /// Positions on both delimiter lines, inside the block, and outside it.
    #[test]
    fn test_math_block_detection() {
        // Byte layout: "Text\n" = 0..=4, "$$\n" = 5..=7, "math content\n" = 8..=20,
        // "$$\n" = 21..=23, "more text" = 24..=32
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 5)); // On opening $$
        assert!(is_in_math_block(content, 8)); // First byte of the math content
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(is_in_math_block(content, 21)); // On closing $$
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    /// Positions inside single-`$` and double-`$$` spans, and outside both.
    #[test]
    fn test_inline_math_detection() {
        // Byte layout: "$x + y$" = 5..=11, "$$a^2 + b^2$$" = 17..=29
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 14)); // Between the two spans
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    /// Header, separator and body rows are table lines; prose is not.
    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    /// Front matter is detected for YAML and TOML, but only at the start of the document.
    /// Line numbers passed to `is_in_front_matter` are zero-based; the assertion messages
    /// use one-based numbering.
    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );
    }
}