rumdl_lib/utils/
skip_context.rs

1//! Utilities for determining if a position in markdown should be skipped from processing
2//!
3//! This module provides centralized context detection for various markdown constructs
4//! that should typically be skipped when processing rules.
5
6use crate::config::MarkdownFlavor;
7use crate::lint_context::LintContext;
8use crate::utils::kramdown_utils::is_math_block_delimiter;
9use crate::utils::mkdocs_admonitions;
10use crate::utils::mkdocs_critic;
11use crate::utils::mkdocs_footnotes;
12use crate::utils::mkdocs_snippets;
13use crate::utils::mkdocs_tabs;
14use crate::utils::mkdocstrings_refs;
15use crate::utils::regex_cache::HTML_COMMENT_PATTERN;
16use lazy_static::lazy_static;
17use regex::Regex;
18
19lazy_static! {
20    /// Enhanced inline math pattern that handles both single $ and double $$ delimiters
21    static ref INLINE_MATH_REGEX: Regex = Regex::new(r"\$(?:\$)?[^$]+\$(?:\$)?").unwrap();
22}
23
/// Half-open span of bytes within a document (`start` inclusive, `end` exclusive).
///
/// The type is a plain value: it is `Copy`, and two ranges compare equal when
/// both of their bounds are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ByteRange {
    /// Byte offset of the first byte covered by the range.
    pub start: usize,
    /// Byte offset one past the last byte covered by the range.
    pub end: usize,
}
30
31/// Pre-compute all HTML comment ranges in the content
32/// Returns a sorted vector of byte ranges for efficient lookup
33pub fn compute_html_comment_ranges(content: &str) -> Vec<ByteRange> {
34    HTML_COMMENT_PATTERN
35        .find_iter(content)
36        .map(|m| ByteRange {
37            start: m.start(),
38            end: m.end(),
39        })
40        .collect()
41}
42
43/// Check if a byte position is within any of the pre-computed HTML comment ranges
44/// Uses binary search for O(log n) complexity
45pub fn is_in_html_comment_ranges(ranges: &[ByteRange], byte_pos: usize) -> bool {
46    // Binary search to find a range that might contain byte_pos
47    ranges
48        .binary_search_by(|range| {
49            if byte_pos < range.start {
50                std::cmp::Ordering::Greater
51            } else if byte_pos >= range.end {
52                std::cmp::Ordering::Less
53            } else {
54                std::cmp::Ordering::Equal
55            }
56        })
57        .is_ok()
58}
59
/// Check if a line is within front matter (YAML `---` or TOML `+++`).
///
/// `line_num` is a zero-based line index. Front matter is only recognised when
/// the very first line of `content` is exactly `---` or `+++` and a later line
/// is exactly the same fence; both fence lines count as being inside the front
/// matter. An opening fence with no matching closing fence yields `false` for
/// every line.
pub fn is_in_front_matter(content: &str, line_num: usize) -> bool {
    // Walk the lines lazily instead of collecting the whole document into a Vec.
    let mut rows = content.lines();

    // The opening fence decides which closing fence we are looking for.
    let fence = match rows.next() {
        Some("---") => "---",
        Some("+++") => "+++",
        _ => return false,
    };

    // `position` counts from the line after the opening fence, so the closing
    // fence sits at zero-based line index `offset + 1`.
    match rows.position(|row| row == fence) {
        Some(offset) => line_num <= offset + 1,
        None => false,
    }
}
84
85/// Check if a byte position is within any context that should be skipped
86pub fn is_in_skip_context(ctx: &LintContext, byte_pos: usize) -> bool {
87    // Check standard code contexts
88    if ctx.is_in_code_block_or_span(byte_pos) {
89        return true;
90    }
91
92    // Check HTML comments
93    if is_in_html_comment(ctx.content, byte_pos) {
94        return true;
95    }
96
97    // Check math contexts
98    if is_in_math_context(ctx, byte_pos) {
99        return true;
100    }
101
102    // Check if in HTML tag
103    if is_in_html_tag(ctx, byte_pos) {
104        return true;
105    }
106
107    // Check MkDocs snippet sections and multi-line blocks
108    if ctx.flavor == MarkdownFlavor::MkDocs {
109        if mkdocs_snippets::is_within_snippet_section(ctx.content, byte_pos) {
110            return true;
111        }
112        if mkdocs_snippets::is_within_snippet_block(ctx.content, byte_pos) {
113            return true;
114        }
115    }
116
117    // Check MkDocs admonition blocks
118    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_within_admonition(ctx.content, byte_pos) {
119        return true;
120    }
121
122    // Check MkDocs footnote definitions
123    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_within_footnote_definition(ctx.content, byte_pos) {
124        return true;
125    }
126
127    // Check MkDocs content tabs
128    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_within_tab_content(ctx.content, byte_pos) {
129        return true;
130    }
131
132    // Check MkDocstrings autodoc blocks
133    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_within_autodoc_block(ctx.content, byte_pos) {
134        return true;
135    }
136
137    // Check MkDocs Critic Markup
138    if ctx.flavor == MarkdownFlavor::MkDocs && mkdocs_critic::is_within_critic_markup(ctx.content, byte_pos) {
139        return true;
140    }
141
142    false
143}
144
145/// Check if a line should be skipped due to MkDocs snippet syntax
146pub fn is_mkdocs_snippet_line(line: &str, flavor: MarkdownFlavor) -> bool {
147    flavor == MarkdownFlavor::MkDocs && mkdocs_snippets::is_snippet_marker(line)
148}
149
150/// Check if a line is a MkDocs admonition marker
151pub fn is_mkdocs_admonition_line(line: &str, flavor: MarkdownFlavor) -> bool {
152    flavor == MarkdownFlavor::MkDocs && mkdocs_admonitions::is_admonition_marker(line)
153}
154
155/// Check if a line is a MkDocs footnote definition
156pub fn is_mkdocs_footnote_line(line: &str, flavor: MarkdownFlavor) -> bool {
157    flavor == MarkdownFlavor::MkDocs && mkdocs_footnotes::is_footnote_definition(line)
158}
159
160/// Check if a line is a MkDocs tab marker
161pub fn is_mkdocs_tab_line(line: &str, flavor: MarkdownFlavor) -> bool {
162    flavor == MarkdownFlavor::MkDocs && mkdocs_tabs::is_tab_marker(line)
163}
164
165/// Check if a line is a MkDocstrings autodoc marker
166pub fn is_mkdocstrings_autodoc_line(line: &str, flavor: MarkdownFlavor) -> bool {
167    flavor == MarkdownFlavor::MkDocs && mkdocstrings_refs::is_autodoc_marker(line)
168}
169
170/// Check if a line contains MkDocs Critic Markup
171pub fn is_mkdocs_critic_line(line: &str, flavor: MarkdownFlavor) -> bool {
172    flavor == MarkdownFlavor::MkDocs && mkdocs_critic::contains_critic_markup(line)
173}
174
175/// Check if a byte position is within an HTML comment
176pub fn is_in_html_comment(content: &str, byte_pos: usize) -> bool {
177    for m in HTML_COMMENT_PATTERN.find_iter(content) {
178        if m.start() <= byte_pos && byte_pos < m.end() {
179            return true;
180        }
181    }
182    false
183}
184
185/// Check if a byte position is within an HTML tag
186pub fn is_in_html_tag(ctx: &LintContext, byte_pos: usize) -> bool {
187    for html_tag in ctx.html_tags().iter() {
188        if html_tag.byte_offset <= byte_pos && byte_pos < html_tag.byte_end {
189            return true;
190        }
191    }
192    false
193}
194
195/// Check if a byte position is within a math context (block or inline)
196pub fn is_in_math_context(ctx: &LintContext, byte_pos: usize) -> bool {
197    let content = ctx.content;
198
199    // Check if we're in a math block
200    if is_in_math_block(content, byte_pos) {
201        return true;
202    }
203
204    // Check if we're in inline math
205    if is_in_inline_math(content, byte_pos) {
206        return true;
207    }
208
209    false
210}
211
212/// Check if a byte position is within a math block ($$...$$)
213pub fn is_in_math_block(content: &str, byte_pos: usize) -> bool {
214    let mut in_math_block = false;
215    let mut current_pos = 0;
216
217    for line in content.lines() {
218        let line_start = current_pos;
219        let line_end = current_pos + line.len();
220
221        // Check if this line is a math block delimiter
222        if is_math_block_delimiter(line) {
223            if byte_pos >= line_start && byte_pos <= line_end {
224                // Position is on the delimiter line itself
225                return true;
226            }
227            in_math_block = !in_math_block;
228        } else if in_math_block && byte_pos >= line_start && byte_pos <= line_end {
229            // Position is inside a math block
230            return true;
231        }
232
233        current_pos = line_end + 1; // +1 for newline
234    }
235
236    false
237}
238
239/// Check if a byte position is within inline math ($...$)
240pub fn is_in_inline_math(content: &str, byte_pos: usize) -> bool {
241    // Find all inline math spans
242    for m in INLINE_MATH_REGEX.find_iter(content) {
243        if m.start() <= byte_pos && byte_pos < m.end() {
244            return true;
245        }
246    }
247    false
248}
249
250/// Check if a position is within a table cell
251pub fn is_in_table_cell(ctx: &LintContext, line_num: usize, _col: usize) -> bool {
252    // Check if this line is part of a table
253    for table_row in ctx.table_rows().iter() {
254        if table_row.line == line_num {
255            // This line is part of a table
256            // For now, we'll skip the entire table row
257            // Future enhancement: check specific column boundaries
258            return true;
259        }
260    }
261    false
262}
263
/// Heuristically decide whether `line` looks like part of a pipe table.
///
/// After trimming surrounding whitespace, a line qualifies when either:
/// - it is a separator row: it consists solely of `|`, `-`, `:` and
///   whitespace, and contains at least one `|` and one `-`; or
/// - it is a content row: it starts or ends with `|` and contains at least
///   two `|` characters in total.
pub fn is_table_line(line: &str) -> bool {
    let row = line.trim();

    let is_separator_char = |ch: char| matches!(ch, '|' | '-' | ':') || ch.is_whitespace();
    let looks_like_separator =
        row.contains('|') && row.contains('-') && row.chars().all(is_separator_char);
    if looks_like_separator {
        return true;
    }

    let has_outer_pipe = row.starts_with('|') || row.ends_with('|');
    let pipe_count = row.chars().filter(|&ch| ch == '|').count();
    has_outer_pipe && pipe_count >= 2
}
285
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_html_comment_detection() {
        let content = "Text <!-- comment --> more text";
        assert!(is_in_html_comment(content, 10)); // Inside comment
        assert!(!is_in_html_comment(content, 0)); // Before comment
        assert!(!is_in_html_comment(content, 25)); // After comment
    }

    #[test]
    fn test_html_comment_range_lookup() {
        // Hand-built ranges keep this test independent of the comment regex.
        let ranges = [
            ByteRange { start: 5, end: 21 },
            ByteRange { start: 30, end: 42 },
        ];
        assert!(!is_in_html_comment_ranges(&ranges, 4)); // Just before first range
        assert!(is_in_html_comment_ranges(&ranges, 5)); // Start is inclusive
        assert!(is_in_html_comment_ranges(&ranges, 20)); // Last byte of first range
        assert!(!is_in_html_comment_ranges(&ranges, 21)); // End is exclusive
        assert!(is_in_html_comment_ranges(&ranges, 35)); // Inside second range
        assert!(!is_in_html_comment_ranges(&ranges, 42)); // After all ranges
        assert!(!is_in_html_comment_ranges(&[], 0)); // No ranges at all
    }

    #[test]
    fn test_math_block_detection() {
        // Byte layout: "Text" 0..4, "$$" 5..7, "math content" 8..20, "$$" 21..23
        let content = "Text\n$$\nmath content\n$$\nmore text";
        assert!(is_in_math_block(content, 5)); // On opening $$
        assert!(is_in_math_block(content, 8)); // First byte of the math line
        assert!(is_in_math_block(content, 15)); // Inside math block
        assert!(is_in_math_block(content, 21)); // On closing $$
        assert!(!is_in_math_block(content, 0)); // Before math block
        assert!(!is_in_math_block(content, 30)); // After math block
    }

    #[test]
    fn test_inline_math_detection() {
        let content = "Text $x + y$ and $$a^2 + b^2$$ here";
        assert!(is_in_inline_math(content, 7)); // Inside first math
        assert!(is_in_inline_math(content, 20)); // Inside second math
        assert!(!is_in_inline_math(content, 0)); // Before math
        assert!(!is_in_inline_math(content, 35)); // After math
    }

    #[test]
    fn test_table_line_detection() {
        assert!(is_table_line("| Header | Column |"));
        assert!(is_table_line("|--------|--------|"));
        assert!(is_table_line("| Cell 1 | Cell 2 |"));
        assert!(!is_table_line("Regular text"));
        assert!(!is_table_line("Just a pipe | here"));
    }

    #[test]
    fn test_is_in_front_matter() {
        // Test YAML frontmatter
        let yaml_content = r#"---
title: "My Post"
tags: ["test", "example"]
---

# Content"#;

        assert!(
            is_in_front_matter(yaml_content, 0),
            "Line 1 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 2),
            "Line 3 should be in YAML front matter"
        );
        assert!(
            is_in_front_matter(yaml_content, 3),
            "Line 4 should be in YAML front matter"
        );
        assert!(
            !is_in_front_matter(yaml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML frontmatter
        let toml_content = r#"+++
title = "My Post"
tags = ["test", "example"]
+++

# Content"#;

        assert!(
            is_in_front_matter(toml_content, 0),
            "Line 1 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 2),
            "Line 3 should be in TOML front matter"
        );
        assert!(
            is_in_front_matter(toml_content, 3),
            "Line 4 should be in TOML front matter"
        );
        assert!(
            !is_in_front_matter(toml_content, 4),
            "Line 5 should NOT be in front matter"
        );

        // Test TOML blocks NOT at beginning (should not be considered front matter)
        let mixed_content = r#"# Content

+++
title = "Not frontmatter"
+++

More content"#;

        assert!(
            !is_in_front_matter(mixed_content, 2),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 3),
            "TOML block not at beginning should NOT be front matter"
        );
        assert!(
            !is_in_front_matter(mixed_content, 4),
            "TOML block not at beginning should NOT be front matter"
        );

        // An opening fence that is never closed is not front matter
        assert!(
            !is_in_front_matter("---\ntitle: x\n", 0),
            "Unterminated fence should NOT be front matter"
        );
    }
}