rumdl_lib/utils/
mkdocstrings_refs.rs

1use regex::Regex;
2/// MkDocstrings cross-references detection utilities
3///
4/// MkDocstrings provides automatic cross-references to documented code objects
5/// using special syntax patterns for Python, JavaScript, and other languages.
6///
7/// Common patterns:
8/// - `::: module.Class` - Auto-doc insertion
9/// - `[module.Class][]` - Cross-reference link
10/// - `[text][module.Class]` - Cross-reference with custom text
11/// - `::: module.Class` with options block (YAML indented)
12use std::sync::LazyLock;
13
14/// Pattern to match auto-doc insertion markers
15/// ::: module.path.ClassName or ::: handler:module.path
16/// Lenient: accepts any non-whitespace after ::: to detect potentially dangerous patterns
17/// Security validation should happen at a different layer (e.g., a specific rule)
18static AUTODOC_MARKER: LazyLock<Regex> = LazyLock::new(|| {
19    Regex::new(
20        r"^(\s*):::\s+\S+.*$", // Just need non-whitespace after :::
21    )
22    .unwrap()
23});
24
25/// Pattern to match cross-reference links in various forms
26/// [module.Class][], [text][module.Class], [module.Class]
27static CROSSREF_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
28    Regex::new(
29        r"\[(?:[^\]]*)\]\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]|\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]\[\]"
30    ).unwrap()
31});
32
33/// Check if a line is an auto-doc insertion marker
34pub fn is_autodoc_marker(line: &str) -> bool {
35    // First check with regex
36    if !AUTODOC_MARKER.is_match(line) {
37        return false;
38    }
39
40    // Additional validation: reject obviously malformed paths
41    // like consecutive dots (module..Class) which Python/JS would reject
42    let trimmed = line.trim();
43    if let Some(start) = trimmed.find(":::") {
44        let after_marker = &trimmed[start + 3..].trim();
45        // Get the module path (first non-whitespace token)
46        if let Some(module_path) = after_marker.split_whitespace().next() {
47            // Reject paths with consecutive dots/colons or starting/ending with separator
48            if module_path.starts_with('.') || module_path.starts_with(':') {
49                return false; // Can't start with separator
50            }
51            if module_path.ends_with('.') || module_path.ends_with(':') {
52                return false; // Can't end with separator
53            }
54            if module_path.contains("..")
55                || module_path.contains("::")
56                || module_path.contains(".:")
57                || module_path.contains(":.")
58            {
59                return false; // No consecutive separators
60            }
61        }
62    }
63
64    // For a linter, we want to be lenient and detect most autodoc-like syntax
65    // even if it contains dangerous or potentially invalid module paths
66    // A separate rule can validate and warn about dangerous patterns
67    true
68}
69
70/// Check if a line contains cross-reference links
71pub fn contains_crossref(line: &str) -> bool {
72    CROSSREF_PATTERN.is_match(line)
73}
74
75/// Get the indentation level of an autodoc marker
76pub fn get_autodoc_indent(line: &str) -> Option<usize> {
77    if AUTODOC_MARKER.is_match(line) {
78        // Use consistent indentation calculation (tabs = 4 spaces)
79        return Some(super::mkdocs_common::get_line_indent(line));
80    }
81    None
82}
83
84/// Check if a line is part of autodoc options (YAML format)
85pub fn is_autodoc_options(line: &str, base_indent: usize) -> bool {
86    // Options must be indented at least 4 spaces more than the ::: marker
87    let line_indent = super::mkdocs_common::get_line_indent(line);
88
89    // Check if properly indented (at least 4 spaces from base)
90    if line_indent >= base_indent + 4 {
91        // Empty lines that are properly indented are considered part of options
92        if line.trim().is_empty() {
93            return true;
94        }
95
96        // YAML key-value pairs
97        if line.contains(':') {
98            return true;
99        }
100        // YAML list items
101        let trimmed = line.trim_start();
102        if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
103            return true;
104        }
105    }
106
107    false
108}
109
110/// Pre-compute all autodoc block ranges in the content
111/// Returns a sorted vector of byte ranges for efficient lookup
112pub fn detect_autodoc_block_ranges(content: &str) -> Vec<crate::utils::skip_context::ByteRange> {
113    let mut ranges = Vec::new();
114    let lines: Vec<&str> = content.lines().collect();
115    let mut byte_pos = 0;
116    let mut in_autodoc = false;
117    let mut autodoc_indent = 0;
118    let mut block_start = 0;
119
120    for line in lines {
121        let line_end = byte_pos + line.len();
122
123        // Check if we're starting an autodoc block
124        if is_autodoc_marker(line) {
125            in_autodoc = true;
126            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
127            block_start = byte_pos;
128        } else if in_autodoc {
129            // Check if we're still in autodoc options
130            if is_autodoc_options(line, autodoc_indent) {
131                // Continue in autodoc block
132            } else {
133                // Not part of options - check if this ends the block
134                // Completely empty lines (no indentation) don't end the block
135                if line.is_empty() {
136                    // Continue in autodoc
137                } else {
138                    // Non-option, non-empty line ends the autodoc block
139                    // Save the range up to the previous line
140                    ranges.push(crate::utils::skip_context::ByteRange {
141                        start: block_start,
142                        end: byte_pos.saturating_sub(1), // Don't include the newline before this line
143                    });
144                    in_autodoc = false;
145                    autodoc_indent = 0;
146                }
147            }
148        }
149
150        // Account for newline character
151        byte_pos = line_end + 1;
152    }
153
154    // If we ended while still in an autodoc block, save it
155    if in_autodoc {
156        ranges.push(crate::utils::skip_context::ByteRange {
157            start: block_start,
158            end: byte_pos.saturating_sub(1),
159        });
160    }
161
162    ranges
163}
164
165/// Check if a position is within any of the pre-computed autodoc block ranges
166pub fn is_within_autodoc_block_ranges(ranges: &[crate::utils::skip_context::ByteRange], position: usize) -> bool {
167    crate::utils::skip_context::is_in_html_comment_ranges(ranges, position)
168}
169
170/// Check if content at a byte position is within an autodoc block (DEPRECATED: use detect_autodoc_block_ranges + is_within_autodoc_block_ranges)
171pub fn is_within_autodoc_block(content: &str, position: usize) -> bool {
172    let lines: Vec<&str> = content.lines().collect();
173    let mut byte_pos = 0;
174    let mut in_autodoc = false;
175    let mut autodoc_indent = 0;
176
177    for line in lines {
178        let line_end = byte_pos + line.len();
179
180        // Check if we're starting an autodoc block
181        if is_autodoc_marker(line) {
182            in_autodoc = true;
183            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
184            // Check if position is on the autodoc marker line itself
185            if byte_pos <= position && position <= line_end {
186                return true;
187            }
188        } else if in_autodoc {
189            // Check if we're still in autodoc options
190            if is_autodoc_options(line, autodoc_indent) {
191                // This line is part of autodoc options
192                if byte_pos <= position && position <= line_end {
193                    return true;
194                }
195            } else {
196                // Not part of options - check if this ends the block
197                // Completely empty lines (no indentation) don't end the block
198                if line.is_empty() {
199                    // Continue in autodoc
200                } else {
201                    // Non-option, non-empty line ends the autodoc block
202                    in_autodoc = false;
203                    autodoc_indent = 0;
204                    // If the position is on this line, it's NOT in the autodoc block
205                    // (since we just ended the block)
206                    if byte_pos <= position && position <= line_end {
207                        return false;
208                    }
209                }
210            }
211        }
212
213        // Account for newline character
214        byte_pos = line_end + 1;
215    }
216
217    false
218}
219
/// Decides whether `ref_text` should be treated as a cross-reference rather than
/// a broken link.
///
/// The heuristic is purely textual: cross-references usually look like
/// `module.Class` or `module:function`, so any text containing a `.` or a `:`
/// is accepted and everything else is rejected.
pub fn is_valid_crossref(ref_text: &str) -> bool {
    ref_text.chars().any(|ch| matches!(ch, '.' | ':'))
}
226
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker lines are accepted with or without indentation and with `.` or `:`
    /// separators; lines without a proper `:::` prefix are rejected.
    #[test]
    fn test_autodoc_marker_detection() {
        let markers = [
            "::: mymodule.MyClass",
            "::: package.module.Class",
            "  ::: indented.Class",
            "::: module:function",
        ];
        for line in markers {
            assert!(is_autodoc_marker(line));
        }

        let non_markers = [":: Wrong number", "Regular text"];
        for line in non_markers {
            assert!(!is_autodoc_marker(line));
        }
    }

    /// Both reference-style shapes are detected; inline links and plain text are not.
    #[test]
    fn test_crossref_detection() {
        let with_crossref = [
            "See [module.Class][]",
            "The [text][module.Class] here",
            "[package.module.Class][]",
            "[custom text][module:function]",
        ];
        for line in with_crossref {
            assert!(contains_crossref(line));
        }

        let without_crossref = ["Regular [link](url)", "No references here"];
        for line in without_crossref {
            assert!(!contains_crossref(line));
        }
    }

    /// Option lines must be indented at least 4 beyond the marker (indent 0 here).
    #[test]
    fn test_autodoc_options() {
        let options = [
            "    handler: python",
            "    options:",
            "      show_source: true",
            // YAML list items
            "            - window",
            "            - app",
        ];
        for line in options {
            assert!(is_autodoc_options(line, 0));
        }

        let not_options = [
            "", // Empty lines are neutral
            "Not indented",
            "  Only 2 spaces",
        ];
        for line in not_options {
            assert!(!is_autodoc_options(line, 0));
        }
    }

    /// Positions on option lines are inside the block; positions on the prose
    /// that follows a block are not.
    #[test]
    fn test_within_autodoc_block() {
        let content = r#"# API Documentation

::: mymodule.MyClass
    handler: python
    options:
      show_source: true
      show_root_heading: true

Regular text here.

::: another.Class

More text."#;

        let offset_of = |needle: &str| content.find(needle).unwrap();

        for inside in ["handler:", "show_source:"] {
            assert!(is_within_autodoc_block(content, offset_of(inside)));
        }
        for outside in ["Regular text", "More text"] {
            assert!(!is_within_autodoc_block(content, offset_of(outside)));
        }
    }

    /// References containing `.` or `:` count as cross-references.
    #[test]
    fn test_valid_crossref() {
        let crossrefs = [
            "module.Class",
            "package.module.Class",
            "module:function",
            "numpy.ndarray",
        ];
        for reference in crossrefs {
            assert!(is_valid_crossref(reference));
        }

        let plain = ["simple_word", "no-dots-here"];
        for reference in plain {
            assert!(!is_valid_crossref(reference));
        }
    }
}