rumdl_lib/utils/
mkdocstrings_refs.rs

//! MkDocstrings cross-reference detection utilities.
//!
//! MkDocstrings provides automatic cross-references to documented code objects
//! using special syntax patterns for Python, JavaScript, and other languages.
//!
//! Common patterns:
//! - `::: module.Class` - Auto-doc insertion
//! - `[module.Class][]` - Cross-reference link
//! - `[text][module.Class]` - Cross-reference with custom text
//! - `::: module.Class` followed by an indented YAML options block
11use lazy_static::lazy_static;
12use regex::Regex;
13
14lazy_static! {
15    /// Pattern to match auto-doc insertion markers
16    /// ::: module.path.ClassName or ::: handler:module.path
17    /// Lenient: accepts any non-whitespace after ::: to detect potentially dangerous patterns
18    /// Security validation should happen at a different layer (e.g., a specific rule)
19    static ref AUTODOC_MARKER: Regex = Regex::new(
20        r"^(\s*):::\s+\S+.*$"  // Just need non-whitespace after :::
21    ).unwrap();
22
23    /// Pattern to match cross-reference links in various forms
24    /// [module.Class][], [text][module.Class], [module.Class]
25    static ref CROSSREF_PATTERN: Regex = Regex::new(
26        r"\[(?:[^\]]*)\]\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]|\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]\[\]"
27    ).unwrap();
28
29    /// Pattern to match handler options in YAML format (indented under :::)
30    static ref HANDLER_OPTIONS: Regex = Regex::new(
31        r"^(\s{4,})\w+:"
32    ).unwrap();
33
34    /// Pattern to validate module/class names
35    static ref VALID_MODULE_PATH: Regex = Regex::new(
36        r"^[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*$"
37    ).unwrap();
38}
39
40/// Check if a line is an auto-doc insertion marker
41pub fn is_autodoc_marker(line: &str) -> bool {
42    // First check with regex
43    if !AUTODOC_MARKER.is_match(line) {
44        return false;
45    }
46
47    // Additional validation: reject obviously malformed paths
48    // like consecutive dots (module..Class) which Python/JS would reject
49    let trimmed = line.trim();
50    if let Some(start) = trimmed.find(":::") {
51        let after_marker = &trimmed[start + 3..].trim();
52        // Get the module path (first non-whitespace token)
53        if let Some(module_path) = after_marker.split_whitespace().next() {
54            // Reject paths with consecutive dots/colons or starting/ending with separator
55            if module_path.starts_with('.') || module_path.starts_with(':') {
56                return false; // Can't start with separator
57            }
58            if module_path.ends_with('.') || module_path.ends_with(':') {
59                return false; // Can't end with separator
60            }
61            if module_path.contains("..")
62                || module_path.contains("::")
63                || module_path.contains(".:")
64                || module_path.contains(":.")
65            {
66                return false; // No consecutive separators
67            }
68        }
69    }
70
71    // For a linter, we want to be lenient and detect most autodoc-like syntax
72    // even if it contains dangerous or potentially invalid module paths
73    // A separate rule can validate and warn about dangerous patterns
74    true
75}
76
77/// Check if a line contains cross-reference links
78pub fn contains_crossref(line: &str) -> bool {
79    CROSSREF_PATTERN.is_match(line)
80}
81
82/// Get the indentation level of an autodoc marker
83pub fn get_autodoc_indent(line: &str) -> Option<usize> {
84    if AUTODOC_MARKER.is_match(line) {
85        // Use consistent indentation calculation (tabs = 4 spaces)
86        return Some(super::mkdocs_common::get_line_indent(line));
87    }
88    None
89}
90
91/// Check if a line is part of autodoc options (YAML format)
92pub fn is_autodoc_options(line: &str, base_indent: usize) -> bool {
93    // Options must be indented at least 4 spaces more than the ::: marker
94    let line_indent = super::mkdocs_common::get_line_indent(line);
95
96    // Check if properly indented (at least 4 spaces from base)
97    if line_indent >= base_indent + 4 {
98        // Empty lines that are properly indented are considered part of options
99        if line.trim().is_empty() {
100            return true;
101        }
102
103        // YAML key-value pairs
104        if line.contains(':') {
105            return true;
106        }
107        // YAML list items
108        let trimmed = line.trim_start();
109        if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
110            return true;
111        }
112    }
113
114    false
115}
116
117/// Check if content at a byte position is within an autodoc block
118pub fn is_within_autodoc_block(content: &str, position: usize) -> bool {
119    let lines: Vec<&str> = content.lines().collect();
120    let mut byte_pos = 0;
121    let mut in_autodoc = false;
122    let mut autodoc_indent = 0;
123
124    for line in lines {
125        let line_end = byte_pos + line.len();
126
127        // Check if we're starting an autodoc block
128        if is_autodoc_marker(line) {
129            in_autodoc = true;
130            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
131            // Check if position is on the autodoc marker line itself
132            if byte_pos <= position && position <= line_end {
133                return true;
134            }
135        } else if in_autodoc {
136            // Check if we're still in autodoc options
137            if is_autodoc_options(line, autodoc_indent) {
138                // This line is part of autodoc options
139                if byte_pos <= position && position <= line_end {
140                    return true;
141                }
142            } else {
143                // Not part of options - check if this ends the block
144                // Completely empty lines (no indentation) don't end the block
145                if line.is_empty() {
146                    // Continue in autodoc
147                } else {
148                    // Non-option, non-empty line ends the autodoc block
149                    in_autodoc = false;
150                    autodoc_indent = 0;
151                    // If the position is on this line, it's NOT in the autodoc block
152                    // (since we just ended the block)
153                    if byte_pos <= position && position <= line_end {
154                        return false;
155                    }
156                }
157            }
158        }
159
160        // Account for newline character
161        byte_pos = line_end + 1;
162    }
163
164    false
165}
166
/// Decide whether a reference target looks like a cross-reference rather
/// than a broken link.
///
/// Cross-reference targets typically take the form `module.Class` or
/// `module:function`, so any text containing a `.` or a `:` qualifies.
pub fn is_valid_crossref(ref_text: &str) -> bool {
    ref_text.chars().any(|c| matches!(c, '.' | ':'))
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker lines are accepted with or without indentation; anything that
    /// lacks the exact `:::` prefix is rejected.
    #[test]
    fn test_autodoc_marker_detection() {
        let cases = [
            ("::: mymodule.MyClass", true),
            ("::: package.module.Class", true),
            ("  ::: indented.Class", true),
            ("::: module:function", true),
            (":: Wrong number", false),
            ("Regular text", false),
        ];

        for (line, expected) in cases.iter() {
            assert_eq!(is_autodoc_marker(line), *expected, "line: {:?}", line);
        }
    }

    /// Reference-style links are detected; inline links and plain text are not.
    #[test]
    fn test_crossref_detection() {
        let cases = [
            ("See [module.Class][]", true),
            ("The [text][module.Class] here", true),
            ("[package.module.Class][]", true),
            ("[custom text][module:function]", true),
            ("Regular [link](url)", false),
            ("No references here", false),
        ];

        for (line, expected) in cases.iter() {
            assert_eq!(contains_crossref(line), *expected, "line: {:?}", line);
        }
    }

    /// Option lines need at least four columns of indentation beyond the marker.
    #[test]
    fn test_autodoc_options() {
        let cases = [
            ("    handler: python", true),
            ("    options:", true),
            ("      show_source: true", true),
            // Empty lines are neutral
            ("", false),
            ("Not indented", false),
            ("  Only 2 spaces", false),
            // YAML list items
            ("            - window", true),
            ("            - app", true),
        ];

        for (line, expected) in cases.iter() {
            assert_eq!(is_autodoc_options(line, 0), *expected, "line: {:?}", line);
        }
    }

    /// Positions on option lines are inside the block; surrounding prose is not.
    #[test]
    fn test_within_autodoc_block() {
        let content = r#"# API Documentation

::: mymodule.MyClass
    handler: python
    options:
      show_source: true
      show_root_heading: true

Regular text here.

::: another.Class

More text."#;

        let offset_of = |needle: &str| content.find(needle).unwrap();
        let cases = [
            ("handler:", true),
            ("show_source:", true),
            ("Regular text", false),
            ("More text", false),
        ];

        for (needle, expected) in cases.iter() {
            assert_eq!(
                is_within_autodoc_block(content, offset_of(needle)),
                *expected,
                "needle: {:?}",
                needle
            );
        }
    }

    /// A target counts as a cross-reference only if it has a `.` or `:`.
    #[test]
    fn test_valid_crossref() {
        let cases = [
            ("module.Class", true),
            ("package.module.Class", true),
            ("module:function", true),
            ("numpy.ndarray", true),
            ("simple_word", false),
            ("no-dots-here", false),
        ];

        for (text, expected) in cases.iter() {
            assert_eq!(is_valid_crossref(text), *expected, "text: {:?}", text);
        }
    }
}
247}