rumdl_lib/utils/
mkdocstrings_refs.rs

//! MkDocstrings cross-reference detection utilities.
//!
//! MkDocstrings provides automatic cross-references to documented code objects
//! using special syntax patterns for Python, JavaScript, and other languages.
//!
//! Common patterns:
//! - `::: module.Class` - Auto-doc insertion
//! - `[module.Class][]` - Cross-reference link
//! - `[text][module.Class]` - Cross-reference with custom text
//! - `::: module.Class` followed by an options block (indented YAML)
11use lazy_static::lazy_static;
12use regex::Regex;
13
14lazy_static! {
15    /// Pattern to match auto-doc insertion markers
16    /// ::: module.path.ClassName or ::: handler:module.path
17    /// Lenient: accepts any non-whitespace after ::: to detect potentially dangerous patterns
18    /// Security validation should happen at a different layer (e.g., a specific rule)
19    static ref AUTODOC_MARKER: Regex = Regex::new(
20        r"^(\s*):::\s+\S+.*$"  // Just need non-whitespace after :::
21    ).unwrap();
22
23    /// Pattern to match cross-reference links in various forms
24    /// [module.Class][], [text][module.Class], [module.Class]
25    static ref CROSSREF_PATTERN: Regex = Regex::new(
26        r"\[(?:[^\]]*)\]\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]|\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]\[\]"
27    ).unwrap();
28
29    /// Pattern to match handler options in YAML format (indented under :::)
30    static ref HANDLER_OPTIONS: Regex = Regex::new(
31        r"^(\s{4,})\w+:"
32    ).unwrap();
33
34    /// Pattern to validate module/class names
35    static ref VALID_MODULE_PATH: Regex = Regex::new(
36        r"^[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*$"
37    ).unwrap();
38}
39
40/// Check if a line is an auto-doc insertion marker
41pub fn is_autodoc_marker(line: &str) -> bool {
42    // First check with regex
43    if !AUTODOC_MARKER.is_match(line) {
44        return false;
45    }
46
47    // Additional validation: reject obviously malformed paths
48    // like consecutive dots (module..Class) which Python/JS would reject
49    let trimmed = line.trim();
50    if let Some(start) = trimmed.find(":::") {
51        let after_marker = &trimmed[start + 3..].trim();
52        // Get the module path (first non-whitespace token)
53        if let Some(module_path) = after_marker.split_whitespace().next() {
54            // Reject paths with consecutive dots/colons or starting/ending with separator
55            if module_path.starts_with('.') || module_path.starts_with(':') {
56                return false; // Can't start with separator
57            }
58            if module_path.ends_with('.') || module_path.ends_with(':') {
59                return false; // Can't end with separator
60            }
61            if module_path.contains("..")
62                || module_path.contains("::")
63                || module_path.contains(".:")
64                || module_path.contains(":.")
65            {
66                return false; // No consecutive separators
67            }
68        }
69    }
70
71    // For a linter, we want to be lenient and detect most autodoc-like syntax
72    // even if it contains dangerous or potentially invalid module paths
73    // A separate rule can validate and warn about dangerous patterns
74    true
75}
76
77/// Check if a line contains cross-reference links
78pub fn contains_crossref(line: &str) -> bool {
79    CROSSREF_PATTERN.is_match(line)
80}
81
82/// Get the indentation level of an autodoc marker
83pub fn get_autodoc_indent(line: &str) -> Option<usize> {
84    if AUTODOC_MARKER.is_match(line) {
85        // Use consistent indentation calculation (tabs = 4 spaces)
86        return Some(super::mkdocs_common::get_line_indent(line));
87    }
88    None
89}
90
91/// Check if a line is part of autodoc options (YAML format)
92pub fn is_autodoc_options(line: &str, base_indent: usize) -> bool {
93    // Options must be indented at least 4 spaces more than the ::: marker
94    let line_indent = super::mkdocs_common::get_line_indent(line);
95
96    // Empty lines within options are allowed
97    if line.trim().is_empty() {
98        return true;
99    }
100
101    // Check if it looks like YAML options (key: value format)
102    if line_indent >= base_indent + 4 && line.contains(':') {
103        return true;
104    }
105
106    false
107}
108
109/// Check if content at a byte position is within an autodoc block
110pub fn is_within_autodoc_block(content: &str, position: usize) -> bool {
111    let lines: Vec<&str> = content.lines().collect();
112    let mut byte_pos = 0;
113    let mut in_autodoc = false;
114    let mut autodoc_indent = 0;
115
116    for line in lines {
117        let line_end = byte_pos + line.len();
118
119        // Check if we're starting an autodoc block
120        if is_autodoc_marker(line) {
121            in_autodoc = true;
122            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
123        } else if in_autodoc {
124            // Check if we're still in autodoc options
125            if !is_autodoc_options(line, autodoc_indent) && !line.trim().is_empty() {
126                // Non-option line that's not empty ends the autodoc block
127                in_autodoc = false;
128                autodoc_indent = 0;
129            }
130        }
131
132        // Check if the position is within this line and we're in an autodoc block
133        if byte_pos <= position && position <= line_end && in_autodoc {
134            return true;
135        }
136
137        // Account for newline character
138        byte_pos = line_end + 1;
139    }
140
141    false
142}
143
/// Decide whether a reference label should be treated as a cross-reference
/// rather than a broken link.
///
/// The heuristic is purely syntactic: a label counts as a cross-reference
/// when it contains at least one `.` or `:` (as in `module.Class` or
/// `module:function`).
pub fn is_valid_crossref(ref_text: &str) -> bool {
    ref_text.chars().any(|ch| ch == '.' || ch == ':')
}
150
#[cfg(test)]
mod tests {
    use super::*;

    /// Well-formed markers are detected; lines without a `:::` marker are not.
    #[test]
    fn test_autodoc_marker_detection() {
        let markers = [
            "::: mymodule.MyClass",
            "::: package.module.Class",
            "  ::: indented.Class",
            "::: module:function",
        ];
        for &line in markers.iter() {
            assert!(is_autodoc_marker(line), "expected a marker: {:?}", line);
        }

        let non_markers = [":: Wrong number", "Regular text"];
        for &line in non_markers.iter() {
            assert!(!is_autodoc_marker(line), "expected no marker: {:?}", line);
        }
    }

    /// Reference-style cross-references are found; inline links and plain
    /// text are not.
    #[test]
    fn test_crossref_detection() {
        let cases = [
            ("See [module.Class][]", true),
            ("The [text][module.Class] here", true),
            ("[package.module.Class][]", true),
            ("[custom text][module:function]", true),
            ("Regular [link](url)", false),
            ("No references here", false),
        ];
        for &(line, expected) in cases.iter() {
            assert_eq!(contains_crossref(line), expected, "line: {:?}", line);
        }
    }

    /// Option lines need 4+ columns of indentation relative to the marker;
    /// blank lines are always accepted.
    #[test]
    fn test_autodoc_options() {
        let cases = [
            ("    handler: python", true),
            ("    options:", true),
            ("      show_source: true", true),
            ("", true), // Empty lines allowed
            ("Not indented", false),
            ("  Only 2 spaces", false),
        ];
        for &(line, expected) in cases.iter() {
            assert_eq!(is_autodoc_options(line, 0), expected, "line: {:?}", line);
        }
    }

    /// Offsets inside an options block are inside the autodoc block; offsets
    /// in ordinary paragraphs are not.
    #[test]
    fn test_within_autodoc_block() {
        let content = r#"# API Documentation

::: mymodule.MyClass
    handler: python
    options:
      show_source: true
      show_root_heading: true

Regular text here.

::: another.Class

More text."#;

        let offset_of = |needle: &str| content.find(needle).unwrap();

        for &needle in ["handler:", "show_source:"].iter() {
            assert!(
                is_within_autodoc_block(content, offset_of(needle)),
                "expected {:?} to be inside an autodoc block",
                needle
            );
        }

        for &needle in ["Regular text", "More text"].iter() {
            assert!(
                !is_within_autodoc_block(content, offset_of(needle)),
                "expected {:?} to be outside any autodoc block",
                needle
            );
        }
    }

    /// Labels containing `.` or `:` are treated as cross-references.
    #[test]
    fn test_valid_crossref() {
        let cases = [
            ("module.Class", true),
            ("package.module.Class", true),
            ("module:function", true),
            ("numpy.ndarray", true),
            ("simple_word", false),
            ("no-dots-here", false),
        ];
        for &(label, expected) in cases.iter() {
            assert_eq!(is_valid_crossref(label), expected, "label: {:?}", label);
        }
    }
}