Skip to main content

rumdl_lib/utils/
mkdocstrings_refs.rs

1use regex::Regex;
2/// MkDocstrings cross-references detection utilities
3///
4/// MkDocstrings provides automatic cross-references to documented code objects
5/// using special syntax patterns for Python, JavaScript, and other languages.
6///
7/// Common patterns:
8/// - `::: module.Class` - Auto-doc insertion
9/// - `[module.Class][]` - Cross-reference link
10/// - `[text][module.Class]` - Cross-reference with custom text
11/// - `::: module.Class` with options block (YAML indented)
12use std::sync::LazyLock;
13
14/// Pre-filter regex for auto-doc insertion markers.
15/// Matches any `:::` followed by non-whitespace. The actual validation
16/// (requiring `.` or `:` separators, rejecting Pandoc syntax) happens
17/// in `is_autodoc_marker()`.
18static AUTODOC_MARKER: LazyLock<Regex> = LazyLock::new(|| {
19    Regex::new(
20        r"^(\s*):::\s+\S+.*$", // Pre-filter: any non-whitespace after :::
21    )
22    .unwrap()
23});
24
25/// Pattern to match cross-reference links in various forms
26/// [module.Class][], [text][module.Class], [module.Class]
27static CROSSREF_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
28    Regex::new(
29        r"\[(?:[^\]]*)\]\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]|\[[a-zA-Z_][a-zA-Z0-9_]*(?:[:\.][a-zA-Z_][a-zA-Z0-9_]*)*\]\[\]"
30    ).unwrap()
31});
32
33/// Check if a line is an auto-doc insertion marker
34///
35/// Matches mkdocstrings syntax `::: module.Class` but NOT Pandoc fenced divs
36/// like `::: warning` or `::: {.note}`. The key distinction is that autodoc
37/// paths contain at least one `.` or `:` separator (e.g., `package.module`,
38/// `handler:path`), while Pandoc divs use plain words or `{}`-wrapped classes.
39pub fn is_autodoc_marker(line: &str) -> bool {
40    // First check with regex
41    if !AUTODOC_MARKER.is_match(line) {
42        return false;
43    }
44
45    let trimmed = line.trim();
46    if let Some(start) = trimmed.find(":::") {
47        let after_marker = &trimmed[start + 3..].trim();
48        // Get the module path (first non-whitespace token)
49        if let Some(module_path) = after_marker.split_whitespace().next() {
50            // Reject Pandoc attribute syntax: ::: {.note}, ::: {#id .class}
51            if module_path.starts_with('{') {
52                return false;
53            }
54
55            // Require at least one `.` or `:` separator to distinguish module
56            // paths (package.module.Class, handler:module) from Pandoc fenced
57            // div names (warning, note, danger)
58            if !module_path.contains('.') && !module_path.contains(':') {
59                return false;
60            }
61
62            // Reject malformed paths: can't start/end with separator
63            if module_path.starts_with('.') || module_path.starts_with(':') {
64                return false;
65            }
66            if module_path.ends_with('.') || module_path.ends_with(':') {
67                return false;
68            }
69            // Reject consecutive separators (module..Class, handler::path)
70            if module_path.contains("..")
71                || module_path.contains("::")
72                || module_path.contains(".:")
73                || module_path.contains(":.")
74            {
75                return false;
76            }
77        }
78    }
79
80    true
81}
82
/// Check if a line contains cross-reference links
///
/// Returns `true` when `CROSSREF_PATTERN` matches anywhere in `line`. Only the
/// presence of a match is reported, not its position or how many there are.
pub fn contains_crossref(line: &str) -> bool {
    CROSSREF_PATTERN.is_match(line)
}
87
88/// Get the indentation level of an autodoc marker
89pub fn get_autodoc_indent(line: &str) -> Option<usize> {
90    if is_autodoc_marker(line) {
91        return Some(super::mkdocs_common::get_line_indent(line));
92    }
93    None
94}
95
96/// Check if a line is part of autodoc options (YAML format)
97pub fn is_autodoc_options(line: &str, base_indent: usize) -> bool {
98    // Options must be indented at least 4 spaces more than the ::: marker
99    let line_indent = super::mkdocs_common::get_line_indent(line);
100
101    // Check if properly indented (at least 4 spaces from base)
102    if line_indent >= base_indent + 4 {
103        // Empty lines that are properly indented are considered part of options
104        if line.trim().is_empty() {
105            return true;
106        }
107
108        // YAML key-value pairs
109        if line.contains(':') {
110            return true;
111        }
112        // YAML list items
113        let trimmed = line.trim_start();
114        if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
115            return true;
116        }
117    }
118
119    false
120}
121
122/// Pre-compute all autodoc block ranges in the content
123/// Returns a sorted vector of byte ranges for efficient lookup
124pub fn detect_autodoc_block_ranges(content: &str) -> Vec<crate::utils::skip_context::ByteRange> {
125    let mut ranges = Vec::new();
126    let lines: Vec<&str> = content.lines().collect();
127    let mut byte_pos = 0;
128    let mut in_autodoc = false;
129    let mut autodoc_indent = 0;
130    let mut block_start = 0;
131
132    for line in lines {
133        let line_end = byte_pos + line.len();
134
135        // Check if we're starting an autodoc block
136        if is_autodoc_marker(line) {
137            in_autodoc = true;
138            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
139            block_start = byte_pos;
140        } else if in_autodoc {
141            // Check if we're still in autodoc options
142            if is_autodoc_options(line, autodoc_indent) {
143                // Continue in autodoc block
144            } else {
145                // Not part of options - check if this ends the block
146                // Completely empty lines (no indentation) don't end the block
147                if line.is_empty() {
148                    // Continue in autodoc
149                } else {
150                    // Non-option, non-empty line ends the autodoc block
151                    // Save the range up to the previous line
152                    ranges.push(crate::utils::skip_context::ByteRange {
153                        start: block_start,
154                        end: byte_pos.saturating_sub(1), // Don't include the newline before this line
155                    });
156                    in_autodoc = false;
157                    autodoc_indent = 0;
158                }
159            }
160        }
161
162        // Account for newline character
163        byte_pos = line_end + 1;
164    }
165
166    // If we ended while still in an autodoc block, save it
167    if in_autodoc {
168        ranges.push(crate::utils::skip_context::ByteRange {
169            start: block_start,
170            end: byte_pos.saturating_sub(1),
171        });
172    }
173
174    ranges
175}
176
/// Check if a position is within any of the pre-computed autodoc block ranges
///
/// The lookup is delegated unchanged to
/// `skip_context::is_in_html_comment_ranges`, so that helper defines the exact
/// boundary handling.
/// NOTE(review): presumably `ranges` is the output of
/// `detect_autodoc_block_ranges` and `position` is a byte offset into the same
/// content — confirm against callers.
pub fn is_within_autodoc_block_ranges(ranges: &[crate::utils::skip_context::ByteRange], position: usize) -> bool {
    crate::utils::skip_context::is_in_html_comment_ranges(ranges, position)
}
181
/// Decide whether a reference target looks like a code cross-reference
/// (and so should not be reported as a broken link).
///
/// The heuristic is purely lexical: the text qualifies as soon as it contains
/// a `.` or a `:` anywhere, as in `module.Class` or `module:function`.
pub fn is_valid_crossref(ref_text: &str) -> bool {
    ref_text.chars().any(|ch| matches!(ch, '.' | ':'))
}
188
#[cfg(test)]
mod tests {
    use super::*;

    /// Marker detection: dotted / colon-separated paths are accepted, while
    /// malformed syntax, Pandoc fenced divs and malformed paths are rejected.
    #[test]
    fn test_autodoc_marker_detection() {
        let accepted = [
            // Valid mkdocstrings autodoc markers (dotted or colon-separated paths)
            "::: mymodule.MyClass",
            "::: package.module.Class",
            "  ::: indented.Class",
            "::: module:function",
            "::: handler:package.module",
            "::: a.b",
        ];
        for line in accepted.iter() {
            assert!(is_autodoc_marker(line), "should be a marker: {line:?}");
        }

        let rejected = [
            // Not autodoc: wrong syntax
            ":: Wrong number",
            "Regular text",
            ":::",
            ":::    ",
            // Not autodoc: Pandoc fenced divs (plain words, no separator)
            "::: warning",
            "::: note",
            "::: danger",
            "::: sidebar",
            "  ::: callout",
            // Not autodoc: Pandoc attribute syntax
            "::: {.note}",
            "::: {#myid .warning}",
            "::: {.note .important}",
            // Not autodoc: malformed paths
            "::: .starts.with.dot",
            "::: ends.with.dot.",
            "::: has..consecutive.dots",
            "::: :starts.with.colon",
        ];
        for line in rejected.iter() {
            assert!(!is_autodoc_marker(line), "should not be a marker: {line:?}");
        }
    }

    /// Cross-reference detection on whole lines.
    #[test]
    fn test_crossref_detection() {
        let cases = [
            ("See [module.Class][]", true),
            ("The [text][module.Class] here", true),
            ("[package.module.Class][]", true),
            ("[custom text][module:function]", true),
            ("Regular [link](url)", false),
            ("No references here", false),
        ];
        for &(line, expected) in cases.iter() {
            assert_eq!(contains_crossref(line), expected, "line: {line:?}");
        }
    }

    /// Option-line detection relative to a marker at indent 0.
    #[test]
    fn test_autodoc_options() {
        let cases = [
            ("    handler: python", true),
            ("    options:", true),
            ("      show_source: true", true),
            // Empty lines are neutral
            ("", false),
            ("Not indented", false),
            ("  Only 2 spaces", false),
            // YAML list items
            ("            - window", true),
            ("            - app", true),
        ];
        for &(line, expected) in cases.iter() {
            assert_eq!(is_autodoc_options(line, 0), expected, "line: {line:?}");
        }
    }

    /// Lexical cross-reference heuristic: needs a `.` or `:` somewhere.
    #[test]
    fn test_valid_crossref() {
        let cases = [
            ("module.Class", true),
            ("package.module.Class", true),
            ("module:function", true),
            ("numpy.ndarray", true),
            ("simple_word", false),
            ("no-dots-here", false),
        ];
        for &(text, expected) in cases.iter() {
            assert_eq!(is_valid_crossref(text), expected, "ref: {text:?}");
        }
    }
}
260}