Skip to main content

rumdl_lib/utils/
mkdocstrings_refs.rs

//! MkDocstrings cross-references detection utilities
//!
//! MkDocstrings provides automatic cross-references to documented code objects
//! using special syntax patterns for Python, JavaScript, and other languages.
//!
//! Common patterns:
//! - `::: module.Class` - Auto-doc insertion
//! - `[module.Class][]` - Cross-reference link
//! - `[text][module.Class]` - Cross-reference with custom text
//! - `::: module.Class` with options block (YAML indented)

use regex::Regex;
use std::sync::LazyLock;
13
14/// Pre-filter regex for auto-doc insertion markers.
15/// Matches any `:::` followed by non-whitespace. The actual validation
16/// (requiring `.` or `:` separators, rejecting Pandoc syntax) happens
17/// in `is_autodoc_marker()`.
18static AUTODOC_MARKER: LazyLock<Regex> = LazyLock::new(|| {
19    Regex::new(
20        r"^(\s*):::\s+\S+.*$", // Pre-filter: any non-whitespace after :::
21    )
22    .unwrap()
23});
24
25/// Check if a line is an auto-doc insertion marker
26///
27/// Matches mkdocstrings syntax `::: module.Class` but NOT Pandoc fenced divs
28/// like `::: warning` or `::: {.note}`. The key distinction is that autodoc
29/// paths contain at least one `.` or `:` separator (e.g., `package.module`,
30/// `handler:path`), while Pandoc divs use plain words or `{}`-wrapped classes.
31pub fn is_autodoc_marker(line: &str) -> bool {
32    // First check with regex
33    if !AUTODOC_MARKER.is_match(line) {
34        return false;
35    }
36
37    let trimmed = line.trim();
38    if let Some(start) = trimmed.find(":::") {
39        let after_marker = &trimmed[start + 3..].trim();
40        // Get the module path (first non-whitespace token)
41        if let Some(module_path) = after_marker.split_whitespace().next() {
42            // Reject Pandoc attribute syntax: ::: {.note}, ::: {#id .class}
43            if module_path.starts_with('{') {
44                return false;
45            }
46
47            // Require at least one `.` or `:` separator to distinguish module
48            // paths (package.module.Class, handler:module) from Pandoc fenced
49            // div names (warning, note, danger)
50            if !module_path.contains('.') && !module_path.contains(':') {
51                return false;
52            }
53
54            // Reject malformed paths: can't start/end with separator
55            if module_path.starts_with('.') || module_path.starts_with(':') {
56                return false;
57            }
58            if module_path.ends_with('.') || module_path.ends_with(':') {
59                return false;
60            }
61            // Reject consecutive separators (module..Class, handler::path)
62            if module_path.contains("..")
63                || module_path.contains("::")
64                || module_path.contains(".:")
65                || module_path.contains(":.")
66            {
67                return false;
68            }
69        }
70    }
71
72    true
73}
74
75/// Get the indentation level of an autodoc marker
76pub fn get_autodoc_indent(line: &str) -> Option<usize> {
77    if is_autodoc_marker(line) {
78        return Some(super::mkdocs_common::get_line_indent(line));
79    }
80    None
81}
82
83/// Check if a line is part of autodoc options (YAML format)
84pub fn is_autodoc_options(line: &str, base_indent: usize) -> bool {
85    // Options must be indented at least 4 spaces more than the ::: marker
86    let line_indent = super::mkdocs_common::get_line_indent(line);
87
88    // Check if properly indented (at least 4 spaces from base)
89    if line_indent >= base_indent + 4 {
90        // Empty lines that are properly indented are considered part of options
91        if line.trim().is_empty() {
92            return true;
93        }
94
95        // YAML key-value pairs
96        if line.contains(':') {
97            return true;
98        }
99        // YAML list items
100        let trimmed = line.trim_start();
101        if trimmed.starts_with("- ") || trimmed.starts_with("* ") {
102            return true;
103        }
104    }
105
106    false
107}
108
109/// Pre-compute all autodoc block ranges in the content
110/// Returns a sorted vector of byte ranges for efficient lookup
111pub fn detect_autodoc_block_ranges(content: &str) -> Vec<crate::utils::skip_context::ByteRange> {
112    let mut ranges = Vec::new();
113    let lines: Vec<&str> = content.lines().collect();
114    let mut byte_pos = 0;
115    let mut in_autodoc = false;
116    let mut autodoc_indent = 0;
117    let mut block_start = 0;
118
119    for line in lines {
120        let line_end = byte_pos + line.len();
121
122        // Check if we're starting an autodoc block
123        if is_autodoc_marker(line) {
124            in_autodoc = true;
125            autodoc_indent = get_autodoc_indent(line).unwrap_or(0);
126            block_start = byte_pos;
127        } else if in_autodoc {
128            // Check if we're still in autodoc options
129            if is_autodoc_options(line, autodoc_indent) {
130                // Continue in autodoc block
131            } else {
132                // Not part of options - check if this ends the block
133                // Completely empty lines (no indentation) don't end the block
134                if line.is_empty() {
135                    // Continue in autodoc
136                } else {
137                    // Non-option, non-empty line ends the autodoc block
138                    // Save the range up to the previous line
139                    ranges.push(crate::utils::skip_context::ByteRange {
140                        start: block_start,
141                        end: byte_pos.saturating_sub(1), // Don't include the newline before this line
142                    });
143                    in_autodoc = false;
144                    autodoc_indent = 0;
145                }
146            }
147        }
148
149        // Account for newline character
150        byte_pos = line_end + 1;
151    }
152
153    // If we ended while still in an autodoc block, save it
154    if in_autodoc {
155        ranges.push(crate::utils::skip_context::ByteRange {
156            start: block_start,
157            end: byte_pos.saturating_sub(1),
158        });
159    }
160
161    ranges
162}
163
164/// Check if a position is within any of the pre-computed autodoc block ranges
165pub fn is_within_autodoc_block_ranges(ranges: &[crate::utils::skip_context::ByteRange], position: usize) -> bool {
166    crate::utils::skip_context::is_in_html_comment_ranges(ranges, position)
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_autodoc_marker_detection() {
        // Valid mkdocstrings autodoc markers (dotted or colon-separated paths)
        assert!(is_autodoc_marker("::: mymodule.MyClass"));
        assert!(is_autodoc_marker("::: package.module.Class"));
        assert!(is_autodoc_marker("  ::: indented.Class"));
        assert!(is_autodoc_marker("::: module:function"));
        assert!(is_autodoc_marker("::: handler:package.module"));
        assert!(is_autodoc_marker("::: a.b"));

        // Not autodoc: wrong syntax
        assert!(!is_autodoc_marker(":: Wrong number"));
        assert!(!is_autodoc_marker("Regular text"));
        assert!(!is_autodoc_marker(":::"));
        assert!(!is_autodoc_marker(":::    "));

        // Not autodoc: Pandoc fenced divs (plain words, no separator)
        assert!(!is_autodoc_marker("::: warning"));
        assert!(!is_autodoc_marker("::: note"));
        assert!(!is_autodoc_marker("::: danger"));
        assert!(!is_autodoc_marker("::: sidebar"));
        assert!(!is_autodoc_marker("  ::: callout"));

        // Not autodoc: Pandoc attribute syntax
        assert!(!is_autodoc_marker("::: {.note}"));
        assert!(!is_autodoc_marker("::: {#myid .warning}"));
        assert!(!is_autodoc_marker("::: {.note .important}"));

        // Not autodoc: malformed paths
        assert!(!is_autodoc_marker("::: .starts.with.dot"));
        assert!(!is_autodoc_marker("::: ends.with.dot."));
        assert!(!is_autodoc_marker("::: has..consecutive.dots"));
        assert!(!is_autodoc_marker("::: :starts.with.colon"));
        assert!(!is_autodoc_marker("::: ends.with.colon:"));

        // Not autodoc: consecutive separators involving colons
        assert!(!is_autodoc_marker("::: handler::path"));
        assert!(!is_autodoc_marker("::: module.:name"));
        assert!(!is_autodoc_marker("::: module:.name"));
    }

    #[test]
    fn test_autodoc_indent() {
        // Only genuine autodoc markers report an indentation
        assert!(get_autodoc_indent("::: package.module").is_some());
        assert!(get_autodoc_indent("  ::: indented.Class").is_some());
        assert_eq!(get_autodoc_indent("::: warning"), None);
        assert_eq!(get_autodoc_indent("Regular text"), None);
    }

    #[test]
    fn test_autodoc_options() {
        assert!(is_autodoc_options("    handler: python", 0));
        assert!(is_autodoc_options("    options:", 0));
        assert!(is_autodoc_options("      show_source: true", 0));
        assert!(!is_autodoc_options("", 0)); // Empty lines are neutral
        assert!(!is_autodoc_options("Not indented", 0));
        assert!(!is_autodoc_options("  Only 2 spaces", 0));
        // Test YAML list items
        assert!(is_autodoc_options("            - window", 0));
        assert!(is_autodoc_options("            - app", 0));
    }

    #[test]
    fn test_autodoc_options_relative_to_marker_indent() {
        // The 4-space requirement is measured from the marker's own indentation
        assert!(!is_autodoc_options("    handler: python", 4));
        assert!(is_autodoc_options("        handler: python", 4));
    }

    #[test]
    fn test_detect_autodoc_block_ranges() {
        // No markers, no ranges
        assert!(detect_autodoc_block_ranges("").is_empty());
        assert!(detect_autodoc_block_ranges("Just text\n\n::: warning\n").is_empty());

        // Marker (bytes 0..7), option line (8..27), blank line (28), then text at 29
        let content = "::: a.b\n    handler: python\n\nText\n";
        let ranges = detect_autodoc_block_ranges(content);
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].start, 0);
        assert_eq!(ranges[0].end, 28);

        assert!(is_within_autodoc_block_ranges(&ranges, 5));
        assert!(!is_within_autodoc_block_ranges(&ranges, 30));
    }

    #[test]
    fn test_detect_autodoc_block_at_end_of_content() {
        // A block still open at the end of the content is recorded as well
        let ranges = detect_autodoc_block_ranges("Intro\n::: a.b\n    handler: python");
        assert_eq!(ranges.len(), 1);
        assert_eq!(ranges[0].start, 6);
    }
}