rumdl_lib/utils/
mkdocs_patterns.rs

1/// MkDocs pattern detection utilities
2///
3/// Provides centralized pattern detection for MkDocs auto-references.
4///
5/// # MkDocs Auto-References
6///
7/// This module detects patterns used by MkDocs ecosystem plugins, particularly:
8/// - **mkdocs-autorefs**: Automatic cross-references in documentation
9/// - **mkdocstrings**: Python API documentation generation
10///
11/// ## Supported Patterns
12///
13/// ### Module/Class References
14/// - Format: `module.Class`, `package.module.function`
15/// - Example: [`module.MyClass`][], [`api.endpoints.get_user`][]
16/// - Used for: Python API documentation cross-references
17///
18/// ### Header Anchors
19/// - Format: `getting-started`, `api-reference`
20/// - Example: [getting-started][], [installation-guide][]
21/// - Used for: Cross-references to documentation sections
22///
23/// ### API Paths
24/// - Format: `api/v1/endpoints`, `docs/reference/guide`
25/// - Example: [api/module.Class][], [docs/getting-started][]
26/// - Used for: Navigation and documentation structure references
27///
28/// ## References
29///
30/// - [mkdocs-autorefs](https://mkdocstrings.github.io/autorefs/)
31/// - [mkdocstrings](https://mkdocstrings.github.io/)
32/// - [MkDocs discussions](https://github.com/mkdocs/mkdocs/discussions/3754)
33///
34/// ## See Also
35///
36/// - [`MD042NoEmptyLinks`](crate::rules::MD042NoEmptyLinks) - Handles MkDocs auto-references
37/// - [`is_mkdocs_attribute_anchor`](crate::rules::md042_no_empty_links::MD042NoEmptyLinks::is_mkdocs_attribute_anchor) - Handles attr_list anchors
38pub fn is_mkdocs_auto_reference(reference: &str) -> bool {
39    // Reject empty or excessively long references for performance
40    if reference.is_empty() || reference.len() > 200 {
41        return false;
42    }
43
44    // Check for API paths first (can contain dots in components like api/module.Class)
45    if reference.contains('/') {
46        return is_valid_slash_pattern(reference);
47    }
48
49    // Check for module/class references (contains dots)
50    if reference.contains('.') {
51        return is_valid_dot_pattern(reference);
52    }
53
54    // Check for header anchors (contains hyphens)
55    if reference.contains('-') && !reference.contains(' ') {
56        return is_valid_hyphen_pattern(reference);
57    }
58    false
59}
60
61/// Validate dot patterns (module.Class, package.module.function)
62fn is_valid_dot_pattern(reference: &str) -> bool {
63    // Reject patterns that are just dots or start/end with dots
64    if reference.starts_with('.') || reference.ends_with('.') {
65        return false;
66    }
67
68    let parts: Vec<&str> = reference.split('.').collect();
69
70    // Must have at least 2 parts for a meaningful reference
71    if parts.len() < 2 {
72        return false;
73    }
74
75    // Each part must be a valid identifier
76    parts.iter().all(|part| {
77        !part.is_empty()
78            && part.len() <= 50  // Reasonable length limit
79            && is_valid_identifier(part)
80    })
81}
82
/// Validate hyphen patterns (`header-anchor`, `getting-started`).
///
/// Returns `true` only when `reference` is at least 3 bytes long, neither
/// starts nor ends with `-`, contains no `--`, and consists solely of ASCII
/// lowercase letters, ASCII digits and `-`.
fn is_valid_hyphen_pattern(reference: &str) -> bool {
    // `a-b` is the shortest meaningful anchor.
    const MIN_LEN: usize = 3;

    let hyphen_at_edge = reference.starts_with('-') || reference.ends_with('-');
    if hyphen_at_edge || reference.contains("--") || reference.len() < MIN_LEN {
        return false;
    }

    // Header anchors are limited to lowercase letters, digits and hyphens.
    reference
        .chars()
        .all(|c| matches!(c, 'a'..='z' | '0'..='9' | '-'))
}
100
101/// Validate slash patterns (api/module, docs/reference/guide)
102fn is_valid_slash_pattern(reference: &str) -> bool {
103    let parts: Vec<&str> = reference.split('/').collect();
104
105    // Must have at least 2 parts for a meaningful path
106    if parts.len() < 2 {
107        return false;
108    }
109
110    // Each part must be valid
111    parts.iter().all(|part| {
112        !part.is_empty()
113            && part.len() <= 50  // Reasonable length limit per segment
114            && is_valid_path_component(part)
115    })
116}
117
/// Check if a string is a valid identifier (for module/class names).
///
/// Follows Python-style rules restricted to ASCII: the first character must
/// be a letter or `_`, and every following character must be a letter, digit
/// or `_`. The empty string is not an identifier.
fn is_valid_identifier(s: &str) -> bool {
    let is_word_char = |c: char| c.is_ascii_alphanumeric() || c == '_';

    let mut rest = s.chars();
    match rest.next() {
        // Identifiers can't start with a digit.
        Some(head) if head.is_ascii_alphabetic() || head == '_' => rest.all(is_word_char),
        // Empty input or an invalid leading character.
        _ => false,
    }
}
133
/// Check if a string is a valid path component.
///
/// A component is valid when it is non-empty and made up only of ASCII
/// letters, ASCII digits, `_`, `-` and `.`.
fn is_valid_path_component(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }

    // Dots are allowed so that components like `module.Class` are accepted.
    s.chars()
        .all(|c| matches!(c, '_' | '-' | '.') || c.is_ascii_alphanumeric())
}
142
// Unit tests for `is_mkdocs_auto_reference`, grouped by the pattern family
// (dot, hyphen, slash) plus length limits and mixed edge cases.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_valid_dot_patterns() {
        // Valid module references
        assert!(is_mkdocs_auto_reference("module.Class"));
        assert!(is_mkdocs_auto_reference("package.module.function"));
        assert!(is_mkdocs_auto_reference("__init__.py"));
        assert!(is_mkdocs_auto_reference("Class.__init__"));
        assert!(is_mkdocs_auto_reference("a.b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference(".")); // Single dot
        assert!(!is_mkdocs_auto_reference("..")); // Double dots
        assert!(!is_mkdocs_auto_reference("a.")); // Ends with dot
        assert!(!is_mkdocs_auto_reference(".a")); // Starts with dot
        assert!(!is_mkdocs_auto_reference("a..b")); // Double dot in middle
        assert!(!is_mkdocs_auto_reference("127.0.0.1")); // IP address (digits start)
    }

    #[test]
    fn test_valid_hyphen_patterns() {
        // Valid header anchors
        assert!(is_mkdocs_auto_reference("getting-started"));
        assert!(is_mkdocs_auto_reference("api-reference"));
        assert!(is_mkdocs_auto_reference("section-1"));
        assert!(is_mkdocs_auto_reference("a-b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference("-")); // Single hyphen
        assert!(!is_mkdocs_auto_reference("--")); // Double hyphen
        assert!(!is_mkdocs_auto_reference("-start")); // Starts with hyphen
        assert!(!is_mkdocs_auto_reference("end-")); // Ends with hyphen
        assert!(!is_mkdocs_auto_reference("double--hyphen")); // Consecutive hyphens
        assert!(!is_mkdocs_auto_reference("UPPER-CASE")); // Uppercase
        assert!(!is_mkdocs_auto_reference("Mixed-Case")); // Mixed case
    }

    #[test]
    fn test_valid_slash_patterns() {
        // Valid API paths
        assert!(is_mkdocs_auto_reference("api/v1"));
        assert!(is_mkdocs_auto_reference("docs/reference/guide"));
        assert!(is_mkdocs_auto_reference("api/module.Class"));
        assert!(is_mkdocs_auto_reference("a/b")); // Minimal valid

        // Invalid patterns (not meaningful as MkDocs references)
        assert!(!is_mkdocs_auto_reference("/")); // Single slash
        assert!(!is_mkdocs_auto_reference("//")); // Double slash
        assert!(!is_mkdocs_auto_reference("a//b")); // Double slash in middle
    }

    #[test]
    fn test_length_limits() {
        // A separator-free string is rejected whatever its length, so the
        // 200-byte guard is probed with inputs that are otherwise valid.
        let at_limit = format!("{}aa", "a.".repeat(99));
        assert_eq!(at_limit.len(), 200);
        assert!(is_mkdocs_auto_reference(&at_limit)); // Exactly at the limit

        let over_limit = format!("{}a", "a.".repeat(100));
        assert_eq!(over_limit.len(), 201);
        assert!(!is_mkdocs_auto_reference(&over_limit)); // One byte too long

        // Long input without any separator
        let long_input = "a".repeat(201);
        assert!(!is_mkdocs_auto_reference(&long_input));

        // Empty input
        assert!(!is_mkdocs_auto_reference(""));
    }

    #[test]
    fn test_segment_length_limits() {
        // Dot and slash segments are capped at 50 bytes each
        let max_segment = "a".repeat(50);
        let long_segment = "a".repeat(51);

        assert!(is_mkdocs_auto_reference(&format!("pkg.{}", max_segment)));
        assert!(!is_mkdocs_auto_reference(&format!("pkg.{}", long_segment)));

        assert!(is_mkdocs_auto_reference(&format!("api/{}", max_segment)));
        assert!(!is_mkdocs_auto_reference(&format!("api/{}", long_segment)));
    }

    #[test]
    fn test_edge_cases() {
        // Mixed patterns in same component (should fail)
        assert!(!is_mkdocs_auto_reference("module.class-method")); // Dot and hyphen mixed

        // Path with dots in components is valid for API paths
        assert!(is_mkdocs_auto_reference("api/module.Class")); // Valid API path
        assert!(is_mkdocs_auto_reference("api/module.function")); // Valid API path

        // Special characters
        assert!(!is_mkdocs_auto_reference("module.class!")); // Invalid character
        assert!(!is_mkdocs_auto_reference("api/module?query")); // Query string
        assert!(!is_mkdocs_auto_reference("header#anchor")); // Fragment

        // Spaces
        assert!(!is_mkdocs_auto_reference("module .class")); // Space after dot
        assert!(!is_mkdocs_auto_reference("header -anchor")); // Space after hyphen
        assert!(!is_mkdocs_auto_reference("api/ module")); // Space after slash
    }
}