rumdl_lib/utils/mkdocs_patterns.rs
1/// MkDocs pattern detection utilities
2///
3/// Provides centralized pattern detection for MkDocs auto-references.
4///
5/// # MkDocs Auto-References
6///
7/// This module detects patterns used by MkDocs ecosystem plugins, particularly:
8/// - **mkdocs-autorefs**: Automatic cross-references in documentation
9/// - **mkdocstrings**: Python API documentation generation
10///
11/// ## Supported Patterns
12///
13/// ### Module/Class References
14/// - Format: `module.Class`, `package.module.function`
15/// - Example: [`module.MyClass`][], [`api.endpoints.get_user`][]
16/// - Used for: Python API documentation cross-references
17///
18/// ### Header Anchors
19/// - Format: `getting-started`, `api-reference`
20/// - Example: [getting-started][], [installation-guide][]
21/// - Used for: Cross-references to documentation sections
22///
23/// ### API Paths
24/// - Format: `api/v1/endpoints`, `docs/reference/guide`
25/// - Example: [api/module.Class][], [docs/getting-started][]
26/// - Used for: Navigation and documentation structure references
27///
28/// ## References
29///
30/// - [mkdocs-autorefs](https://mkdocstrings.github.io/autorefs/)
31/// - [mkdocstrings](https://mkdocstrings.github.io/)
32/// - [MkDocs discussions](https://github.com/mkdocs/mkdocs/discussions/3754)
33///
34/// ## See Also
35///
36/// - [`MD042NoEmptyLinks`](crate::rules::MD042NoEmptyLinks) - Handles MkDocs auto-references
37/// - [`is_mkdocs_attribute_anchor`](crate::rules::md042_no_empty_links::MD042NoEmptyLinks::is_mkdocs_attribute_anchor) - Handles attr_list anchors
38pub fn is_mkdocs_auto_reference(reference: &str) -> bool {
39 // Reject empty or excessively long references for performance
40 if reference.is_empty() || reference.len() > 200 {
41 return false;
42 }
43
44 // Check for API paths first (can contain dots in components like api/module.Class)
45 if reference.contains('/') {
46 return is_valid_slash_pattern(reference);
47 }
48
49 // Check for module/class references (contains dots)
50 if reference.contains('.') {
51 return is_valid_dot_pattern(reference);
52 }
53
54 // Check for header anchors (contains hyphens)
55 if reference.contains('-') && !reference.contains(' ') {
56 return is_valid_hyphen_pattern(reference);
57 }
58 false
59}
60
61/// Validate dot patterns (module.Class, package.module.function)
62fn is_valid_dot_pattern(reference: &str) -> bool {
63 // Reject patterns that are just dots or start/end with dots
64 if reference.starts_with('.') || reference.ends_with('.') {
65 return false;
66 }
67
68 let parts: Vec<&str> = reference.split('.').collect();
69
70 // Must have at least 2 parts for a meaningful reference
71 if parts.len() < 2 {
72 return false;
73 }
74
75 // Each part must be a valid identifier
76 parts.iter().all(|part| {
77 !part.is_empty()
78 && part.len() <= 50 // Reasonable length limit
79 && is_valid_identifier(part)
80 })
81}
82
/// Checks whether `reference` looks like a header anchor such as
/// `getting-started` or `section-1`.
///
/// Accepted references are at least 3 bytes long, do not begin or end with a
/// hyphen, contain no run of two hyphens, and consist solely of ASCII
/// lowercase letters, ASCII digits, and hyphens.
fn is_valid_hyphen_pattern(reference: &str) -> bool {
    // Shortest meaningful anchor is `a-b`.
    let too_short = reference.len() < 3;
    let misplaced_hyphen =
        reference.starts_with('-') || reference.ends_with('-') || reference.contains("--");
    if too_short || misplaced_hyphen {
        return false;
    }

    // Every allowed character is ASCII, so a byte-wise scan is sufficient:
    // any non-ASCII character contributes bytes >= 0x80, which never match.
    reference
        .bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
}
100
101/// Validate slash patterns (api/module, docs/reference/guide)
102fn is_valid_slash_pattern(reference: &str) -> bool {
103 let parts: Vec<&str> = reference.split('/').collect();
104
105 // Must have at least 2 parts for a meaningful path
106 if parts.len() < 2 {
107 return false;
108 }
109
110 // Each part must be valid
111 parts.iter().all(|part| {
112 !part.is_empty()
113 && part.len() <= 50 // Reasonable length limit per segment
114 && is_valid_path_component(part)
115 })
116}
117
/// Checks whether `s` is a valid ASCII, Python-style identifier (used for
/// module, class, and function names).
///
/// Returns `true` only when `s` is non-empty, its first character is an ASCII
/// letter or `_`, and every remaining character is an ASCII letter, ASCII
/// digit, or `_`. Returns `false` for the empty string and for any string
/// containing non-ASCII characters.
fn is_valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();

    // Matching on the first character handles the empty string and the
    // "cannot start with a digit" rule in one step, without an `unwrap`.
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
133
/// Checks whether `s` is acceptable as one segment of a slash-separated path.
///
/// A segment must be non-empty and consist only of ASCII letters, ASCII
/// digits, `_`, `-`, and `.`. Dots are permitted so that segments such as
/// `module.Class` are accepted.
fn is_valid_path_component(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }

    // All permitted characters are ASCII; bytes of any non-ASCII character
    // are >= 0x80 and therefore never match.
    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'_' | b'-' | b'.'))
}
142
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every input in `cases` is recognised as an auto-reference.
    fn assert_all_accepted(cases: &[&str]) {
        for &case in cases {
            assert!(
                is_mkdocs_auto_reference(case),
                "expected {:?} to be accepted as an auto-reference",
                case
            );
        }
    }

    /// Asserts that every input in `cases` is rejected as an auto-reference.
    fn assert_all_rejected(cases: &[&str]) {
        for &case in cases {
            assert!(
                !is_mkdocs_auto_reference(case),
                "expected {:?} to be rejected as an auto-reference",
                case
            );
        }
    }

    #[test]
    fn test_valid_dot_patterns() {
        // Module/class references; `a.b` is the minimal valid form.
        assert_all_accepted(&[
            "module.Class",
            "package.module.function",
            "__init__.py",
            "Class.__init__",
            "a.b",
        ]);

        // Lone dots, leading/trailing dots, doubled dots, and segments that
        // start with a digit (e.g. an IP address).
        assert_all_rejected(&[".", "..", "a.", ".a", "a..b", "127.0.0.1"]);
    }

    #[test]
    fn test_valid_hyphen_patterns() {
        // Header anchors; `a-b` is the minimal valid form.
        assert_all_accepted(&["getting-started", "api-reference", "section-1", "a-b"]);

        // Lone hyphens, leading/trailing hyphens, consecutive hyphens, and
        // anything containing uppercase letters.
        assert_all_rejected(&[
            "-",
            "--",
            "-start",
            "end-",
            "double--hyphen",
            "UPPER-CASE",
            "Mixed-Case",
        ]);
    }

    #[test]
    fn test_valid_slash_patterns() {
        // API paths; `a/b` is the minimal valid form.
        assert_all_accepted(&["api/v1", "docs/reference/guide", "api/module.Class", "a/b"]);

        // Paths with empty segments are not meaningful as MkDocs references.
        assert_all_rejected(&["/", "//", "a//b"]);
    }

    #[test]
    fn test_length_limits() {
        // One byte over the 200-byte limit.
        let long_input = "a".repeat(201);
        assert_all_rejected(&[long_input.as_str()]);

        // Empty input.
        assert_all_rejected(&[""]);
    }

    #[test]
    fn test_edge_cases() {
        // Dot and hyphen mixed within the same component.
        assert_all_rejected(&["module.class-method"]);

        // Dots inside a path component are valid for API paths.
        assert_all_accepted(&["api/module.Class", "api/module.function"]);

        // Special characters: invalid character, query string, fragment.
        assert_all_rejected(&["module.class!", "api/module?query", "header#anchor"]);

        // Embedded spaces around each kind of separator.
        assert_all_rejected(&["module .class", "header -anchor", "api/ module"]);
    }
}