rumdl_lib/utils/
mkdocs_patterns.rs

//! MkDocs pattern detection utilities
//!
//! Provides centralized pattern detection for MkDocs auto-references.

4/// Check if a reference is a valid MkDocs auto-reference pattern
5///
6/// MkDocs auto-references include:
7/// - Module/class references: `module.Class`, `package.module.function`
8/// - Header anchors: `getting-started`, `api-reference`
9/// - API paths: `api/v1/endpoints`, `docs/reference`
10pub fn is_mkdocs_auto_reference(reference: &str) -> bool {
11    // Reject empty or excessively long references for performance
12    if reference.is_empty() || reference.len() > 200 {
13        return false;
14    }
15
16    // Check for API paths first (can contain dots in components like api/module.Class)
17    if reference.contains('/') {
18        return is_valid_slash_pattern(reference);
19    }
20
21    // Check for module/class references (contains dots)
22    if reference.contains('.') {
23        return is_valid_dot_pattern(reference);
24    }
25
26    // Check for header anchors (contains hyphens)
27    if reference.contains('-') && !reference.contains(' ') {
28        return is_valid_hyphen_pattern(reference);
29    }
30
31    // Check for simple identifiers (single word class/function names)
32    if is_valid_identifier(reference) {
33        return true;
34    }
35
36    false
37}
38
39/// Validate dot patterns (module.Class, package.module.function)
40fn is_valid_dot_pattern(reference: &str) -> bool {
41    // Reject patterns that are just dots or start/end with dots
42    if reference.starts_with('.') || reference.ends_with('.') {
43        return false;
44    }
45
46    let parts: Vec<&str> = reference.split('.').collect();
47
48    // Must have at least 2 parts for a meaningful reference
49    if parts.len() < 2 {
50        return false;
51    }
52
53    // Each part must be a valid identifier
54    parts.iter().all(|part| {
55        !part.is_empty()
56            && part.len() <= 50  // Reasonable length limit
57            && is_valid_identifier(part)
58    })
59}
60
/// Validate header-anchor style references such as `getting-started`.
///
/// Accepts strings of at least 3 bytes made up only of ASCII lowercase
/// letters, ASCII digits, and hyphens, where no hyphen appears at either end
/// and no two hyphens are adjacent.
fn is_valid_hyphen_pattern(reference: &str) -> bool {
    // Shape rules: `a-b` is the shortest anchor, and hyphens may only sit
    // singly between other characters.
    let well_formed = reference.len() >= 3
        && !reference.starts_with('-')
        && !reference.ends_with('-')
        && !reference.contains("--");

    // Alphabet rule. Any byte of a non-ASCII character falls outside these
    // ranges, so such input is rejected.
    well_formed
        && reference
            .bytes()
            .all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
}
78
79/// Validate slash patterns (api/module, docs/reference/guide)
80fn is_valid_slash_pattern(reference: &str) -> bool {
81    let parts: Vec<&str> = reference.split('/').collect();
82
83    // Must have at least 2 parts for a meaningful path
84    if parts.len() < 2 {
85        return false;
86    }
87
88    // Each part must be valid
89    parts.iter().all(|part| {
90        !part.is_empty()
91            && part.len() <= 50  // Reasonable length limit per segment
92            && is_valid_path_component(part)
93    })
94}
95
/// Check if a string is a valid identifier (for module/class names).
///
/// Follows the ASCII subset of Python identifier rules: the first character
/// must be an ASCII letter or underscore, and every following character must
/// be an ASCII letter, ASCII digit, or underscore. The empty string is not an
/// identifier.
fn is_valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();

    // Matching on the first character handles the empty string and the
    // "can't start with a digit" rule in one step, with no unwrap.
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
111
/// Check if a string is a valid path component.
///
/// A component may contain ASCII alphanumerics, underscores, hyphens, and
/// dots (dots allow segments such as `module.Class`). It must also contain at
/// least one alphanumeric or underscore character: components made up solely
/// of punctuation (`.`, `..`, `-`, `--`) name nothing and are rejected, as is
/// the empty string.
fn is_valid_path_component(s: &str) -> bool {
    let mut has_name_char = false;

    for c in s.chars() {
        if c.is_ascii_alphanumeric() || c == '_' {
            has_name_char = true;
        } else if c != '-' && c != '.' {
            // Anything outside the allowed alphabet disqualifies the component.
            return false;
        }
    }

    // Also false for the empty string, which never enters the loop.
    has_name_char
}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that every reference in `cases` is accepted, naming the offending input on failure.
    fn assert_accepted(cases: &[&str]) {
        for case in cases {
            assert!(
                is_mkdocs_auto_reference(case),
                "expected {:?} to be accepted as an auto-reference",
                case
            );
        }
    }

    /// Assert that every reference in `cases` is rejected, naming the offending input on failure.
    fn assert_rejected(cases: &[&str]) {
        for case in cases {
            assert!(
                !is_mkdocs_auto_reference(case),
                "expected {:?} to be rejected as an auto-reference",
                case
            );
        }
    }

    #[test]
    fn test_valid_dot_patterns() {
        // Valid module references
        assert_accepted(&[
            "module.Class",
            "package.module.function",
            "__init__.py",
            "Class.__init__",
            "a.b", // Minimal valid
        ]);

        // Invalid patterns
        assert_rejected(&[
            ".",         // Single dot
            "..",        // Double dots
            "a.",        // Ends with dot
            ".a",        // Starts with dot
            "a..b",      // Double dot in middle
            "127.0.0.1", // IP address (segments start with digits)
        ]);
    }

    #[test]
    fn test_valid_hyphen_patterns() {
        // Valid header anchors
        assert_accepted(&[
            "getting-started",
            "api-reference",
            "section-1",
            "a-b", // Minimal valid
        ]);

        // Invalid patterns
        assert_rejected(&[
            "-",              // Single hyphen
            "--",             // Double hyphen
            "-start",         // Starts with hyphen
            "end-",           // Ends with hyphen
            "double--hyphen", // Consecutive hyphens
            "UPPER-CASE",     // Uppercase
            "Mixed-Case",     // Mixed case
        ]);
    }

    #[test]
    fn test_valid_slash_patterns() {
        // Valid API paths
        assert_accepted(&[
            "api/v1",
            "docs/reference/guide",
            "api/module.Class",
            "a/b", // Minimal valid
        ]);

        // Invalid patterns (not meaningful as MkDocs references)
        assert_rejected(&[
            "/",    // Single slash
            "//",   // Double slash
            "a//b", // Double slash in middle
        ]);
    }

    #[test]
    fn test_simple_identifiers() {
        // References without any separator are accepted only as plain identifiers
        assert_accepted(&["MyClass", "my_function", "_private", "x"]);

        assert_rejected(&[
            "1abc",      // Starts with a digit
            "has space", // Contains whitespace
            "bang!",     // Invalid character
        ]);
    }

    #[test]
    fn test_length_limits() {
        // Overall limit: 200 bytes is the longest accepted reference
        let at_limit = "a".repeat(200);
        assert!(is_mkdocs_auto_reference(&at_limit));

        let long_input = "a".repeat(201);
        assert!(!is_mkdocs_auto_reference(&long_input));

        // Per-segment limit: 50 bytes for both dotted and slash-separated references
        let segment_at_limit = "a".repeat(50);
        let segment_too_long = "a".repeat(51);
        assert!(is_mkdocs_auto_reference(&format!("{}.b", segment_at_limit)));
        assert!(!is_mkdocs_auto_reference(&format!("{}.b", segment_too_long)));
        assert!(is_mkdocs_auto_reference(&format!("{}/b", segment_at_limit)));
        assert!(!is_mkdocs_auto_reference(&format!("{}/b", segment_too_long)));

        // Empty input
        assert!(!is_mkdocs_auto_reference(""));
    }

    #[test]
    fn test_edge_cases() {
        // Mixed patterns in same component (should fail)
        assert_rejected(&["module.class-method"]); // Dot and hyphen mixed

        // Path with dots in components is valid for API paths
        assert_accepted(&["api/module.Class", "api/module.function"]);

        // Special characters
        assert_rejected(&[
            "module.class!",    // Invalid character
            "api/module?query", // Query string
            "header#anchor",    // Fragment
        ]);

        // Spaces
        assert_rejected(&[
            "module .class",  // Space before dot
            "header -anchor", // Space before hyphen
            "api/ module",    // Space after slash
        ]);
    }
}