rumdl_lib/utils/
mkdocs_patterns.rs

1/// MkDocs pattern detection utilities
2///
3/// Provides centralized pattern detection for MkDocs auto-references.
4/// Check if a reference is a valid MkDocs auto-reference pattern
5///
6/// MkDocs auto-references include:
7/// - Module/class references: `module.Class`, `package.module.function`
8/// - Header anchors: `getting-started`, `api-reference`
9/// - API paths: `api/v1/endpoints`, `docs/reference`
10pub fn is_mkdocs_auto_reference(reference: &str) -> bool {
11    // Reject empty or excessively long references for performance
12    if reference.is_empty() || reference.len() > 200 {
13        return false;
14    }
15
16    // Check for API paths first (can contain dots in components like api/module.Class)
17    if reference.contains('/') {
18        return is_valid_slash_pattern(reference);
19    }
20
21    // Check for module/class references (contains dots)
22    if reference.contains('.') {
23        return is_valid_dot_pattern(reference);
24    }
25
26    // Check for header anchors (contains hyphens)
27    if reference.contains('-') && !reference.contains(' ') {
28        return is_valid_hyphen_pattern(reference);
29    }
30
31    false
32}
33
34/// Validate dot patterns (module.Class, package.module.function)
35fn is_valid_dot_pattern(reference: &str) -> bool {
36    // Reject patterns that are just dots or start/end with dots
37    if reference.starts_with('.') || reference.ends_with('.') {
38        return false;
39    }
40
41    let parts: Vec<&str> = reference.split('.').collect();
42
43    // Must have at least 2 parts for a meaningful reference
44    if parts.len() < 2 {
45        return false;
46    }
47
48    // Each part must be a valid identifier
49    parts.iter().all(|part| {
50        !part.is_empty()
51            && part.len() <= 50  // Reasonable length limit
52            && is_valid_identifier(part)
53    })
54}
55
/// Validate hyphen patterns (`header-anchor`, `getting-started`).
///
/// Accepts strings of at least 3 bytes made up solely of ASCII lowercase
/// letters, ASCII digits and `-`, where no hyphen sits at either end and no
/// two hyphens are adjacent. This function does not itself require a hyphen
/// to be present.
fn is_valid_hyphen_pattern(reference: &str) -> bool {
    // Shortest accepted shape is `a-b`.
    let too_short = reference.len() < 3;
    let misplaced_hyphen =
        reference.starts_with('-') || reference.ends_with('-') || reference.contains("--");

    if too_short || misplaced_hyphen {
        return false;
    }

    // Every allowed character is ASCII, so a byte scan is sufficient: any
    // byte of a multi-byte UTF-8 character falls outside these ranges.
    reference
        .bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9' | b'-'))
}
73
74/// Validate slash patterns (api/module, docs/reference/guide)
75fn is_valid_slash_pattern(reference: &str) -> bool {
76    let parts: Vec<&str> = reference.split('/').collect();
77
78    // Must have at least 2 parts for a meaningful path
79    if parts.len() < 2 {
80        return false;
81    }
82
83    // Each part must be valid
84    parts.iter().all(|part| {
85        !part.is_empty()
86            && part.len() <= 50  // Reasonable length limit per segment
87            && is_valid_path_component(part)
88    })
89}
90
/// Check if a string is a valid identifier (for module/class names).
///
/// Follows Python-style ASCII identifier rules: the first character must be
/// an ASCII letter or `_`, and every following character must be an ASCII
/// letter, ASCII digit, or `_`. The empty string is not an identifier.
fn is_valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();

    // Matching on the first character handles the empty string and the
    // "cannot start with a digit" rule together, with no `unwrap`.
    match chars.next() {
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        _ => false,
    }
}
106
/// Check if a string is a valid path component.
///
/// A component is valid when it is non-empty and consists only of ASCII
/// letters, ASCII digits, `_`, `-` and `.`. Dots are allowed so that a
/// segment such as `module.Class` is accepted.
fn is_valid_path_component(s: &str) -> bool {
    if s.is_empty() {
        return false;
    }

    // Every allowed character is ASCII, so checking bytes is equivalent to
    // checking chars: bytes of multi-byte UTF-8 characters never match.
    s.bytes().all(|byte| {
        matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-' | b'.')
    })
}
115
#[cfg(test)]
mod tests {
    use super::*;

    /// Dotted module/class references: accepted and rejected shapes.
    #[test]
    fn test_valid_dot_patterns() {
        // Valid module references
        assert!(is_mkdocs_auto_reference("module.Class"));
        assert!(is_mkdocs_auto_reference("package.module.function"));
        assert!(is_mkdocs_auto_reference("__init__.py"));
        assert!(is_mkdocs_auto_reference("Class.__init__"));
        assert!(is_mkdocs_auto_reference("a.b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference(".")); // Single dot
        assert!(!is_mkdocs_auto_reference("..")); // Double dots
        assert!(!is_mkdocs_auto_reference("a.")); // Ends with dot
        assert!(!is_mkdocs_auto_reference(".a")); // Starts with dot
        assert!(!is_mkdocs_auto_reference("a..b")); // Double dot in middle
        assert!(!is_mkdocs_auto_reference("127.0.0.1")); // IP address (parts start with digits)
    }

    /// Hyphenated header anchors: accepted and rejected shapes.
    #[test]
    fn test_valid_hyphen_patterns() {
        // Valid header anchors
        assert!(is_mkdocs_auto_reference("getting-started"));
        assert!(is_mkdocs_auto_reference("api-reference"));
        assert!(is_mkdocs_auto_reference("section-1"));
        assert!(is_mkdocs_auto_reference("a-b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference("-")); // Single hyphen
        assert!(!is_mkdocs_auto_reference("--")); // Double hyphen
        assert!(!is_mkdocs_auto_reference("-start")); // Starts with hyphen
        assert!(!is_mkdocs_auto_reference("end-")); // Ends with hyphen
        assert!(!is_mkdocs_auto_reference("double--hyphen")); // Consecutive hyphens
        assert!(!is_mkdocs_auto_reference("UPPER-CASE")); // Uppercase
        assert!(!is_mkdocs_auto_reference("Mixed-Case")); // Mixed case
    }

    /// Slash-separated API paths: accepted and rejected shapes.
    #[test]
    fn test_valid_slash_patterns() {
        // Valid API paths
        assert!(is_mkdocs_auto_reference("api/v1"));
        assert!(is_mkdocs_auto_reference("docs/reference/guide"));
        assert!(is_mkdocs_auto_reference("api/module.Class"));
        assert!(is_mkdocs_auto_reference("a/b")); // Minimal valid

        // Invalid patterns (not meaningful as MkDocs references)
        assert!(!is_mkdocs_auto_reference("/")); // Single slash
        assert!(!is_mkdocs_auto_reference("//")); // Double slash
        assert!(!is_mkdocs_auto_reference("a//b")); // Double slash in middle
    }

    /// The overall 200-byte limit, exercised with inputs that are otherwise
    /// valid so that only the length check can reject them.
    #[test]
    fn test_length_limits() {
        // Plain text without separators is rejected whatever its length.
        let long_input = "a".repeat(201);
        assert!(!is_mkdocs_auto_reference(&long_input));

        // Exactly 200 bytes of well-formed dotted reference: accepted.
        let at_limit = format!("{}ab", "a.".repeat(99));
        assert_eq!(at_limit.len(), 200);
        assert!(is_mkdocs_auto_reference(&at_limit));

        // 201 bytes of well-formed dotted reference: rejected by the limit.
        let over_limit = format!("{}a", "a.".repeat(100));
        assert_eq!(over_limit.len(), 201);
        assert!(!is_mkdocs_auto_reference(&over_limit));

        // Empty input
        assert!(!is_mkdocs_auto_reference(""));
    }

    /// The 50-byte limit applied to each dot part and each path segment.
    #[test]
    fn test_segment_length_limits() {
        let fifty = "a".repeat(50);
        let fifty_one = "a".repeat(51);

        // Dot-separated parts
        assert!(is_mkdocs_auto_reference(&format!("{fifty}.b")));
        assert!(!is_mkdocs_auto_reference(&format!("{fifty_one}.b")));

        // Slash-separated segments
        assert!(is_mkdocs_auto_reference(&format!("api/{fifty}")));
        assert!(!is_mkdocs_auto_reference(&format!("api/{fifty_one}")));
    }

    /// Mixed separators, special characters and whitespace.
    #[test]
    fn test_edge_cases() {
        // A dot routes to the identifier rules, which do not allow hyphens
        assert!(!is_mkdocs_auto_reference("module.class-method")); // Dot and hyphen mixed

        // A slash takes precedence, and path segments may contain dots or hyphens
        assert!(is_mkdocs_auto_reference("api/module.Class")); // Valid API path
        assert!(is_mkdocs_auto_reference("api/module.function")); // Valid API path
        assert!(is_mkdocs_auto_reference("api/getting-started")); // Hyphen inside a segment

        // Special characters
        assert!(!is_mkdocs_auto_reference("module.class!")); // Invalid character
        assert!(!is_mkdocs_auto_reference("api/module?query")); // Query string
        assert!(!is_mkdocs_auto_reference("header#anchor")); // Fragment

        // Spaces
        assert!(!is_mkdocs_auto_reference("module .class")); // Space before dot
        assert!(!is_mkdocs_auto_reference("header -anchor")); // Space before hyphen
        assert!(!is_mkdocs_auto_reference("api/ module")); // Space after slash
    }
}