// rumdl_lib/utils/mkdocs_patterns.rs

1/// MkDocs pattern detection utilities
2///
3/// Provides centralized pattern detection for MkDocs auto-references.
4/// Check if a reference is a valid MkDocs auto-reference pattern
5///
6/// MkDocs auto-references include:
7/// - Module/class references: `module.Class`, `package.module.function`
8/// - Header anchors: `getting-started`, `api-reference`
9/// - API paths: `api/v1/endpoints`, `docs/reference`
10pub fn is_mkdocs_auto_reference(reference: &str) -> bool {
11    // Reject empty or excessively long references for performance
12    if reference.is_empty() || reference.len() > 200 {
13        return false;
14    }
15
16    // Check for API paths first (can contain dots in components like api/module.Class)
17    if reference.contains('/') {
18        return is_valid_slash_pattern(reference);
19    }
20
21    // Check for module/class references (contains dots)
22    if reference.contains('.') {
23        return is_valid_dot_pattern(reference);
24    }
25
26    // Check for header anchors (contains hyphens)
27    if reference.contains('-') && !reference.contains(' ') {
28        return is_valid_hyphen_pattern(reference);
29    }
30    false
31}
32
33/// Validate dot patterns (module.Class, package.module.function)
34fn is_valid_dot_pattern(reference: &str) -> bool {
35    // Reject patterns that are just dots or start/end with dots
36    if reference.starts_with('.') || reference.ends_with('.') {
37        return false;
38    }
39
40    let parts: Vec<&str> = reference.split('.').collect();
41
42    // Must have at least 2 parts for a meaningful reference
43    if parts.len() < 2 {
44        return false;
45    }
46
47    // Each part must be a valid identifier
48    parts.iter().all(|part| {
49        !part.is_empty()
50            && part.len() <= 50  // Reasonable length limit
51            && is_valid_identifier(part)
52    })
53}
54
/// Validate hyphen patterns (`header-anchor`, `getting-started`).
///
/// Returns `true` only when `reference`:
/// - contains at least one hyphen,
/// - does not start or end with a hyphen and has no consecutive hyphens,
/// - consists solely of ASCII lowercase letters, ASCII digits and hyphens.
fn is_valid_hyphen_pattern(reference: &str) -> bool {
    // A hyphen pattern needs a hyphen. The dot and slash validators reject
    // separator-less input on their own; this keeps the three consistent so
    // the function is also correct when called without a prior guard.
    if !reference.contains('-') {
        return false;
    }

    // Reject patterns that start/end with hyphens or have consecutive hyphens
    if reference.starts_with('-') || reference.ends_with('-') || reference.contains("--") {
        return false;
    }

    // At this point a hyphen sits strictly inside the string, so the input is
    // already at least 3 bytes long (`a-b` minimum); no separate length check
    // is required.

    // Check if all characters are valid for header anchors. Every accepted
    // character is ASCII, so any byte of a multi-byte character fails here.
    reference
        .bytes()
        .all(|b| b.is_ascii_lowercase() || b.is_ascii_digit() || b == b'-')
}
72
73/// Validate slash patterns (api/module, docs/reference/guide)
74fn is_valid_slash_pattern(reference: &str) -> bool {
75    let parts: Vec<&str> = reference.split('/').collect();
76
77    // Must have at least 2 parts for a meaningful path
78    if parts.len() < 2 {
79        return false;
80    }
81
82    // Each part must be valid
83    parts.iter().all(|part| {
84        !part.is_empty()
85            && part.len() <= 50  // Reasonable length limit per segment
86            && is_valid_path_component(part)
87    })
88}
89
/// Check if a string is a valid identifier (for module/class names).
///
/// Follows the Python identifier shape restricted to ASCII: the first
/// character must be an ASCII letter or `_`, and every following character
/// must be an ASCII letter, ASCII digit or `_`. The empty string is not an
/// identifier.
fn is_valid_identifier(s: &str) -> bool {
    let mut chars = s.chars();

    match chars.next() {
        // Can't start with a digit (or anything other than a letter / `_`).
        Some(first) if first.is_ascii_alphabetic() || first == '_' => {
            chars.all(|c| c.is_ascii_alphanumeric() || c == '_')
        }
        // Empty input or an invalid leading character.
        _ => false,
    }
}
105
/// Check if a string is a valid path component.
///
/// A component is valid when it is non-empty and made up only of ASCII
/// letters, ASCII digits, `_`, `-` and `.`. Dots are allowed so that
/// components such as `module.Class` pass; note that this also accepts
/// components consisting only of dots (for example `..`).
fn is_valid_path_component(s: &str) -> bool {
    !s.is_empty()
        && s.chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
}
114
#[cfg(test)]
mod tests {
    use super::*;

    /// Dotted module/class references: accepted shapes and malformed ones.
    #[test]
    fn test_valid_dot_patterns() {
        // Valid module references
        assert!(is_mkdocs_auto_reference("module.Class"));
        assert!(is_mkdocs_auto_reference("package.module.function"));
        assert!(is_mkdocs_auto_reference("__init__.py"));
        assert!(is_mkdocs_auto_reference("Class.__init__"));
        assert!(is_mkdocs_auto_reference("a.b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference(".")); // Single dot
        assert!(!is_mkdocs_auto_reference("..")); // Double dots
        assert!(!is_mkdocs_auto_reference("a.")); // Ends with dot
        assert!(!is_mkdocs_auto_reference(".a")); // Starts with dot
        assert!(!is_mkdocs_auto_reference("a..b")); // Double dot in middle
        assert!(!is_mkdocs_auto_reference("127.0.0.1")); // IP address (digits start)
    }

    /// Hyphenated header anchors: lowercase/digit words joined by single hyphens.
    #[test]
    fn test_valid_hyphen_patterns() {
        // Valid header anchors
        assert!(is_mkdocs_auto_reference("getting-started"));
        assert!(is_mkdocs_auto_reference("api-reference"));
        assert!(is_mkdocs_auto_reference("section-1"));
        assert!(is_mkdocs_auto_reference("a-b")); // Minimal valid

        // Invalid patterns
        assert!(!is_mkdocs_auto_reference("-")); // Single hyphen
        assert!(!is_mkdocs_auto_reference("--")); // Double hyphen
        assert!(!is_mkdocs_auto_reference("-start")); // Starts with hyphen
        assert!(!is_mkdocs_auto_reference("end-")); // Ends with hyphen
        assert!(!is_mkdocs_auto_reference("double--hyphen")); // Consecutive hyphens
        assert!(!is_mkdocs_auto_reference("UPPER-CASE")); // Uppercase
        assert!(!is_mkdocs_auto_reference("Mixed-Case")); // Mixed case
    }

    /// Slash-separated API paths, including components that contain dots.
    #[test]
    fn test_valid_slash_patterns() {
        // Valid API paths
        assert!(is_mkdocs_auto_reference("api/v1"));
        assert!(is_mkdocs_auto_reference("docs/reference/guide"));
        assert!(is_mkdocs_auto_reference("api/module.Class"));
        assert!(is_mkdocs_auto_reference("a/b")); // Minimal valid

        // Invalid patterns (not meaningful as MkDocs references)
        assert!(!is_mkdocs_auto_reference("/")); // Single slash
        assert!(!is_mkdocs_auto_reference("//")); // Double slash
        assert!(!is_mkdocs_auto_reference("a//b")); // Double slash in middle
    }

    /// Overall and per-part length limits.
    ///
    /// The long inputs are built from otherwise-valid references so that the
    /// rejection can only come from the length checks, not from a missing
    /// separator.
    #[test]
    fn test_length_limits() {
        // Length limits for performance
        let long_input = "a".repeat(201);
        assert!(!is_mkdocs_auto_reference(&long_input));

        // 51 three-byte segments + 50 slashes = 203 bytes: over the 200-byte cap
        let over_limit = vec!["seg"; 51].join("/");
        assert_eq!(over_limit.len(), 203);
        assert!(!is_mkdocs_auto_reference(&over_limit));

        // Exactly 200 bytes is still accepted
        let at_limit = format!("{}/segs", vec!["seg"; 49].join("/"));
        assert_eq!(at_limit.len(), 200);
        assert!(is_mkdocs_auto_reference(&at_limit));

        // Each part/segment is capped at 50 bytes
        assert!(is_mkdocs_auto_reference(&format!("a.{}", "b".repeat(50))));
        assert!(!is_mkdocs_auto_reference(&format!("a.{}", "b".repeat(51))));
        assert!(is_mkdocs_auto_reference(&format!("a/{}", "b".repeat(50))));
        assert!(!is_mkdocs_auto_reference(&format!("a/{}", "b".repeat(51))));

        // Empty input
        assert!(!is_mkdocs_auto_reference(""));
    }

    /// Mixed separators, special characters and whitespace.
    #[test]
    fn test_edge_cases() {
        // No separator at all is never an auto-reference
        assert!(!is_mkdocs_auto_reference("plainword"));

        // Mixed patterns in same component (should fail)
        assert!(!is_mkdocs_auto_reference("module.class-method")); // Dot and hyphen mixed

        // Path with dots in components is valid for API paths
        assert!(is_mkdocs_auto_reference("api/module.Class")); // Valid API path
        assert!(is_mkdocs_auto_reference("api/module.function")); // Valid API path

        // Special characters
        assert!(!is_mkdocs_auto_reference("module.class!")); // Invalid character
        assert!(!is_mkdocs_auto_reference("api/module?query")); // Query string
        assert!(!is_mkdocs_auto_reference("header#anchor")); // Fragment

        // Spaces
        assert!(!is_mkdocs_auto_reference("module .class")); // Space after dot
        assert!(!is_mkdocs_auto_reference("header -anchor")); // Space after hyphen
        assert!(!is_mkdocs_auto_reference("api/ module")); // Space after slash
    }
}