Skip to main content

reinhardt_urls/routers/pattern/
path_pattern.rs

1use super::validation::{
2	MAX_PATH_SEGMENTS, MAX_PATTERN_LENGTH, MAX_REGEX_SIZE, type_spec_to_regex, validate_path_param,
3	validate_reverse_param,
4};
5use aho_corasick::AhoCorasick;
6use regex::Regex;
7use std::collections::{HashMap, HashSet};
8
/// Path pattern for URL matching
/// Similar to Django's URL patterns but using composition
///
/// A value holds the original pattern text together with everything derived
/// from it at construction time: an anchored regex for matching, the ordered
/// parameter names, and the data needed to reverse the pattern into a URL.
#[derive(Clone, Debug)]
pub struct PathPattern {
	/// Original pattern string (may contain type specifiers)
	pattern: String,
	/// Pattern normalized to `{name}` format for URL reversal
	normalized_pattern: String,
	/// Regex compiled from the pattern. It is anchored with `^` and `$`, and
	/// every parameter is a named capture group carrying the parameter name.
	pub(super) regex: Regex,
	/// Parameter names in the order their placeholders appear in the pattern.
	pub(super) param_names: Vec<String>,
	/// Parameter names that use the `path` type specifier.
	/// These require post-match validation to reject directory traversal.
	pub(super) path_type_params: HashSet<String>,
	/// Pre-built Aho-Corasick automaton over the `{name}` placeholders, used
	/// for URL reversal. It is constructed once during pattern creation and
	/// is `None` when the pattern has no parameters.
	aho_corasick: Option<AhoCorasick>,
}
26
/// Parse result containing regex, param names, and normalized pattern for URL reversal
struct ParsePatternResult {
	/// Regex source for the whole pattern, anchored with `^` and `$`, with one
	/// named capture group per parameter
	regex_str: String,
	/// Parameter names in the order they were encountered in the pattern
	param_names: Vec<String>,
	/// Parameter names that use the `path` type specifier
	path_type_params: HashSet<String>,
	/// Pattern normalized to `{name}` format for URL reversal
	/// e.g., "/users/{<int:id>}/" -> "/users/{id}/"
	normalized_pattern: String,
}
37
38impl PathPattern {
39	/// Create a new path pattern
40	/// Patterns like "/users/{id}/" are converted to regex
41	///
42	/// # Examples
43	///
44	/// ```
45	/// use reinhardt_urls::routers::{PathPattern, path};
46	///
47	/// // Create a simple pattern without parameters
48	/// let pattern = PathPattern::new(path!("/users/")).unwrap();
49	/// assert_eq!(pattern.pattern(), "/users/");
50	///
51	/// // Create a pattern with a parameter
52	/// let pattern = PathPattern::new(path!("/users/{id}/")).unwrap();
53	/// assert_eq!(pattern.param_names(), &["id"]);
54	/// ```
55	pub fn new(pattern: impl Into<String>) -> Result<Self, String> {
56		let pattern = pattern.into();
57
58		// Reject patterns exceeding the maximum length to prevent ReDoS
59		if pattern.len() > MAX_PATTERN_LENGTH {
60			return Err(format!(
61				"Pattern length {} exceeds maximum allowed length of {} bytes",
62				pattern.len(),
63				MAX_PATTERN_LENGTH
64			));
65		}
66
67		// Reject patterns with excessive path segments to prevent resource exhaustion
68		let segment_count = pattern.split('/').count();
69		if segment_count > MAX_PATH_SEGMENTS {
70			return Err(format!(
71				"Pattern has {} path segments, exceeding maximum of {}",
72				segment_count, MAX_PATH_SEGMENTS
73			));
74		}
75
76		let parse_result = Self::parse_pattern(&pattern)?;
77
78		// Use RegexBuilder with size limits to prevent memory exhaustion
79		let regex = regex::RegexBuilder::new(&parse_result.regex_str)
80			.size_limit(MAX_REGEX_SIZE)
81			.build()
82			.map_err(|e| format!("Failed to compile pattern regex: {}", e))?;
83
84		// Build Aho-Corasick automaton for URL reversal if there are parameters
85		let aho_corasick = if !parse_result.param_names.is_empty() {
86			let placeholders: Vec<String> = parse_result
87				.param_names
88				.iter()
89				.map(|name| format!("{{{}}}", name))
90				.collect();
91
92			AhoCorasick::new(&placeholders)
93				.map(Some)
94				.map_err(|e| format!("Failed to build Aho-Corasick automaton: {}", e))?
95		} else {
96			None
97		};
98
99		Ok(Self {
100			pattern,
101			normalized_pattern: parse_result.normalized_pattern,
102			regex,
103			param_names: parse_result.param_names,
104			path_type_params: parse_result.path_type_params,
105			aho_corasick,
106		})
107	}
108
109	fn parse_pattern(pattern: &str) -> Result<ParsePatternResult, String> {
110		let mut regex_str = String::from("^");
111		let mut param_names = Vec::new();
112		let mut path_type_params = HashSet::new();
113		let mut normalized_pattern = String::new();
114		let mut chars = pattern.chars().peekable();
115
116		while let Some(ch) = chars.next() {
117			match ch {
118				'{' => {
119					// Extract parameter content (everything between { and })
120					let mut param_content = String::new();
121					while let Some(&next_ch) = chars.peek() {
122						if next_ch == '}' {
123							chars.next(); // consume '}'
124							break;
125						}
126						param_content.push(chars.next().unwrap());
127					}
128
129					if param_content.is_empty() {
130						return Err("Empty parameter name".to_string());
131					}
132
133					// Check for typed parameter syntax: {<type:name>}
134					let (param_name, regex_pattern) =
135						if param_content.starts_with('<') && param_content.ends_with('>') {
136							// Parse {<type:name>}
137							let inner = &param_content[1..param_content.len() - 1]; // Remove < >
138							if let Some(colon_pos) = inner.find(':') {
139								let type_spec = &inner[..colon_pos];
140								let name = &inner[colon_pos + 1..];
141								if name.is_empty() {
142									return Err(format!(
143										"Empty parameter name in typed parameter: {{<{}:>}}",
144										type_spec
145									));
146								}
147								if type_spec == "path" {
148									path_type_params.insert(name.to_string());
149								}
150								(name.to_string(), type_spec_to_regex(type_spec))
151							} else {
152								return Err(format!(
153									"Invalid typed parameter syntax: {{<{}>}}. Expected {{<type:name>}}",
154									inner
155								));
156							}
157						} else {
158							// Simple {name} parameter - use default [^/]+
159							(param_content, "[^/]+")
160						};
161
162					param_names.push(param_name.clone());
163					regex_str.push_str(&format!("(?P<{}>{})", param_name, regex_pattern));
164					// Write normalized placeholder for URL reversal
165					normalized_pattern.push_str(&format!("{{{}}}", param_name));
166				}
167				_ => {
168					// Escape special regex characters
169					if ".*+?^${}()|[]\\".contains(ch) {
170						regex_str.push('\\');
171					}
172					regex_str.push(ch);
173					// Copy literal characters to normalized pattern
174					normalized_pattern.push(ch);
175				}
176			}
177		}
178
179		regex_str.push('$');
180		Ok(ParsePatternResult {
181			regex_str,
182			param_names,
183			path_type_params,
184			normalized_pattern,
185		})
186	}
187	/// Get the original pattern string
188	///
189	/// # Examples
190	///
191	/// ```
192	/// use reinhardt_urls::routers::{PathPattern, path};
193	///
194	/// let pattern = PathPattern::new(path!("/users/{id}/")).unwrap();
195	/// assert_eq!(pattern.pattern(), "/users/{id}/");
196	/// ```
197	pub fn pattern(&self) -> &str {
198		&self.pattern
199	}
200
201	/// Convert pattern to matchit-compatible format
202	///
203	/// Transforms path-type parameters from `{<path:name>}` to `{*name}`
204	/// for use with the matchit radix router. Non-path parameters remain
205	/// as `{name}`.
206	pub(crate) fn to_matchit_pattern(&self) -> String {
207		let mut result = String::new();
208		let mut chars = self.pattern.chars().peekable();
209
210		while let Some(ch) = chars.next() {
211			if ch == '{' {
212				let mut param_content = String::new();
213				while let Some(&next_ch) = chars.peek() {
214					if next_ch == '}' {
215						chars.next();
216						break;
217					}
218					param_content.push(chars.next().unwrap());
219				}
220
221				// Check for typed parameter: {<type:name>}
222				if param_content.starts_with('<') && param_content.ends_with('>') {
223					let inner = &param_content[1..param_content.len() - 1];
224					if let Some(colon_pos) = inner.find(':') {
225						let type_spec = &inner[..colon_pos];
226						let name = &inner[colon_pos + 1..];
227						if type_spec == "path" {
228							// Convert path type to matchit catch-all: {*name}
229							result.push_str(&format!("{{*{}}}", name));
230						} else {
231							// Other typed params use simple {name}
232							result.push_str(&format!("{{{}}}", name));
233						}
234					} else {
235						result.push_str(&format!("{{{}}}", param_content));
236					}
237				} else {
238					// Simple {name} parameter
239					result.push_str(&format!("{{{}}}", param_content));
240				}
241			} else {
242				result.push(ch);
243			}
244		}
245
246		result
247	}
248	/// Get the list of parameter names in the pattern
249	///
250	/// # Examples
251	///
252	/// ```
253	/// use reinhardt_urls::routers::{PathPattern, path};
254	///
255	/// let pattern = PathPattern::new(path!("/users/{user_id}/posts/{post_id}/")).unwrap();
256	/// assert_eq!(pattern.param_names(), &["user_id", "post_id"]);
257	/// ```
258	pub fn param_names(&self) -> &[String] {
259		&self.param_names
260	}
261
	/// Test if the pattern matches a given path
	///
	/// This only evaluates the compiled regex. The directory-traversal check
	/// that [`PathPattern::extract_params`] applies to `path`-type parameters
	/// is not performed here, so a path accepted by `is_match` can still be
	/// rejected by `extract_params`.
	///
	/// # Examples
	///
	/// ```
	/// use reinhardt_urls::routers::{PathPattern, path};
	///
	/// let pattern = PathPattern::new(path!("/users/{id}/")).unwrap();
	/// assert!(pattern.is_match("/users/123/"));
	/// assert!(!pattern.is_match("/users/"));
	/// ```
	pub fn is_match(&self, path: &str) -> bool {
		self.regex.is_match(path)
	}
276
277	/// Match a path and extract parameters
278	///
279	/// # Examples
280	///
281	/// ```
282	/// use reinhardt_urls::routers::{PathPattern, path};
283	///
284	/// let pattern = PathPattern::new(path!("/users/{id}/")).unwrap();
285	/// let params = pattern.extract_params("/users/123/").unwrap();
286	/// assert_eq!(params.get("id"), Some(&"123".to_string()));
287	/// ```
288	pub fn extract_params(&self, path: &str) -> Option<HashMap<String, String>> {
289		self.regex.captures(path).and_then(|captures| {
290			let mut params = HashMap::new();
291			for name in self.param_names() {
292				if let Some(value) = captures.name(name) {
293					let val = value.as_str();
294					// Validate path-type parameters against directory traversal
295					if self.path_type_params.contains(name) && !validate_path_param(val) {
296						return None;
297					}
298					params.insert(name.clone(), val.to_string());
299				}
300			}
301			Some(params)
302		})
303	}
304
305	/// Reverse URL pattern with parameters using Aho-Corasick algorithm
306	///
307	/// This method uses pre-built Aho-Corasick automaton for efficient
308	/// multi-pattern matching with O(n+m+z) complexity where:
309	/// - n: pattern length
310	/// - m: total parameter values length
311	/// - z: number of placeholder matches
312	///
313	/// # Arguments
314	///
315	/// * `params` - HashMap of parameter names to values
316	///
317	/// # Returns
318	///
319	/// * `Ok(String)` - Reversed URL with parameters substituted
320	/// * `Err(String)` - If required parameters are missing
321	///
322	/// # Examples
323	///
324	/// ```
325	/// use reinhardt_urls::routers::{PathPattern, path};
326	/// use std::collections::HashMap;
327	///
328	/// let pattern = PathPattern::new(path!("/users/{id}/posts/{post_id}/")).unwrap();
329	///
330	/// let mut params = HashMap::new();
331	/// params.insert("id".to_string(), "123".to_string());
332	/// params.insert("post_id".to_string(), "456".to_string());
333	///
334	/// let url = pattern.reverse(&params).unwrap();
335	/// assert_eq!(url, "/users/123/posts/456/");
336	/// ```
337	pub fn reverse(&self, params: &HashMap<String, String>) -> Result<String, String> {
338		// Validate all required parameters are present
339		for param_name in &self.param_names {
340			if !params.contains_key(param_name) {
341				return Err(format!("Missing required parameter: {}", param_name));
342			}
343		}
344
345		// Validate parameter values against injection attacks
346		for (name, value) in params {
347			if !validate_reverse_param(value) {
348				return Err(format!(
349					"Invalid parameter value for '{}': contains dangerous characters",
350					name
351				));
352			}
353		}
354
355		// If no parameters, return normalized pattern as-is
356		if self.param_names.is_empty() {
357			return Ok(self.normalized_pattern.clone());
358		}
359
360		// Use Aho-Corasick if available, otherwise fallback to single-pass
361		match &self.aho_corasick {
362			Some(ac) => {
363				// Find all matches using Aho-Corasick on normalized pattern
364				let mut replacements = Vec::new();
365				for mat in ac.find_iter(&self.normalized_pattern) {
366					let param_name = &self.param_names[mat.pattern()];
367					// We already validated all params exist above
368					let value = params.get(param_name).unwrap();
369					replacements.push((mat.start(), mat.end(), value.clone()));
370				}
371
372				// Apply replacements from right to left to avoid position shifts
373				let mut result = self.normalized_pattern.clone();
374				for (start, end, value) in replacements.into_iter().rev() {
375					result.replace_range(start..end, &value);
376				}
377
378				Ok(result)
379			}
380			None => {
381				// Fallback: no parameters, just return normalized pattern
382				Ok(self.normalized_pattern.clone())
383			}
384		}
385	}
386}