use crate::error::{CacheError, Result};
use regex::Regex;
/// Maximum accepted pattern length in bytes; `compile_regex` and `glob_to_regex`
/// both reject any pattern whose `len()` exceeds this value.
#[allow(dead_code)]
pub const MAX_PATTERN_LENGTH: usize = 256;
/// Maximum number of wildcard/quantifier characters a pattern may contain before
/// `compile_regex` or `glob_to_regex` rejects it.
#[allow(dead_code)]
pub const MAX_WILDCARDS: usize = 10;
/// Compiles `pattern` into a [`Regex`] after a series of defensive checks.
///
/// The checks run in this order, and the first one that fails wins:
///
/// 1. The pattern must be at most [`MAX_PATTERN_LENGTH`] bytes long.
/// 2. The pattern must contain at most [`MAX_WILDCARDS`] `*` / `+` bytes. The count is
///    taken over the raw bytes, so escaped (`\*`) and bracketed (`[*]`) occurrences are
///    counted as well.
/// 3. The pattern must not look like a quantified or nested group: either a parenthesised
///    group directly followed by `+`, or a parenthesised group that contains further
///    parenthesised groups.
///
/// # Errors
///
/// Returns [`CacheError::InvalidInput`] when any check above fails or when the regex
/// engine itself rejects the pattern.
#[allow(dead_code)]
pub fn compile_regex(pattern: &str) -> Result<regex::Regex> {
    // Textual shapes treated as suspicious; each entry is itself a regex.
    const SUSPICIOUS_SHAPES: [&str; 2] = [r"\([^)]*\)\++", r"\([^)]*(\([^)]*\))+\)"];

    let byte_len = pattern.len();
    if byte_len > MAX_PATTERN_LENGTH {
        return Err(CacheError::InvalidInput(format!(
            "Regex pattern exceeds maximum length of {} bytes (got {})",
            MAX_PATTERN_LENGTH, byte_len
        )));
    }

    let quantifier_total = pattern
        .bytes()
        .filter(|&byte| matches!(byte, b'*' | b'+'))
        .count();
    if quantifier_total > MAX_WILDCARDS {
        return Err(CacheError::InvalidInput(format!(
            "Regex pattern contains too many quantifiers ({} > {})",
            quantifier_total, MAX_WILDCARDS
        )));
    }

    // A detector that fails to compile is skipped rather than reported.
    let looks_dangerous = SUSPICIOUS_SHAPES
        .iter()
        .filter_map(|shape| Regex::new(shape).ok())
        .any(|detector| detector.is_match(pattern));
    if looks_dangerous {
        return Err(CacheError::InvalidInput(
            "Regex pattern contains potentially dangerous quantifier pattern".to_string(),
        ));
    }

    match Regex::new(pattern) {
        Ok(compiled) => Ok(compiled),
        Err(e) => Err(CacheError::InvalidInput(format!(
            "Invalid regex pattern: {}",
            e
        ))),
    }
}
/// Tests `input` against `regex`, refusing inputs longer than 1,000,000 bytes.
///
/// # Errors
///
/// Returns [`CacheError::InvalidInput`] when `input.len()` exceeds the limit; the regex
/// is not evaluated in that case.
#[allow(dead_code)]
pub fn match_safe(regex: &Regex, input: &str) -> Result<bool> {
    // Largest input, in bytes, that will be handed to the regex engine.
    const INPUT_BYTE_LIMIT: usize = 1_000_000;

    match input.len() {
        len if len > INPUT_BYTE_LIMIT => Err(CacheError::InvalidInput(
            "Input string too long for regex matching".to_string(),
        )),
        _ => Ok(regex.is_match(input)),
    }
}
#[allow(dead_code)]
pub fn glob_to_regex(pattern: &str, double_star_allowed: bool) -> Result<String> {
if pattern.len() > MAX_PATTERN_LENGTH {
return Err(CacheError::InvalidInput(format!(
"Glob pattern exceeds maximum length of {} bytes (got {})",
MAX_PATTERN_LENGTH,
pattern.len()
)));
}
let single_star_count = pattern.bytes().filter(|&b| b == b'*').count();
if double_star_allowed {
let double_star_count = pattern.matches("**").count();
if single_star_count - (double_star_count * 2) > MAX_WILDCARDS {
return Err(CacheError::InvalidInput(format!(
"Glob pattern contains too many wildcards (max {})",
MAX_WILDCARDS
)));
}
} else if single_star_count > MAX_WILDCARDS {
return Err(CacheError::InvalidInput(format!(
"Glob pattern contains too many wildcards (max {})",
MAX_WILDCARDS
)));
}
let mut regex_pattern = String::with_capacity(pattern.len() * 2);
let mut chars = pattern.chars().peekable();
let mut in_escape = false;
while let Some(c) = chars.next() {
if in_escape {
regex_pattern.push_str(®ex::escape(&c.to_string()));
in_escape = false;
continue;
}
match c {
'\\' if !in_escape => {
if chars.peek() == Some(&'*') {
chars.next();
regex_pattern.push('*');
} else {
in_escape = true;
}
}
'*' => {
if double_star_allowed && chars.clone().next() == Some('*') {
chars.next();
if chars.peek() == Some(&'/') {
chars.next();
regex_pattern.push_str("(?:.*/)?");
} else {
regex_pattern.push_str(".*");
}
} else {
regex_pattern.push_str("[^/]*");
}
}
'?' => regex_pattern.push('.'),
'[' => {
return Err(CacheError::InvalidInput(
"Character class '[...]' not allowed in glob patterns".to_string(),
));
}
'{' | '}' => {
return Err(CacheError::InvalidInput(
"Brace expansion not allowed in glob patterns".to_string(),
));
}
c => regex_pattern.push_str(®ex::escape(&c.to_string())),
}
}
Ok(format!("^{}$", regex_pattern))
}
/// Converts a glob pattern to regex source with [`glob_to_regex`] and then compiles it
/// through [`compile_regex`], so the checks of both functions apply.
///
/// # Errors
///
/// Propagates the [`CacheError`] produced by either step.
#[allow(dead_code)]
pub fn compile_glob_pattern(pattern: &str, double_star_allowed: bool) -> Result<Regex> {
    glob_to_regex(pattern, double_star_allowed).and_then(|translated| compile_regex(&translated))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Translates `pattern` with `glob_to_regex` and compiles the result directly with
    /// `Regex::new`, so only the translation (not `compile_regex`) is under test.
    fn glob_regex(pattern: &str, double_star_allowed: bool) -> Regex {
        let regex_pattern =
            glob_to_regex(pattern, double_star_allowed).expect("glob pattern should translate");
        Regex::new(&regex_pattern).expect("translated glob should be a valid regex")
    }

    #[test]
    fn test_compile_regex_valid_pattern() {
        assert!(compile_regex(".*").is_ok());
    }

    #[test]
    fn test_compile_regex_invalid_pattern() {
        assert!(compile_regex("[invalid").is_err());
    }

    #[test]
    fn test_compile_regex_dangerous_pattern() {
        assert!(compile_regex(r"(a+)+$").is_err());
    }

    #[test]
    fn test_compile_regex_too_long() {
        let long_pattern = "a".repeat(MAX_PATTERN_LENGTH + 1);
        assert!(compile_regex(&long_pattern).is_err());
    }

    #[test]
    fn test_compile_regex_too_many_quantifiers() {
        let pattern = "*".repeat(MAX_WILDCARDS + 1);
        assert!(compile_regex(&pattern).is_err());
    }

    #[test]
    fn test_glob_to_regex_simple() {
        let regex = glob_regex("*.txt", false);
        assert!(regex.is_match("file.txt"));
        assert!(!regex.is_match("file.md"));
    }

    #[test]
    fn test_glob_to_regex_disallowed_chars() {
        assert!(glob_to_regex("[abc]", false).is_err());
        assert!(glob_to_regex("{a,b}", false).is_err());
    }

    #[test]
    fn test_glob_to_regex_too_long() {
        let long_pattern = "a".repeat(MAX_PATTERN_LENGTH + 1);
        assert!(glob_to_regex(&long_pattern, false).is_err());
    }

    #[test]
    fn test_glob_to_regex_too_many_wildcards() {
        let pattern = "*".repeat(MAX_WILDCARDS + 1);
        assert!(glob_to_regex(&pattern, false).is_err());
    }

    #[test]
    fn test_match_safe_valid() {
        let regex = Regex::new(".*").unwrap();
        let matched = match_safe(&regex, "test").expect("short input should be accepted");
        assert!(matched);
    }

    #[test]
    fn test_match_safe_too_long_input() {
        let regex = Regex::new(".*").unwrap();
        let long_input = "a".repeat(1_000_001);
        assert!(match_safe(&regex, &long_input).is_err());
    }

    #[test]
    fn test_compile_glob_pattern() {
        let regex = compile_glob_pattern("*.rs", false).expect("glob should compile");
        assert!(regex.is_match("test.rs"));
        assert!(!regex.is_match("test.txt"));
    }

    #[test]
    fn test_glob_to_regex_double_star_allowed() {
        let regex = glob_regex("**/*.rs", true);
        assert!(regex.is_match("test.rs"));
        assert!(regex.is_match("dir/test.rs"));
        assert!(regex.is_match("dir/subdir/test.rs"));
    }

    #[test]
    fn test_glob_to_regex_double_star_too_many_wildcards() {
        let pattern = "*a".repeat(MAX_WILDCARDS + 1);
        assert!(glob_to_regex(&pattern, true).is_err());
    }

    #[test]
    fn test_glob_to_regex_double_star_no_slash() {
        let regex = glob_regex("**file", true);
        assert!(regex.is_match("dir/file"));
        assert!(regex.is_match("file"));
    }

    #[test]
    fn test_glob_to_regex_double_star_with_slash() {
        let regex = glob_regex("**/file", true);
        assert!(regex.is_match("file"));
        assert!(regex.is_match("dir/file"));
    }

    #[test]
    fn test_glob_to_regex_escape_character() {
        let regex = glob_regex("\\a", false);
        assert!(regex.is_match("a"));
    }

    #[test]
    fn test_glob_to_regex_escaped_star() {
        // Pins the current translation: the escaped star is emitted as a bare `*`.
        let regex_pattern = glob_to_regex("\\*", false).expect("glob pattern should translate");
        assert_eq!(regex_pattern, "^*$");
    }

    #[test]
    fn test_glob_to_regex_backslash_at_end() {
        assert!(glob_to_regex("test\\", false).is_ok());
    }

    #[test]
    fn test_glob_to_regex_question_mark() {
        let regex = glob_regex("?.txt", false);
        assert!(regex.is_match("a.txt"));
        assert!(!regex.is_match("ab.txt"));
    }

    #[test]
    fn test_glob_to_regex_mixed_wildcards() {
        let regex = glob_regex("?*.txt", false);
        assert!(regex.is_match("a.txt"));
        assert!(regex.is_match("ab.txt"));
        assert!(!regex.is_match(".txt"));
    }

    #[test]
    fn test_compile_regex_empty_pattern() {
        assert!(compile_regex("").is_ok());
    }

    #[test]
    fn test_compile_regex_exact_length_limit() {
        let pattern = "a".repeat(MAX_PATTERN_LENGTH);
        assert!(compile_regex(&pattern).is_ok());
    }

    #[test]
    fn test_compile_regex_exact_quantifier_limit() {
        let pattern = "a*".repeat(MAX_WILDCARDS);
        assert!(compile_regex(&pattern).is_ok());
    }

    #[test]
    fn test_compile_regex_nested_parentheses_quantifier() {
        assert!(compile_regex(r"((a+)+)").is_err());
    }

    #[test]
    fn test_match_safe_exact_limit() {
        let regex = Regex::new(".*").unwrap();
        let input = "a".repeat(1_000_000);
        assert!(match_safe(&regex, &input).is_ok());
    }

    #[test]
    fn test_match_safe_no_match() {
        let regex = Regex::new("^b+$").unwrap();
        let matched = match_safe(&regex, "aaa").expect("short input should be accepted");
        assert!(!matched);
    }

    #[test]
    fn test_compile_glob_pattern_double_star() {
        let regex = compile_glob_pattern("**/*.rs", true).expect("glob should compile");
        assert!(regex.is_match("test.rs"));
        assert!(regex.is_match("dir/test.rs"));
    }

    #[test]
    fn test_compile_glob_pattern_question_mark() {
        let regex = compile_glob_pattern("?.txt", false).expect("glob should compile");
        assert!(regex.is_match("a.txt"));
    }

    #[test]
    fn test_glob_to_regex_single_star_no_slash_match() {
        let regex = glob_regex("*.txt", false);
        assert!(regex.is_match("file.txt"));
        assert!(!regex.is_match("dir/file.txt"));
    }

    #[test]
    fn test_glob_to_regex_regular_character() {
        let regex = glob_regex("test.txt", false);
        assert!(regex.is_match("test.txt"));
    }

    #[test]
    fn test_glob_to_regex_double_star_allowed_exact_limit() {
        let pattern = "**".repeat(MAX_WILDCARDS);
        assert!(glob_to_regex(&pattern, true).is_ok());
    }
}