use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
// Process-wide memo of compiled patterns: maps the original glob pattern text to the
// `Regex` built from it. The `Mutex` serialises access so the map can be read and
// extended through a shared static. Nothing visible in this file removes entries,
// so the map grows with the number of distinct patterns compiled.
lazy_static::lazy_static! {
static ref PATTERN_CACHE: Mutex<HashMap<String, Regex>> = Mutex::new(HashMap::new());
}
/// Translates a shell-style glob `pattern` into an anchored regular-expression string.
///
/// Supported glob syntax:
///
/// * `*` matches any run of characters that does not contain `/`.
/// * `**` matches any run of characters, `/` included; the form `**/` matches zero or
///   more complete leading path components.
/// * `?` matches exactly one character other than `/`.
/// * `[abc]` / `[a-z]` match one character from the set; a leading `!` or `^` negates
///   the set, and a `]` placed first in the set is a member rather than the terminator.
///   A `[` with no closing `]` is treated as a literal bracket.
/// * Outside a set, the characters `\ . ^ $ + { } | ( )` are escaped so they match
///   themselves; everything else is copied through unchanged.
///
/// Inside a set, a backslash followed by another character is passed through as that
/// two-character escape. Characters that the `regex` crate would otherwise interpret
/// structurally inside a class (`[`, `&`, `~`, and a lone trailing backslash) are
/// escaped so the emitted class always ends where the glob set ends.
///
/// The returned string is wrapped in `^...$`, so it must match a candidate in full.
pub fn translate_pattern(pattern: &str) -> String {
    let chars: Vec<char> = pattern.chars().collect();
    let n = chars.len();
    let mut regex_pattern = String::with_capacity(pattern.len() + 2);
    regex_pattern.push('^');

    let mut i = 0;
    while i < n {
        let c = chars[i];
        match c {
            '*' => {
                if chars.get(i + 1) != Some(&'*') {
                    // Single star: stay within one path component.
                    regex_pattern.push_str("[^/]*");
                    i += 1;
                } else if chars.get(i + 2) == Some(&'/') {
                    // `**/`: any number of whole directories, including none.
                    regex_pattern.push_str("(?:[^/]+/)*");
                    i += 3;
                } else {
                    // Bare `**`: anything at all, separators included.
                    regex_pattern.push_str(".*");
                    i += 2;
                }
            }
            '?' => {
                regex_pattern.push_str("[^/]");
                i += 1;
            }
            '[' => {
                // Locate the closing bracket. A negation marker and a `]` directly
                // after the opener (or after the marker) belong to the set itself.
                let mut j = i + 1;
                if j < n && matches!(chars[j], '!' | '^') {
                    j += 1;
                }
                if j < n && chars[j] == ']' {
                    j += 1;
                }
                while j < n && chars[j] != ']' {
                    j += 1;
                }

                if j >= n {
                    // Unterminated set: the bracket is just a literal character.
                    regex_pattern.push_str("\\[");
                    i += 1;
                } else {
                    regex_pattern.push('[');
                    // `k` is always in bounds here because i + 1 < j < n.
                    let mut k = i + 1;
                    if matches!(chars[k], '!' | '^') {
                        regex_pattern.push('^');
                        k += 1;
                    }
                    while k < j {
                        let ch = chars[k];
                        match ch {
                            // Escape pair written by the caller: keep it intact.
                            '\\' if k + 1 < j => {
                                regex_pattern.push('\\');
                                regex_pattern.push(chars[k + 1]);
                                k += 2;
                            }
                            // `[:` is left untouched so a POSIX class name such as
                            // `[[:alpha:]]` still reaches the regex engine as-is.
                            '[' if chars.get(k + 1) == Some(&':') => {
                                regex_pattern.push('[');
                                k += 1;
                            }
                            // A lone backslash before the terminator would escape the
                            // closing `]`; `[` would open a nested class; `&&` and `~~`
                            // are set operators. Escape them so they are plain members.
                            '\\' | '[' | '&' | '~' => {
                                regex_pattern.push('\\');
                                regex_pattern.push(ch);
                                k += 1;
                            }
                            _ => {
                                regex_pattern.push(ch);
                                k += 1;
                            }
                        }
                    }
                    regex_pattern.push(']');
                    i = j + 1;
                }
            }
            // Regex metacharacters that have no glob meaning: match them literally.
            '\\' | '.' | '^' | '$' | '+' | '{' | '}' | '|' | '(' | ')' => {
                regex_pattern.push('\\');
                regex_pattern.push(c);
                i += 1;
            }
            _ => {
                regex_pattern.push(c);
                i += 1;
            }
        }
    }

    regex_pattern.push('$');
    regex_pattern
}
/// Returns the compiled regular expression for the glob `pattern`.
///
/// The pattern text is looked up in `PATTERN_CACHE` first; on a hit a clone of the
/// cached `Regex` is returned. On a miss the glob is translated with
/// `translate_pattern`, compiled, stored in the cache under the original pattern
/// text, and returned.
///
/// # Errors
///
/// Returns the `regex::Error` produced by `Regex::new` when the translated expression
/// is rejected. Nothing is cached in that case.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
pub fn compile_pattern(pattern: &str) -> Result<Regex, regex::Error> {
    // The guard is held for the whole call, so lookup and insert happen atomically.
    let mut cache = PATTERN_CACHE.lock().unwrap();
    if let Some(regex) = cache.get(pattern) {
        return Ok(regex.clone());
    }
    let regex_pattern = translate_pattern(pattern);
    let regex = Regex::new(&regex_pattern)?;
    cache.insert(pattern.to_string(), regex.clone());
    Ok(regex)
}
/// Reports whether `name` matches the glob `pattern`.
///
/// # Errors
///
/// Propagates the `regex::Error` from `compile_pattern` when the pattern cannot be
/// compiled.
pub fn pattern_match(name: &str, pattern: &str) -> Result<bool, regex::Error> {
    compile_pattern(pattern).map(|matcher| matcher.is_match(name))
}
/// Returns owned copies of the entries of `names` that match the glob `pattern`,
/// in their original order.
///
/// # Errors
///
/// Propagates the `regex::Error` from `compile_pattern` when the pattern cannot be
/// compiled.
pub fn pattern_filter(names: &[String], pattern: &str) -> Result<Vec<String>, regex::Error> {
    let matcher = compile_pattern(pattern)?;
    let mut kept = Vec::new();
    for candidate in names {
        if matcher.is_match(candidate) {
            kept.push(candidate.clone());
        }
    }
    Ok(kept)
}
/// Renders `path` as text with every backslash turned into a forward slash.
///
/// The conversion to text is lossy: path bytes that are not valid Unicode are
/// replaced as `Path::to_string_lossy` does.
pub fn normalize_path(path: &Path) -> String {
    let text = path.to_string_lossy();
    text.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
/// Recursively lists the files under `dirname` selected by glob patterns.
///
/// `dirname` is canonicalized first. Every regular file found below it is tested by
/// its path relative to that canonical root, written with `/` separators. A file is
/// kept when it matches at least one of `include_patterns` and none of
/// `exclude_patterns`. An empty `include_patterns` behaves like the single pattern
/// `**`.
///
/// Returns the full paths of the kept files in sorted order.
///
/// # Errors
///
/// Returns an error when `dirname` cannot be canonicalized, a pattern fails to
/// compile, a directory cannot be read, or a path cannot be made relative to the root.
pub fn get_matching_files<P: AsRef<Path>>(
    dirname: P,
    include_patterns: &[String],
    exclude_patterns: &[String],
) -> Result<Vec<PathBuf>, Box<dyn std::error::Error>> {
    /// Depth-first walk of `current`, appending every selected file to `found`.
    fn collect_matches(
        current: &Path,
        root: &Path,
        includes: &[Regex],
        excludes: &[Regex],
        found: &mut Vec<PathBuf>,
    ) -> Result<(), Box<dyn std::error::Error>> {
        if !current.is_dir() {
            return Ok(());
        }
        for entry in std::fs::read_dir(current)? {
            let candidate = entry?.path();
            if candidate.is_dir() {
                collect_matches(&candidate, root, includes, excludes, found)?;
                continue;
            }
            if !candidate.is_file() {
                continue;
            }
            // Patterns are written against root-relative, slash-separated paths.
            let relative = normalize_path(candidate.strip_prefix(root)?);
            let selected = includes.iter().any(|re| re.is_match(&relative))
                && !excludes.iter().any(|re| re.is_match(&relative));
            if selected {
                found.push(candidate);
            }
        }
        Ok(())
    }

    let root = dirname.as_ref().canonicalize()?;

    // Include patterns are compiled before exclude patterns, so an invalid include
    // pattern is the error reported when both lists contain one.
    let includes = if include_patterns.is_empty() {
        vec![compile_pattern("**")?]
    } else {
        include_patterns
            .iter()
            .map(|glob| compile_pattern(glob))
            .collect::<Result<Vec<_>, _>>()?
    };
    let excludes = exclude_patterns
        .iter()
        .map(|glob| compile_pattern(glob))
        .collect::<Result<Vec<_>, _>>()?;

    let mut found = Vec::new();
    collect_matches(&root, &root, &includes, &excludes, &mut found)?;
    found.sort();
    Ok(found)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// True when any path in `files` has `name` as its final component.
    fn contains_file(files: &[std::path::PathBuf], name: &str) -> bool {
        files.iter().any(|p| p.file_name().unwrap() == name)
    }

    /// Checks the regex text emitted for each supported glob construct.
    #[test]
    fn test_translate_pattern() {
        let cases = [
            ("*.rst", "^[^/]*\\.rst$"),
            ("**", "^.*$"),
            ("**/index.rst", "^(?:[^/]+/)*index\\.rst$"),
            ("docs/*.rst", "^docs/[^/]*\\.rst$"),
            ("[abc].rst", "^[abc]\\.rst$"),
            ("[!abc].rst", "^[^abc]\\.rst$"),
        ];
        for &(glob, expected) in &cases {
            assert_eq!(translate_pattern(glob), expected);
        }
    }

    /// Checks match / no-match outcomes for names against glob patterns.
    #[test]
    fn test_pattern_match() {
        let cases = [
            ("index.rst", "*.rst", true),
            ("docs/index.rst", "**/*.rst", true),
            ("docs/api/module.rst", "**/api/*.rst", true),
            ("_build/index.html", "*.rst", false),
            ("_build/index.html", "**", true),
            ("a.rst", "[abc].rst", true),
            ("d.rst", "[abc].rst", false),
            ("a.rst", "[!abc].rst", false),
            ("d.rst", "[!abc].rst", true),
        ];
        for &(name, glob, expected) in &cases {
            assert_eq!(
                pattern_match(name, glob).unwrap(),
                expected,
                "name={:?} pattern={:?}",
                name,
                glob
            );
        }
    }

    /// Builds a small tree in a temporary directory and checks include / exclude
    /// selection against it.
    #[test]
    fn test_get_matching_files() {
        let temp_dir = TempDir::new().unwrap();
        let root = temp_dir.path();

        for &dir in &["docs", "_build"] {
            fs::create_dir_all(root.join(dir)).unwrap();
        }
        for &file in &["index.rst", "docs/api.rst", "_build/index.html", "README.md"] {
            fs::write(root.join(file), "content").unwrap();
        }

        // Include only: both .rst files are found, at any depth.
        let rst_only = get_matching_files(root, &["**/*.rst".to_string()], &[]).unwrap();
        assert_eq!(rst_only.len(), 2);
        assert!(contains_file(&rst_only, "index.rst"));
        assert!(contains_file(&rst_only, "api.rst"));

        // Everything except the _build tree.
        let without_build =
            get_matching_files(root, &["**".to_string()], &["_build/**".to_string()]).unwrap();
        assert!(without_build
            .iter()
            .all(|p| !p.to_string_lossy().contains("_build")));

        // Exclusion wins over inclusion for files under docs/.
        let top_level_rst = get_matching_files(
            root,
            &["**/*.rst".to_string()],
            &["docs/**".to_string()],
        )
        .unwrap();
        assert_eq!(top_level_rst.len(), 1);
        assert!(contains_file(&top_level_rst, "index.rst"));
        assert!(!contains_file(&top_level_rst, "api.rst"));
    }
}