use glob::Pattern;
use std::path::{Path, PathBuf};
use walkdir::{DirEntry, WalkDir};
/// Walks a directory tree and selects files using glob patterns.
///
/// Patterns are matched against each path relative to `root` (or against the
/// path as given when it does not live under `root`).
#[derive(Debug, Clone)]
pub struct FileWalker {
    /// Directory the walk starts from.
    root: PathBuf,
    /// Globs a file must match to be selected; an empty list selects every
    /// file that is not excluded.
    include_patterns: Vec<Pattern>,
    /// Globs that reject a file, and prune a directory, when they match.
    exclude_patterns: Vec<Pattern>,
}
impl FileWalker {
pub fn new(root: PathBuf, include: Vec<String>, exclude: Vec<String>) -> Self {
let include_patterns = include
.iter()
.filter_map(|p| Pattern::new(p).ok())
.collect();
let exclude_patterns = exclude
.iter()
.filter_map(|p| Pattern::new(p).ok())
.collect();
Self {
root,
include_patterns,
exclude_patterns,
}
}
pub fn root(&self) -> &Path {
&self.root
}
pub fn walk(&self) -> impl Iterator<Item = PathBuf> + '_ {
WalkDir::new(&self.root)
.follow_links(false)
.into_iter()
.filter_entry(|e| !self.is_excluded_dir(e))
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| self.should_include(e.path()))
.map(|e| e.path().to_path_buf())
}
fn is_excluded_dir(&self, entry: &DirEntry) -> bool {
if !entry.file_type().is_dir() {
return false;
}
let path = entry.path();
let relative = path.strip_prefix(&self.root).unwrap_or(path);
let relative_str = relative.to_string_lossy();
for pattern in &self.exclude_patterns {
if pattern.matches(&relative_str) || pattern.matches(&format!("{}/", relative_str)) {
return true;
}
let pattern_str = pattern.as_str();
if pattern_str.contains("**") {
if let Some(dir_name) = extract_dir_name(pattern_str) {
if let Some(entry_name) = entry.file_name().to_str() {
if entry_name == dir_name {
return true;
}
}
}
}
}
false
}
fn should_include(&self, path: &Path) -> bool {
let relative = path.strip_prefix(&self.root).unwrap_or(path);
let relative_str = relative.to_string_lossy();
for pattern in &self.exclude_patterns {
if pattern.matches(&relative_str) {
return false;
}
}
if self.include_patterns.is_empty() {
return true;
}
for pattern in &self.include_patterns {
if pattern.matches(&relative_str) {
return true;
}
}
false
}
pub fn should_index(&self, path: &Path) -> bool {
self.should_include(path)
}
}
/// Extracts the single literal directory name from a pattern such as
/// `**/node_modules/**`.
///
/// Leading `**/` and trailing `/**` segments are stripped. The remainder is
/// returned only when it is a plain name: non-empty, a single path component,
/// and free of glob metacharacters (`*`, `?`, `[`). Anything else yields `None`,
/// because comparing it literally against a directory name would not reflect
/// what the glob actually matches.
fn extract_dir_name(pattern: &str) -> Option<&str> {
    let name = pattern.trim_start_matches("**/").trim_end_matches("/**");
    let is_plain_name =
        !name.is_empty() && !name.contains(|c: char| matches!(c, '/' | '*' | '?' | '['));
    if is_plain_name {
        Some(name)
    } else {
        None
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::path::Path;
    use tempfile::tempdir;

    /// Converts string literals into the owned pattern list `FileWalker::new` takes.
    fn globs(patterns: &[&str]) -> Vec<String> {
        patterns.iter().map(|p| p.to_string()).collect()
    }

    /// Writes `contents` to `relative` under `root`, creating parent directories.
    fn write_file(root: &Path, relative: &str, contents: &str) {
        let path = root.join(relative);
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(path, contents).unwrap();
    }

    /// Collects a walk as sorted, `/`-separated paths relative to the walker's
    /// root, so assertions do not depend on directory iteration order or on the
    /// platform's path separator.
    fn walked(walker: &FileWalker) -> Vec<String> {
        let mut found: Vec<String> = walker
            .walk()
            .map(|path| {
                path.strip_prefix(walker.root())
                    .unwrap()
                    .components()
                    .map(|c| c.as_os_str().to_string_lossy().into_owned())
                    .collect::<Vec<_>>()
                    .join("/")
            })
            .collect();
        found.sort();
        found
    }

    #[test]
    fn test_file_walker() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "test.rs", "fn main() {}");
        write_file(root, "test.txt", "hello");
        write_file(root, "readme.md", "# Test");

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*.rs", "**/*.md"]), vec![]);

        assert_eq!(walked(&walker), ["readme.md", "test.rs"]);
    }

    #[test]
    fn test_exclusion() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "src/main.rs", "fn main() {}");
        write_file(root, "node_modules/pkg.js", "module");

        let walker = FileWalker::new(
            root.to_path_buf(),
            globs(&["**/*.rs", "**/*.js"]),
            globs(&["**/node_modules/**"]),
        );

        assert_eq!(walked(&walker), ["src/main.rs"]);
    }

    #[test]
    fn test_extract_dir_name() {
        assert_eq!(extract_dir_name("**/node_modules/**"), Some("node_modules"));
        assert_eq!(extract_dir_name("**/target/**"), Some("target"));
        assert_eq!(extract_dir_name("**/*.rs"), None);
        assert_eq!(extract_dir_name("**/src/test/**"), None);
    }

    #[test]
    fn test_empty_directory() {
        let dir = tempdir().unwrap();
        let walker = FileWalker::new(dir.path().to_path_buf(), globs(&["**/*.rs"]), vec![]);

        assert!(walked(&walker).is_empty());
    }

    #[test]
    fn test_nested_directories() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "a/file1.rs", "// level 1");
        write_file(root, "a/b/file2.rs", "// level 2");
        write_file(root, "a/b/c/file3.rs", "// level 3");
        write_file(root, "a/b/c/d/file4.rs", "// level 4");

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*.rs"]), vec![]);

        assert_eq!(
            walked(&walker),
            [
                "a/b/c/d/file4.rs",
                "a/b/c/file3.rs",
                "a/b/file2.rs",
                "a/file1.rs",
            ]
        );
    }

    #[test]
    fn test_multiple_exclusion_patterns() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "src/main.rs", "code");
        write_file(root, "node_modules/pkg.js", "module");
        write_file(root, "target/debug/binary", "binary");
        write_file(root, ".git/config", "gitconfig");

        let walker = FileWalker::new(
            root.to_path_buf(),
            globs(&["**/*"]),
            globs(&["**/node_modules/**", "**/target/**", "**/.git/**"]),
        );

        assert_eq!(walked(&walker), ["src/main.rs"]);
    }

    #[test]
    fn test_no_include_patterns_includes_all() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "file1.rs", "rust");
        write_file(root, "file2.txt", "text");
        write_file(root, "file3.md", "markdown");

        let walker = FileWalker::new(root.to_path_buf(), vec![], vec![]);

        assert_eq!(walked(&walker), ["file1.rs", "file2.txt", "file3.md"]);
    }

    #[test]
    fn test_should_index() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        let walker = FileWalker::new(
            root.to_path_buf(),
            globs(&["**/*.rs"]),
            globs(&["**/excluded/**"]),
        );

        assert!(walker.should_index(&root.join("src/main.rs")));
        assert!(!walker.should_index(&root.join("src/main.txt")));
        assert!(!walker.should_index(&root.join("excluded/file.rs")));
    }

    #[test]
    fn test_root_accessor() {
        let dir = tempdir().unwrap();
        let walker = FileWalker::new(dir.path().to_path_buf(), vec![], vec![]);

        assert_eq!(walker.root(), dir.path());
    }

    #[test]
    fn test_extension_case_sensitivity() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "file_upper.RS", "uppercase");
        write_file(root, "file_lower.rs", "lowercase");
        write_file(root, "file_mixed.Rs", "mixed");

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*.rs"]), vec![]);

        // Pattern matching is case-sensitive, so only the lowercase extension is selected.
        assert_eq!(walked(&walker), ["file_lower.rs"]);
    }

    #[test]
    fn test_files_without_extension() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "Makefile", "all:");
        write_file(root, "Dockerfile", "FROM");
        write_file(root, "README", "readme");

        let walker = FileWalker::new(
            root.to_path_buf(),
            globs(&["**/Makefile", "**/Dockerfile"]),
            vec![],
        );

        assert_eq!(walked(&walker), ["Dockerfile", "Makefile"]);
    }

    #[test]
    fn test_hidden_files() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, ".hidden", "hidden");
        write_file(root, ".gitignore", "gitignore");
        write_file(root, "visible.txt", "visible");

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*"]), vec![]);

        // Dot-files get no special treatment from the default match options.
        assert_eq!(walked(&walker), [".gitignore", ".hidden", "visible.txt"]);
    }

    #[test]
    fn test_multiple_extensions_pattern() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "file.ts", "typescript");
        write_file(root, "file.tsx", "tsx");
        write_file(root, "file.js", "javascript");
        write_file(root, "file.jsx", "jsx");

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*.ts", "**/*.tsx"]), vec![]);

        assert_eq!(walked(&walker), ["file.ts", "file.tsx"]);
    }

    #[test]
    fn test_specific_file_exclusion() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        write_file(root, "keep.rs", "keep");
        write_file(root, "exclude.rs", "exclude");

        let walker = FileWalker::new(
            root.to_path_buf(),
            globs(&["**/*.rs"]),
            globs(&["**/exclude.rs"]),
        );

        assert_eq!(walked(&walker), ["keep.rs"]);
    }

    #[test]
    fn test_walks_are_deterministic() {
        let dir = tempdir().unwrap();
        let root = dir.path();
        for i in 0..5 {
            write_file(root, &format!("file{}.rs", i), "content");
        }

        let walker = FileWalker::new(root.to_path_buf(), globs(&["**/*.rs"]), vec![]);
        let first = walked(&walker);
        let second = walked(&walker);

        // Compare the actual file sets, not just how many files were found.
        assert_eq!(first.len(), 5);
        assert_eq!(first, second);
    }
}