use crate::Language;
use globset::{Glob, GlobSet, GlobSetBuilder};
use rayon::prelude::*;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;
/// Options controlling which files the walker functions in this module return.
#[derive(Debug, Clone, Default)]
pub struct WalkerConfig {
/// Glob patterns for paths to leave out; compiled by `build_glob_set`.
/// An empty list disables exclusion entirely.
pub exclude_patterns: Vec<String>,
/// Languages to keep. An empty list accepts every file; otherwise a file
/// is kept only when `Language::from_path` yields one of these.
pub languages: Vec<Language>,
/// Maximum traversal depth handed to `WalkDir::max_depth`.
/// `0` leaves the depth unlimited (no limit is applied).
pub max_depth: usize,
/// Passed straight through to `WalkDir::follow_links`.
pub follow_links: bool,
/// Size in bytes above which `walk_paths` emits a "Large file" warning.
/// `0` disables the size check.
pub large_file_threshold: u64,
/// When `true`, `walk_paths` omits files that exceed
/// `large_file_threshold` from its result (the warning is still emitted).
pub skip_large_files: bool,
}
/// Compiles `patterns` into a single [`GlobSet`].
///
/// Returns `None` when `patterns` is empty or when the set itself fails to
/// build. Individual patterns that do not parse as a glob are skipped
/// silently, so the resulting set may contain fewer globs than were supplied.
pub fn build_glob_set(patterns: &[String]) -> Option<GlobSet> {
    if patterns.is_empty() {
        return None;
    }

    patterns
        .iter()
        // Best effort: unparsable patterns are dropped rather than reported.
        .filter_map(|raw| Glob::new(raw).ok())
        .fold(GlobSetBuilder::new(), |mut acc, compiled| {
            acc.add(compiled);
            acc
        })
        .build()
        .ok()
}
/// Reports whether `path` is matched by any glob in `glob_set`.
///
/// With no glob set (`None`) nothing is excluded. Otherwise the path is
/// tested, in order, as:
/// 1. the path exactly as given,
/// 2. the path with a leading `./` removed (if it has one),
/// 3. its final file name on its own,
/// 4. each normal (named) component on its own.
///
/// The first match wins and yields `true`.
fn is_excluded(path: &Path, glob_set: &Option<GlobSet>) -> bool {
    let matcher = match glob_set {
        Some(set) => set,
        None => return false,
    };

    if matcher.is_match(path) {
        return true;
    }

    // Patterns are usually written relative to the root, so retry without "./".
    let lossy = path.to_string_lossy();
    let matches_without_dot = lossy
        .strip_prefix("./")
        .map_or(false, |rest| matcher.is_match(Path::new(rest)));
    if matches_without_dot {
        return true;
    }

    let matches_file_name = path
        .file_name()
        .map_or(false, |name| matcher.is_match(name));
    if matches_file_name {
        return true;
    }

    // A bare directory name such as `node_modules` excludes everything below it.
    path.components().any(|component| match component {
        std::path::Component::Normal(segment) => matcher.is_match(Path::new(segment)),
        _ => false,
    })
}
/// Decides whether `path` passes the language filter.
///
/// An empty `languages` slice means "no filter" and accepts every path.
/// Otherwise the path is accepted only when `Language::from_path` recognises
/// it and the detected language is listed in `languages`.
fn matches_language_filter(path: &Path, languages: &[Language]) -> bool {
    languages.is_empty()
        || Language::from_path(path).map_or(false, |detected| languages.contains(&detected))
}
/// Produces a warning message when the file at `path` is larger than
/// `threshold` bytes.
///
/// Returns `None` when `threshold` is `0` (check disabled), when the file's
/// metadata cannot be read, or when the file size does not exceed the
/// threshold. The size in the message is shown in MB (bytes / 1024 / 1024)
/// with one decimal place.
fn check_file_size(path: &Path, threshold: u64) -> Option<String> {
    if threshold == 0 {
        return None;
    }

    // Unreadable metadata is treated as "nothing to warn about".
    let bytes = std::fs::metadata(path).ok()?.len();
    if bytes <= threshold {
        return None;
    }

    let megabytes = bytes as f64 / 1024.0 / 1024.0;
    Some(format!(
        "Large file ({:.1} MB): {}",
        megabytes,
        path.display()
    ))
}
/// Recursively collects the regular files under `root` that survive the
/// exclusion globs and the language filter in `config`.
///
/// * `config.exclude_patterns` — entries matching these are dropped, and
///   matching directories are not descended into.
/// * `config.max_depth` — applied only when greater than `0`; `0` leaves the
///   traversal depth unlimited.
/// * `config.follow_links` — forwarded to `WalkDir::follow_links`.
/// * `config.languages` — see `matches_language_filter`.
///
/// Entries that fail to be read during traversal are skipped silently.
/// File-size settings in `config` are not consulted here.
pub fn walk_files(root: &Path, config: &WalkerConfig) -> Vec<PathBuf> {
    let glob_set = build_glob_set(&config.exclude_patterns);

    let mut walker = WalkDir::new(root).follow_links(config.follow_links);
    if config.max_depth > 0 {
        walker = walker.max_depth(config.max_depth);
    }

    walker
        .into_iter()
        // `filter_entry` both prunes excluded directories and withholds
        // excluded entries from the iterator, so no second exclusion pass
        // over the yielded files is needed.
        .filter_entry(|entry| !is_excluded(entry.path(), &glob_set))
        // Best effort: unreadable entries are dropped rather than reported.
        .filter_map(Result::ok)
        .filter(|entry| entry.file_type().is_file())
        .filter(|entry| matches_language_filter(entry.path(), &config.languages))
        // Take ownership of the entry's path instead of cloning it.
        .map(|entry| entry.into_path())
        .collect()
}
/// Collects files with `walk_files` and then applies `processor` to each of
/// them through rayon's parallel iterator, gathering the results in a `Vec`.
///
/// The file list is fully collected before any processing starts.
pub fn walk_files_parallel<F, T>(root: &Path, config: &WalkerConfig, processor: F) -> Vec<T>
where
    F: Fn(&Path) -> T + Send + Sync,
    T: Send,
{
    let discovered = walk_files(root, config);
    discovered
        .par_iter()
        .map(|file| processor(file.as_path()))
        .collect()
}
/// Resolves a mixed list of files and directories into the files to process,
/// together with human-readable warnings.
///
/// For each entry of `paths`:
/// * a file is kept unless it matches the exclude patterns or fails the
///   language filter — either case adds a warning instead;
/// * a directory is expanded with `walk_files`;
/// * a path that does not exist adds a warning;
/// * anything else (exists but is neither file nor directory) is ignored.
///
/// Every file that gets this far is size-checked with `check_file_size`; an
/// oversized file always adds a warning and is left out of the result only
/// when `config.skip_large_files` is set.
///
/// Returns `(files, warnings)`.
pub fn walk_paths(paths: &[PathBuf], config: &WalkerConfig) -> (Vec<PathBuf>, Vec<String>) {
    /// Applies the large-file policy to one candidate and records the outcome.
    fn admit(
        candidate: PathBuf,
        config: &WalkerConfig,
        accepted: &mut Vec<PathBuf>,
        notes: &mut Vec<String>,
    ) {
        if let Some(note) = check_file_size(&candidate, config.large_file_threshold) {
            notes.push(note);
            if config.skip_large_files {
                return;
            }
        }
        accepted.push(candidate);
    }

    let excludes = build_glob_set(&config.exclude_patterns);
    let mut accepted = Vec::new();
    let mut notes = Vec::new();

    for entry in paths.iter() {
        if entry.is_file() {
            if is_excluded(entry, &excludes) {
                notes.push(format!(
                    "Path '{}' is excluded by exclude patterns",
                    entry.display()
                ));
                continue;
            }
            if !matches_language_filter(entry, &config.languages) {
                notes.push(format!(
                    "Path '{}' does not match language filter",
                    entry.display()
                ));
                continue;
            }
            admit(entry.clone(), config, &mut accepted, &mut notes);
        } else if entry.is_dir() {
            // Exclusion and language filtering happen inside `walk_files`.
            for found in walk_files(entry, config) {
                admit(found, config, &mut accepted, &mut notes);
            }
        } else if !entry.exists() {
            notes.push(format!("Path '{}' does not exist", entry.display()));
        }
    }

    (accepted, notes)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Exclusion matches on a file-name glob and on a directory glob, and
    /// leaves unrelated paths alone.
    #[test]
    fn test_is_excluded() {
        let glob_set = build_glob_set(&["*.log".to_string(), "node_modules/**".to_string()]);

        let cases = [
            ("debug.log", true),
            ("node_modules/package/index.js", true),
            ("src/main.rs", false),
        ];
        for &(candidate, expected) in cases.iter() {
            assert_eq!(is_excluded(Path::new(candidate), &glob_set), expected);
        }
    }

    /// The language filter keeps listed languages, rejects others, and
    /// accepts everything when the filter is empty.
    #[test]
    fn test_matches_language_filter() {
        let wanted = [Language::Rust, Language::Python];

        let cases = [
            ("src/main.rs", true),
            ("scripts/build.py", true),
            ("index.js", false),
        ];
        for &(candidate, expected) in cases.iter() {
            assert_eq!(
                matches_language_filter(Path::new(candidate), &wanted),
                expected
            );
        }

        // No languages configured: every path passes.
        assert!(matches_language_filter(Path::new("index.js"), &[]));
    }
}