use ignore::overrides::OverrideBuilder;
use ignore::types::TypesBuilder;
use ignore::WalkBuilder;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, LazyLock, Mutex};
use super::searcher;
use super::types::FileMatch;
/// Directory names pruned from every walk when the caller does not supply an
/// explicit skip list (see the `skip_dirs` parameter of `build_walker`).
/// Covers common VCS, cache, virtualenv, and build-output directories.
const DEFAULT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "__pycache__",
    ".tox",
    ".mypy_cache",
    ".pytest_cache",
    "dist",
    "build",
    ".eggs",
    "venv",
    ".venv",
    "target",
    ".cargo",
    ".ruff_cache",
];
/// Lazily-built set view of `DEFAULT_SKIP_DIRS` for O(1) membership checks
/// inside the per-entry walk filter.
static DEFAULT_SKIP_SET: LazyLock<HashSet<&'static str>> =
    LazyLock::new(|| DEFAULT_SKIP_DIRS.iter().copied().collect());
/// Assemble a `WalkBuilder` over `source_dirs`, configured with the given
/// glob override, optional file-type filter, directory skip list, and
/// gitignore handling.
///
/// Returns `Err` when `source_dirs` is empty, the type name is unknown to
/// the default type definitions, or the glob pattern fails to parse.
fn build_walker(
    source_dirs: &[String],
    glob_pattern: &str,
    type_filter: Option<&str>,
    skip_dirs: Option<&[String]>,
    respect_gitignore: bool,
) -> Result<WalkBuilder, String> {
    let (first_dir, extra_dirs) = source_dirs
        .split_first()
        .ok_or_else(|| String::from("No source directories provided."))?;

    let mut builder = WalkBuilder::new(first_dir);
    for dir in extra_dirs {
        builder.add(dir);
    }

    // Visit dotfiles; gitignore semantics are entirely caller-controlled.
    builder
        .hidden(false)
        .git_ignore(respect_gitignore)
        .git_global(respect_gitignore)
        .git_exclude(respect_gitignore);

    if let Some(type_name) = type_filter {
        let mut types_builder = TypesBuilder::new();
        types_builder.add_defaults();
        types_builder.select(type_name);
        let types = match types_builder.build() {
            Ok(t) => t,
            Err(e) => return Err(format!("Unknown file type '{}': {}", type_name, e)),
        };
        builder.types(types);
    }

    // "*" matches everything, so installing an override would be a no-op.
    if glob_pattern != "*" {
        let mut override_builder = OverrideBuilder::new(first_dir);
        if let Err(e) = override_builder.add(glob_pattern) {
            return Err(format!("Invalid glob '{}': {}", glob_pattern, e));
        }
        let built = match override_builder.build() {
            Ok(o) => o,
            Err(e) => return Err(format!("Glob build error: {}", e)),
        };
        builder.overrides(built);
    }

    // Prune directories whose bare name is in the (custom or default) skip
    // set; everything else — files and unreadable names — passes through.
    let custom_skip: Option<HashSet<String>> =
        skip_dirs.map(|dirs| dirs.iter().cloned().collect());
    builder.filter_entry(move |entry| {
        let is_dir = entry.file_type().map(|t| t.is_dir()).unwrap_or(false);
        if !is_dir {
            return true;
        }
        match entry.file_name().to_str() {
            Some(name) => match &custom_skip {
                Some(set) => !set.contains(name),
                None => !DEFAULT_SKIP_SET.contains(name),
            },
            None => true,
        }
    });

    Ok(builder)
}
/// Walk `source_dirs` single-threaded and return every matching file path,
/// sorted lexicographically by path. Individual walk errors (e.g. unreadable
/// entries) are skipped silently, mirroring the parallel variant.
pub fn walk_sequential(
    source_dirs: &[String],
    glob_pattern: &str,
    type_filter: Option<&str>,
    skip_dirs: Option<&[String]>,
    respect_gitignore: bool,
) -> Result<Vec<PathBuf>, String> {
    let mut builder =
        build_walker(source_dirs, glob_pattern, type_filter, skip_dirs, respect_gitignore)?;
    // Deterministic ordering: sort entries by full path during the walk.
    builder.sort_by_file_path(|a, b| a.cmp(b));
    let paths = builder
        .build()
        .flatten()
        .filter(|entry| entry.file_type().map(|t| t.is_file()).unwrap_or(false))
        .map(|entry| entry.into_path())
        .collect();
    Ok(paths)
}
/// Walk `source_dirs` with a parallel walker and run `matcher` against every
/// regular file, returning one `FileMatch` per file that produced matches.
///
/// `match_limit == 0` means unlimited. A positive limit makes workers quit
/// the walk once the shared running match count reaches it; the stop is
/// best-effort — because workers race on the counter, the returned matches
/// can slightly exceed `match_limit`.
///
/// Results are sorted by path before returning, so the output order is
/// stable despite nondeterministic worker scheduling.
#[allow(clippy::too_many_arguments)]
pub fn walk_and_search_parallel(
    source_dirs: &[String],
    glob_pattern: &str,
    type_filter: Option<&str>,
    skip_dirs: Option<&[String]>,
    respect_gitignore: bool,
    matcher: &grep_regex::RegexMatcher,
    context_before: usize,
    context_after: usize,
    multiline: bool,
    match_limit: usize,
) -> Result<Vec<FileMatch>, String> {
    let builder = build_walker(
        source_dirs,
        glob_pattern,
        type_filter,
        skip_dirs,
        respect_gitignore,
    )?;
    let walker = builder.build_parallel();
    // Each worker accumulates its FileMatches locally and pushes the whole
    // batch once (see FlushGuard), keeping mutex contention low.
    let all_batches: Arc<Mutex<Vec<Vec<FileMatch>>>> = Arc::new(Mutex::new(Vec::new()));
    // Running total of match_count across all workers.
    let total_count = Arc::new(AtomicUsize::new(0));
    let has_context = context_before > 0 || context_after > 0;
    walker.run(|| {
        // Per-worker state: clone the shared handles and build a
        // searcher/sink pair that is reused for every file this worker sees.
        let all_batches = Arc::clone(&all_batches);
        let total_count = Arc::clone(&total_count);
        let mut thread_searcher = searcher::build_searcher(
            context_before,
            context_after,
            multiline,
            // NOTE(review): the meaning of this fourth flag is defined in
            // searcher::build_searcher and is not visible from this file.
            true, );
        let mut thread_sink = searcher::CollectSink::new(has_context);
        // RAII guard: flushes this worker's collected matches into the
        // shared vector when the worker closure is dropped — including when
        // the walk terminates early via WalkState::Quit.
        struct FlushGuard {
            matches: Vec<FileMatch>,
            target: Arc<Mutex<Vec<Vec<FileMatch>>>>,
        }
        impl Drop for FlushGuard {
            fn drop(&mut self) {
                if !self.matches.is_empty() {
                    self.target
                        .lock()
                        .unwrap()
                        .push(std::mem::take(&mut self.matches));
                }
            }
        }
        let mut guard = FlushGuard {
            matches: Vec::new(),
            target: Arc::clone(&all_batches),
        };
        Box::new(move |entry| {
            // Quit early when a positive limit has already been reached by
            // any worker (0 disables the limit entirely).
            if match_limit > 0 && total_count.load(Ordering::Relaxed) >= match_limit {
                return ignore::WalkState::Quit;
            }
            // Walk errors are skipped silently, matching walk_sequential.
            let entry = match entry {
                Ok(e) => e,
                Err(_) => return ignore::WalkState::Continue,
            };
            if !entry.file_type().map(|t| t.is_file()).unwrap_or(false) {
                return ignore::WalkState::Continue;
            }
            // The sink is reused across files; drop any leftover state.
            thread_sink.clear();
            if let Some(fm) = searcher::search_file(
                entry.path(),
                matcher,
                &mut thread_searcher,
                &mut thread_sink,
            ) {
                // fetch_add returns the previous value; add this file's
                // count back to get the new running total.
                let new_total =
                    total_count.fetch_add(fm.match_count, Ordering::Relaxed) + fm.match_count;
                guard.matches.push(fm);
                if match_limit > 0 && new_total >= match_limit {
                    return ignore::WalkState::Quit;
                }
            }
            ignore::WalkState::Continue
        })
    });
    // All worker closures (and their Arc clones) have been dropped once
    // run() returns, so this Arc is uniquely held and try_unwrap cannot fail.
    let batches = Arc::try_unwrap(all_batches).unwrap().into_inner().unwrap();
    let total_len: usize = batches.iter().map(|b| b.len()).sum();
    let mut matches = Vec::with_capacity(total_len);
    for batch in batches {
        matches.extend(batch);
    }
    // Sort for deterministic output regardless of thread scheduling.
    matches.sort_by(|a, b| a.path.cmp(&b.path));
    Ok(matches)
}
/// Render `path` relative to `relative_to` when given, otherwise relative to
/// `source_dir`. If `path` does not live under the chosen base, the full
/// path is returned unchanged (lossily converted to a `String`).
pub fn relativize(path: &Path, relative_to: Option<&Path>, source_dir: &Path) -> String {
    let base = match relative_to {
        Some(explicit) => explicit,
        None => source_dir,
    };
    let shown = match path.strip_prefix(base) {
        Ok(rel) => rel,
        Err(_) => path,
    };
    shown.to_string_lossy().into_owned()
}