use ignore::{WalkBuilder, gitignore::Gitignore};
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
/// Filters applied while collecting files from a directory tree.
///
/// The derived [`Default`] leaves every filter off: no file-type selection,
/// no excluded extensions, and no extra ignore patterns.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WalkOptions {
    /// Name of the file type to select during the walk; `None` applies no
    /// type filter.
    pub file_type: Option<String>,
    /// Extensions whose files are skipped. Entries are trimmed, stripped of
    /// leading dots, and compared ASCII-case-insensitively.
    pub exclude_extensions: Vec<String>,
    /// Gitignore-style pattern lines evaluated relative to the walk root.
    pub ignore_patterns: Vec<String>,
}

impl WalkOptions {
    /// Creates options that only carry `file_type`; every other filter keeps
    /// its default (empty) value.
    #[must_use]
    pub fn from_file_type(file_type: Option<&str>) -> Self {
        let file_type = file_type.map(String::from);
        Self {
            file_type,
            ..Self::default()
        }
    }
}
/// Collects the files under `root`, optionally restricted to `file_type`.
///
/// Convenience wrapper around [`collect_files_with_options`] that sets no
/// filter other than the file type.
#[must_use]
pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
    let options = WalkOptions::from_file_type(file_type);
    collect_files_with_options(root, &options)
}
#[must_use]
pub fn collect_files_with_options(root: &Path, options: &WalkOptions) -> Vec<PathBuf> {
let files = Arc::new(Mutex::new(Vec::new()));
let excluded_extensions = Arc::new(normalized_extensions(&options.exclude_extensions));
let ignore_matcher = build_ignore_matcher(root, &options.ignore_patterns).map(Arc::new);
let mut builder = WalkBuilder::new(root);
builder.hidden(true).git_ignore(true).git_global(true);
if let Some(ft) = options.file_type.as_deref() {
let mut types_builder = ignore::types::TypesBuilder::new();
types_builder.add_defaults();
types_builder.select(ft);
if let Ok(types) = types_builder.build() {
builder.types(types);
}
}
builder.build_parallel().run(|| {
let files = Arc::clone(&files);
let excluded_extensions = Arc::clone(&excluded_extensions);
let ignore_matcher = ignore_matcher.clone();
Box::new(move |entry| {
let Ok(entry) = entry else {
return ignore::WalkState::Continue;
};
let Some(file_type) = entry.file_type() else {
return ignore::WalkState::Continue;
};
let is_dir = file_type.is_dir();
if ignore_matcher
.as_ref()
.is_some_and(|matcher| is_ignored(matcher, entry.path(), is_dir))
{
return if is_dir {
ignore::WalkState::Skip
} else {
ignore::WalkState::Continue
};
}
if !file_type.is_file() {
return ignore::WalkState::Continue;
}
if has_excluded_extension(entry.path(), &excluded_extensions) {
return ignore::WalkState::Continue;
}
if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
&& matches!(
name,
"Cargo.lock"
| "package-lock.json"
| "yarn.lock"
| "pnpm-lock.yaml"
| "poetry.lock"
| "Gemfile.lock"
| "go.sum"
)
{
return ignore::WalkState::Continue;
}
if let Ok(mut files) = files.lock() {
files.push(entry.into_path());
}
ignore::WalkState::Continue
})
});
let mut files = Arc::try_unwrap(files)
.ok()
.and_then(|files| files.into_inner().ok())
.unwrap_or_default();
files.sort();
files
}
/// Builds the lookup set of excluded extensions.
///
/// Each entry is trimmed of surrounding whitespace, stripped of leading dots
/// and lowercased (ASCII); entries that end up empty are dropped.
fn normalized_extensions(extensions: &[String]) -> HashSet<String> {
    let mut normalized = HashSet::new();
    for raw in extensions {
        let bare = raw.trim().trim_start_matches('.');
        if !bare.is_empty() {
            normalized.insert(bare.to_ascii_lowercase());
        }
    }
    normalized
}
/// Reports whether `path` has an extension contained in `excluded_extensions`.
///
/// The path's extension is lowercased (ASCII) before the lookup, so the set is
/// expected to hold lowercase entries. Paths without an extension, or with one
/// that is not valid UTF-8, are never excluded.
fn has_excluded_extension(path: &Path, excluded_extensions: &HashSet<String>) -> bool {
    let Some(extension) = path.extension().and_then(|ext| ext.to_str()) else {
        return false;
    };
    excluded_extensions.contains(&extension.to_ascii_lowercase())
}
fn build_ignore_matcher(root: &Path, patterns: &[String]) -> Option<Gitignore> {
if patterns.is_empty() {
return None;
}
let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
for pattern in patterns {
if let Err(error) = builder.add_line(None, pattern) {
tracing::warn!(pattern, %error, "invalid ripvec ignore pattern; skipping");
}
}
builder.build().ok().filter(|matcher| !matcher.is_empty())
}
/// Reports whether `path`, or any of its parent directories, is ignored by
/// `matcher`.
///
/// `is_dir` tells the matcher whether `path` itself is a directory, which
/// matters for directory-only patterns such as `generated/`.
// NOTE(review): the `ignore` crate documents that
// `matched_path_or_any_parents` panics when `path` is not under the matcher's
// root — confirm callers only pass walker entries from that same root.
fn is_ignored(matcher: &Gitignore, path: &Path, is_dir: bool) -> bool {
    let verdict = matcher.matched_path_or_any_parents(path, is_dir);
    verdict.is_ignore()
}
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `relative` below `root`, including any missing parent
    /// directories, and fills it with placeholder content.
    fn write_file(root: &Path, relative: &str) {
        let target = root.join(relative);
        if let Some(parent) = target.parent() {
            std::fs::create_dir_all(parent).expect("create parent");
        }
        std::fs::write(target, "test").expect("write file");
    }

    /// Runs the walker and returns the results as `/`-separated paths
    /// relative to `root`, so assertions are platform independent.
    fn collect_relative(root: &Path, options: &WalkOptions) -> Vec<String> {
        let mut relative = Vec::new();
        for path in collect_files_with_options(root, options) {
            let under_root = path.strip_prefix(root).expect("under root");
            relative.push(under_root.to_string_lossy().replace('\\', "/"));
        }
        relative
    }

    /// Excluded extensions match regardless of case and of a leading dot.
    #[test]
    fn excludes_extensions_case_insensitively() {
        let dir = TempDir::new().expect("tempdir");
        for relative in ["src/main.rs", "logs/events.JSONL", "README.md"] {
            write_file(dir.path(), relative);
        }

        let options = WalkOptions {
            exclude_extensions: vec![String::from("jsonl"), String::from(".md")],
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["src/main.rs"]);
    }

    /// Directory patterns, globs and negations behave like gitignore rules.
    #[test]
    fn excludes_gitignore_style_patterns() {
        let dir = TempDir::new().expect("tempdir");
        for relative in [
            "src/main.rs",
            "generated/schema.rs",
            "notes/keep.md",
            "notes/drop.md",
        ] {
            write_file(dir.path(), relative);
        }

        let options = WalkOptions {
            ignore_patterns: vec![
                String::from("generated/"),
                String::from("*.md"),
                String::from("!notes/keep.md"),
            ],
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["notes/keep.md", "src/main.rs"]);
    }

    /// A root given relative to the current directory must work together
    /// with ignore patterns.
    #[test]
    fn relative_roots_with_ignore_patterns_do_not_panic() {
        // `dir` stays bound until the end of the test so the directory
        // outlives the walk.
        let dir = tempfile::Builder::new()
            .prefix("ripvec-walk-test-")
            .tempdir_in(".")
            .expect("tempdir in current directory");
        let root: PathBuf = dir
            .path()
            .file_name()
            .map(PathBuf::from)
            .expect("tempdir file name");
        write_file(&root, "src/main.rs");
        write_file(&root, "notes/drop.md");

        let options = WalkOptions {
            ignore_patterns: vec![String::from("*.md")],
            ..WalkOptions::default()
        };
        let files = collect_relative(&root, &options);

        assert_eq!(files, ["src/main.rs"]);
    }
}