1use ignore::{WalkBuilder, gitignore::Gitignore};
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, Mutex};
11
/// Filters applied while collecting files from a directory tree.
///
/// The default value applies none of these filters.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WalkOptions {
    /// Name of a file type to select through the `ignore` crate's type
    /// definitions; `None` leaves the walk unrestricted by type.
    pub file_type: Option<String>,
    /// File extensions to skip. Entries are compared case-insensitively and
    /// may be written with or without a leading dot.
    pub exclude_extensions: Vec<String>,
    /// Gitignore-style patterns, interpreted relative to the walk root, whose
    /// matches are left out of the result.
    pub ignore_patterns: Vec<String>,
}
22
23impl WalkOptions {
24 #[must_use]
25 pub fn from_file_type(file_type: Option<&str>) -> Self {
26 Self {
27 file_type: file_type.map(str::to_string),
28 ..Self::default()
29 }
30 }
31}
32
33#[must_use]
45pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
46 collect_files_with_options(root, &WalkOptions::from_file_type(file_type))
47}
48
49#[must_use]
52pub fn collect_files_with_options(root: &Path, options: &WalkOptions) -> Vec<PathBuf> {
53 let files = Arc::new(Mutex::new(Vec::new()));
54 let excluded_extensions = Arc::new(normalized_extensions(&options.exclude_extensions));
55 let ignore_matcher = build_ignore_matcher(root, &options.ignore_patterns).map(Arc::new);
56
57 let mut builder = WalkBuilder::new(root);
58 builder.hidden(true).git_ignore(true).git_global(true);
59
60 if let Some(ft) = options.file_type.as_deref() {
61 let mut types_builder = ignore::types::TypesBuilder::new();
62 types_builder.add_defaults();
63 types_builder.select(ft);
64 if let Ok(types) = types_builder.build() {
65 builder.types(types);
66 }
67 }
68
69 builder.build_parallel().run(|| {
70 let files = Arc::clone(&files);
71 let excluded_extensions = Arc::clone(&excluded_extensions);
72 let ignore_matcher = ignore_matcher.clone();
73 Box::new(move |entry| {
74 let Ok(entry) = entry else {
75 return ignore::WalkState::Continue;
76 };
77 let Some(file_type) = entry.file_type() else {
78 return ignore::WalkState::Continue;
79 };
80 let is_dir = file_type.is_dir();
81 if ignore_matcher
82 .as_ref()
83 .is_some_and(|matcher| is_ignored(matcher, entry.path(), is_dir))
84 {
85 return if is_dir {
86 ignore::WalkState::Skip
87 } else {
88 ignore::WalkState::Continue
89 };
90 }
91 if !file_type.is_file() {
92 return ignore::WalkState::Continue;
93 }
94 if has_excluded_extension(entry.path(), &excluded_extensions) {
95 return ignore::WalkState::Continue;
96 }
97 if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
99 && matches!(
100 name,
101 "Cargo.lock"
102 | "package-lock.json"
103 | "yarn.lock"
104 | "pnpm-lock.yaml"
105 | "poetry.lock"
106 | "Gemfile.lock"
107 | "go.sum"
108 )
109 {
110 return ignore::WalkState::Continue;
111 }
112 if let Ok(mut files) = files.lock() {
113 files.push(entry.into_path());
114 }
115 ignore::WalkState::Continue
116 })
117 });
118
119 let mut files = Arc::try_unwrap(files)
120 .ok()
121 .and_then(|files| files.into_inner().ok())
122 .unwrap_or_default();
123 files.sort();
124 files
125}
126
/// Converts user-supplied extensions into a lookup set.
///
/// Each entry is trimmed of surrounding whitespace, stripped of any leading
/// dots, and lowercased (ASCII). Entries that end up empty are dropped.
fn normalized_extensions(extensions: &[String]) -> HashSet<String> {
    extensions
        .iter()
        .map(|raw| raw.trim().trim_start_matches('.'))
        .filter(|bare| !bare.is_empty())
        .map(str::to_ascii_lowercase)
        .collect()
}
136
/// Reports whether `path` has an extension listed in `excluded_extensions`.
///
/// The path's extension is lowercased (ASCII) before the lookup, so the set is
/// expected to hold lowercase entries without a leading dot. Paths with no
/// extension, or with one that is not valid UTF-8, are never excluded.
fn has_excluded_extension(path: &Path, excluded_extensions: &HashSet<String>) -> bool {
    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    excluded_extensions.contains(extension.to_ascii_lowercase().as_str())
}
143
144fn build_ignore_matcher(root: &Path, patterns: &[String]) -> Option<Gitignore> {
145 if patterns.is_empty() {
146 return None;
147 }
148 let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
149 for pattern in patterns {
150 if let Err(error) = builder.add_line(None, pattern) {
151 tracing::warn!(pattern, %error, "invalid ripvec ignore pattern; skipping");
152 }
153 }
154 builder.build().ok().filter(|matcher| !matcher.is_empty())
155}
156
157fn is_ignored(matcher: &Gitignore, path: &Path, is_dir: bool) -> bool {
158 matcher
159 .matched_path_or_any_parents(path, is_dir)
160 .is_ignore()
161}
162
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `relative` under `root`, including any missing parent
    /// directories, and fills it with placeholder content.
    fn write_file(root: &Path, relative: &str) {
        let target = root.join(relative);
        if let Some(parent_dir) = target.parent() {
            std::fs::create_dir_all(parent_dir).expect("create parent");
        }
        std::fs::write(target, "test").expect("write file");
    }

    /// Runs the walker and returns its results as `/`-separated paths
    /// relative to `root`, in the order the walker returned them.
    fn collect_relative(root: &Path, options: &WalkOptions) -> Vec<String> {
        let mut relative = Vec::new();
        for found in collect_files_with_options(root, options) {
            let stripped = found.strip_prefix(root).expect("under root");
            relative.push(stripped.to_string_lossy().replace('\\', "/"));
        }
        relative
    }

    #[test]
    fn excludes_extensions_case_insensitively() {
        let dir = TempDir::new().expect("tempdir");
        for relative in ["src/main.rs", "logs/events.JSONL", "README.md"] {
            write_file(dir.path(), relative);
        }

        let options = WalkOptions {
            exclude_extensions: ["jsonl", ".md"].map(String::from).to_vec(),
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["src/main.rs"]);
    }

    #[test]
    fn excludes_gitignore_style_patterns() {
        let dir = TempDir::new().expect("tempdir");
        for relative in [
            "src/main.rs",
            "generated/schema.rs",
            "notes/keep.md",
            "notes/drop.md",
        ] {
            write_file(dir.path(), relative);
        }

        // The negated pattern re-includes one markdown file.
        let options = WalkOptions {
            ignore_patterns: ["generated/", "*.md", "!notes/keep.md"]
                .map(String::from)
                .to_vec(),
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["notes/keep.md", "src/main.rs"]);
    }

    #[test]
    fn relative_roots_with_ignore_patterns_do_not_panic() {
        // Create the fixture inside the current directory so that the walk
        // root can be expressed as a bare relative path.
        let dir = tempfile::Builder::new()
            .prefix("ripvec-walk-test-")
            .tempdir_in(".")
            .expect("tempdir in current directory");
        let dir_name = dir.path().file_name().expect("tempdir file name");
        let root = PathBuf::from(dir_name);
        write_file(&root, "src/main.rs");
        write_file(&root, "notes/drop.md");

        let options = WalkOptions {
            ignore_patterns: vec![String::from("*.md")],
            ..WalkOptions::default()
        };
        let files = collect_relative(&root, &options);

        assert_eq!(files, ["src/main.rs"]);
    }
}