1use ignore::{WalkBuilder, gitignore::Gitignore};
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, Mutex};
11
/// Lowercase file extensions (without the leading dot) that identify a shell script.
///
/// Compared against the lowercased extension of each walked file.
const SHELL_EXTENSIONS: &[&str] = &[
    "sh",
    "bash",
    "bats",
];
18
/// Filters applied while collecting files from a directory tree.
///
/// The `Default` value applies no type, extension, or pattern filter and leaves
/// shell scripts excluded.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WalkOptions {
    /// Name of an `ignore` file-type definition to restrict the walk to
    /// (selected from that crate's default type set). `None` means no type filter.
    pub file_type: Option<String>,
    /// Allow-list of extensions. When non-empty, only files whose extension is
    /// listed are kept. Entries are matched case-insensitively; surrounding
    /// whitespace and leading dots are ignored.
    pub include_extensions: Vec<String>,
    /// Deny-list of extensions, normalized the same way as `include_extensions`.
    pub exclude_extensions: Vec<String>,
    /// Additional gitignore-style pattern lines, anchored at the walk root.
    pub ignore_patterns: Vec<String>,
    /// Whether files with a shell-script extension are kept. Defaults to `false`.
    pub include_shell_scripts: bool,
}

impl WalkOptions {
    /// Builds options that carry only a file-type filter.
    ///
    /// Every other field takes its `Default` value.
    #[must_use]
    pub fn from_file_type(file_type: Option<&str>) -> Self {
        let file_type = file_type.map(ToOwned::to_owned);
        Self {
            file_type,
            ..Default::default()
        }
    }
}
47
48#[must_use]
60pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
61 collect_files_with_options(root, &WalkOptions::from_file_type(file_type))
62}
63
64#[must_use]
67pub fn collect_files_with_options(root: &Path, options: &WalkOptions) -> Vec<PathBuf> {
68 let files = Arc::new(Mutex::new(Vec::new()));
69 let excluded_extensions = Arc::new(normalized_extensions(&options.exclude_extensions));
70 let included_extensions = Arc::new(normalized_extensions(&options.include_extensions));
71 let ignore_matcher = build_ignore_matcher(root, &options.ignore_patterns).map(Arc::new);
72 let include_shell_scripts = options.include_shell_scripts;
73
74 let mut builder = WalkBuilder::new(root);
75 builder.hidden(true).git_ignore(true).git_global(true);
76
77 if let Some(ft) = options.file_type.as_deref() {
78 let mut types_builder = ignore::types::TypesBuilder::new();
79 types_builder.add_defaults();
80 types_builder.select(ft);
81 if let Ok(types) = types_builder.build() {
82 builder.types(types);
83 }
84 }
85
86 builder.build_parallel().run(|| {
87 let files = Arc::clone(&files);
88 let excluded_extensions = Arc::clone(&excluded_extensions);
89 let included_extensions = Arc::clone(&included_extensions);
90 let ignore_matcher = ignore_matcher.clone();
91 Box::new(move |entry| {
92 let Ok(entry) = entry else {
93 return ignore::WalkState::Continue;
94 };
95 let Some(file_type) = entry.file_type() else {
96 return ignore::WalkState::Continue;
97 };
98 let is_dir = file_type.is_dir();
99 if ignore_matcher
100 .as_ref()
101 .is_some_and(|matcher| is_ignored(matcher, entry.path(), is_dir))
102 {
103 return if is_dir {
104 ignore::WalkState::Skip
105 } else {
106 ignore::WalkState::Continue
107 };
108 }
109 if !file_type.is_file() {
110 return ignore::WalkState::Continue;
111 }
112 if has_excluded_extension(entry.path(), &excluded_extensions) {
113 return ignore::WalkState::Continue;
114 }
115 if !included_extensions.is_empty()
116 && !has_included_extension(entry.path(), &included_extensions)
117 {
118 return ignore::WalkState::Continue;
119 }
120 if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
122 && matches!(
123 name,
124 "Cargo.lock"
125 | "package-lock.json"
126 | "yarn.lock"
127 | "pnpm-lock.yaml"
128 | "poetry.lock"
129 | "Gemfile.lock"
130 | "go.sum"
131 )
132 {
133 return ignore::WalkState::Continue;
134 }
135 if !include_shell_scripts && is_shell_script(entry.path()) {
136 return ignore::WalkState::Continue;
137 }
138 if let Ok(mut files) = files.lock() {
139 files.push(entry.into_path());
140 }
141 ignore::WalkState::Continue
142 })
143 });
144
145 let mut files = Arc::try_unwrap(files)
146 .ok()
147 .and_then(|files| files.into_inner().ok())
148 .unwrap_or_default();
149 files.sort();
150 files
151}
152
153fn is_shell_script(path: &Path) -> bool {
155 path.extension()
156 .and_then(|ext| ext.to_str())
157 .map(|ext| SHELL_EXTENSIONS.contains(&ext.to_ascii_lowercase().as_str()))
158 .unwrap_or(false)
159}
160
/// Canonicalizes user-supplied extensions into a lookup set.
///
/// Each entry is trimmed of surrounding whitespace, stripped of leading dots,
/// and ASCII-lowercased. Entries that end up empty are discarded.
fn normalized_extensions(extensions: &[String]) -> HashSet<String> {
    let mut canonical = HashSet::new();
    for raw in extensions {
        let cleaned = raw.trim().trim_start_matches('.');
        if !cleaned.is_empty() {
            canonical.insert(cleaned.to_ascii_lowercase());
        }
    }
    canonical
}
170
/// Returns `true` when the extension of `path`, ASCII-lowercased, is in
/// `excluded_extensions`.
///
/// Paths with no extension, or with a non-UTF-8 extension, never match.
fn has_excluded_extension(path: &Path, excluded_extensions: &HashSet<String>) -> bool {
    let Some(ext) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    let lowered = ext.to_ascii_lowercase();
    excluded_extensions.contains(&lowered)
}
177
/// Returns `true` when the extension of `path`, ASCII-lowercased, is in
/// `included_extensions`.
///
/// Paths with no extension, or with a non-UTF-8 extension, never match.
fn has_included_extension(path: &Path, included_extensions: &HashSet<String>) -> bool {
    let extension = path.extension().and_then(|raw| raw.to_str());
    extension.is_some_and(|ext| included_extensions.contains(&ext.to_ascii_lowercase()))
}
184
185fn build_ignore_matcher(root: &Path, patterns: &[String]) -> Option<Gitignore> {
186 if patterns.is_empty() {
187 return None;
188 }
189 let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
190 for pattern in patterns {
191 if let Err(error) = builder.add_line(None, pattern) {
192 tracing::warn!(pattern, %error, "invalid ripvec ignore pattern; skipping");
193 }
194 }
195 builder.build().ok().filter(|matcher| !matcher.is_empty())
196}
197
198fn is_ignored(matcher: &Gitignore, path: &Path, is_dir: bool) -> bool {
199 matcher
200 .matched_path_or_any_parents(path, is_dir)
201 .is_ignore()
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `relative` under `root` with placeholder content, making parent
    /// directories as needed.
    fn write_file(root: &Path, relative: &str) {
        let target = root.join(relative);
        if let Some(parent_dir) = target.parent() {
            std::fs::create_dir_all(parent_dir).expect("create parent");
        }
        std::fs::write(target, "test").expect("write file");
    }

    /// Creates every path in `relatives` under `root`.
    fn write_files(root: &Path, relatives: &[&str]) {
        for relative in relatives {
            write_file(root, relative);
        }
    }

    /// Runs the walker and returns the results relative to `root`, using `/` separators.
    fn collect_relative(root: &Path, options: &WalkOptions) -> Vec<String> {
        let mut relative_paths = Vec::new();
        for path in collect_files_with_options(root, options) {
            let relative = path.strip_prefix(root).expect("under root");
            relative_paths.push(relative.to_string_lossy().replace('\\', "/"));
        }
        relative_paths
    }

    /// Returns `true` when `files` contains exactly `name`.
    fn has(files: &[String], name: &str) -> bool {
        files.iter().any(|file| file == name)
    }

    #[test]
    fn excludes_extensions_case_insensitively() {
        let dir = TempDir::new().expect("tempdir");
        write_files(
            dir.path(),
            &["src/main.rs", "logs/events.JSONL", "README.md"],
        );

        let options = WalkOptions {
            exclude_extensions: vec![String::from("jsonl"), String::from(".md")],
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["src/main.rs"]);
    }

    #[test]
    fn excludes_gitignore_style_patterns() {
        let dir = TempDir::new().expect("tempdir");
        write_files(
            dir.path(),
            &[
                "src/main.rs",
                "generated/schema.rs",
                "notes/keep.md",
                "notes/drop.md",
            ],
        );

        let options = WalkOptions {
            ignore_patterns: ["generated/", "*.md", "!notes/keep.md"]
                .into_iter()
                .map(String::from)
                .collect(),
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["notes/keep.md", "src/main.rs"]);
    }

    #[test]
    fn walker_excludes_shell_scripts_by_default() {
        let dir = TempDir::new().expect("tempdir");
        write_files(
            dir.path(),
            &[
                "src/main.rs",
                "scripts/setup.sh",
                "scripts/ci.bash",
                "tests/suite.bats",
                "README.md",
            ],
        );

        let files = collect_relative(dir.path(), &WalkOptions::default());

        assert!(has(&files, "src/main.rs"), "Rust file should be included");
        assert!(has(&files, "README.md"), "Markdown file should be included");
        assert!(
            !has(&files, "scripts/setup.sh"),
            ".sh should be excluded by default"
        );
        assert!(
            !has(&files, "scripts/ci.bash"),
            ".bash should be excluded by default"
        );
        assert!(
            !has(&files, "tests/suite.bats"),
            ".bats should be excluded by default"
        );
    }

    #[test]
    fn walker_includes_shell_scripts_when_metadata_enabled() {
        let dir = TempDir::new().expect("tempdir");
        write_files(
            dir.path(),
            &[
                "src/main.rs",
                "scripts/setup.sh",
                "scripts/ci.bash",
                "tests/suite.bats",
            ],
        );

        let options = WalkOptions {
            include_shell_scripts: true,
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert!(
            has(&files, "scripts/setup.sh"),
            ".sh included when include_shell_scripts=true"
        );
        assert!(
            has(&files, "scripts/ci.bash"),
            ".bash included when include_shell_scripts=true"
        );
        assert!(
            has(&files, "tests/suite.bats"),
            ".bats included when include_shell_scripts=true"
        );
    }

    #[test]
    fn relative_roots_with_ignore_patterns_do_not_panic() {
        // The temp dir lives in the current directory so that it can be addressed
        // by a purely relative path (just its file name).
        let dir = tempfile::Builder::new()
            .prefix("ripvec-walk-test-")
            .tempdir_in(".")
            .expect("tempdir in current directory");
        let dir_name = dir.path().file_name().expect("tempdir file name");
        let root = PathBuf::from(dir_name);
        write_files(&root, &["src/main.rs", "notes/drop.md"]);

        let options = WalkOptions {
            ignore_patterns: vec![String::from("*.md")],
            ..WalkOptions::default()
        };
        let files = collect_relative(&root, &options);

        assert_eq!(files, ["src/main.rs"]);
    }
}