1use ignore::{WalkBuilder, gitignore::Gitignore};
8use std::collections::HashSet;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, Mutex};
11
/// Filters applied by [`collect_files_with_options`] while walking a tree.
///
/// The default value adds no type, extension, or pattern filter.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WalkOptions {
    /// Name of a file type to select from the `ignore` crate's default type
    /// definitions. `None` disables type filtering.
    pub file_type: Option<String>,
    /// When non-empty, only files whose extension appears here are kept.
    /// Entries are compared ASCII case-insensitively; surrounding whitespace
    /// and leading dots are stripped before comparison.
    pub include_extensions: Vec<String>,
    /// Files whose extension appears here are skipped. Entries are
    /// normalized the same way as `include_extensions`.
    pub exclude_extensions: Vec<String>,
    /// Additional gitignore-style patterns, anchored at the walk root.
    pub ignore_patterns: Vec<String>,
}

impl WalkOptions {
    /// Creates options that carry only a file-type filter; every other field
    /// keeps its default (empty) value.
    #[must_use]
    pub fn from_file_type(file_type: Option<&str>) -> Self {
        let file_type = file_type.map(String::from);
        Self {
            file_type,
            ..Self::default()
        }
    }
}
37
38#[must_use]
50pub fn collect_files(root: &Path, file_type: Option<&str>) -> Vec<PathBuf> {
51 collect_files_with_options(root, &WalkOptions::from_file_type(file_type))
52}
53
54#[must_use]
57pub fn collect_files_with_options(root: &Path, options: &WalkOptions) -> Vec<PathBuf> {
58 let files = Arc::new(Mutex::new(Vec::new()));
59 let excluded_extensions = Arc::new(normalized_extensions(&options.exclude_extensions));
60 let included_extensions = Arc::new(normalized_extensions(&options.include_extensions));
61 let ignore_matcher = build_ignore_matcher(root, &options.ignore_patterns).map(Arc::new);
62
63 let mut builder = WalkBuilder::new(root);
64 builder.hidden(true).git_ignore(true).git_global(true);
65
66 if let Some(ft) = options.file_type.as_deref() {
67 let mut types_builder = ignore::types::TypesBuilder::new();
68 types_builder.add_defaults();
69 types_builder.select(ft);
70 if let Ok(types) = types_builder.build() {
71 builder.types(types);
72 }
73 }
74
75 builder.build_parallel().run(|| {
76 let files = Arc::clone(&files);
77 let excluded_extensions = Arc::clone(&excluded_extensions);
78 let included_extensions = Arc::clone(&included_extensions);
79 let ignore_matcher = ignore_matcher.clone();
80 Box::new(move |entry| {
81 let Ok(entry) = entry else {
82 return ignore::WalkState::Continue;
83 };
84 let Some(file_type) = entry.file_type() else {
85 return ignore::WalkState::Continue;
86 };
87 let is_dir = file_type.is_dir();
88 if ignore_matcher
89 .as_ref()
90 .is_some_and(|matcher| is_ignored(matcher, entry.path(), is_dir))
91 {
92 return if is_dir {
93 ignore::WalkState::Skip
94 } else {
95 ignore::WalkState::Continue
96 };
97 }
98 if !file_type.is_file() {
99 return ignore::WalkState::Continue;
100 }
101 if has_excluded_extension(entry.path(), &excluded_extensions) {
102 return ignore::WalkState::Continue;
103 }
104 if !included_extensions.is_empty()
105 && !has_included_extension(entry.path(), &included_extensions)
106 {
107 return ignore::WalkState::Continue;
108 }
109 if let Some(name) = entry.path().file_name().and_then(|n| n.to_str())
111 && matches!(
112 name,
113 "Cargo.lock"
114 | "package-lock.json"
115 | "yarn.lock"
116 | "pnpm-lock.yaml"
117 | "poetry.lock"
118 | "Gemfile.lock"
119 | "go.sum"
120 )
121 {
122 return ignore::WalkState::Continue;
123 }
124 if let Ok(mut files) = files.lock() {
125 files.push(entry.into_path());
126 }
127 ignore::WalkState::Continue
128 })
129 });
130
131 let mut files = Arc::try_unwrap(files)
132 .ok()
133 .and_then(|files| files.into_inner().ok())
134 .unwrap_or_default();
135 files.sort();
136 files
137}
138
/// Normalizes user-supplied extensions into a lookup set.
///
/// Each entry is trimmed of surrounding whitespace, stripped of leading dots,
/// and ASCII-lowercased. Entries that end up empty are dropped.
fn normalized_extensions(extensions: &[String]) -> HashSet<String> {
    let mut normalized = HashSet::new();
    for raw in extensions {
        let bare = raw.trim().trim_start_matches('.');
        if !bare.is_empty() {
            normalized.insert(bare.to_ascii_lowercase());
        }
    }
    normalized
}
148
/// Reports whether `path` has a UTF-8 extension that, ASCII-lowercased, is a
/// member of `excluded_extensions`.
///
/// Paths without an extension, or with a non-UTF-8 one, yield `false`.
fn has_excluded_extension(path: &Path, excluded_extensions: &HashSet<String>) -> bool {
    let extension = path.extension().and_then(std::ffi::OsStr::to_str);
    extension.is_some_and(|ext| excluded_extensions.contains(&ext.to_ascii_lowercase()))
}
155
/// Reports whether `path` has a UTF-8 extension that, ASCII-lowercased, is a
/// member of `included_extensions`.
///
/// Paths without an extension, or with a non-UTF-8 one, yield `false`.
fn has_included_extension(path: &Path, included_extensions: &HashSet<String>) -> bool {
    let extension = path.extension().and_then(std::ffi::OsStr::to_str);
    extension.is_some_and(|ext| included_extensions.contains(&ext.to_ascii_lowercase()))
}
162
163fn build_ignore_matcher(root: &Path, patterns: &[String]) -> Option<Gitignore> {
164 if patterns.is_empty() {
165 return None;
166 }
167 let mut builder = ignore::gitignore::GitignoreBuilder::new(root);
168 for pattern in patterns {
169 if let Err(error) = builder.add_line(None, pattern) {
170 tracing::warn!(pattern, %error, "invalid ripvec ignore pattern; skipping");
171 }
172 }
173 builder.build().ok().filter(|matcher| !matcher.is_empty())
174}
175
176fn is_ignored(matcher: &Gitignore, path: &Path, is_dir: bool) -> bool {
177 matcher
178 .matched_path_or_any_parents(path, is_dir)
179 .is_ignore()
180}
181
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Creates `relative` below `root`, including any missing parent
    /// directories, and fills it with placeholder content.
    fn write_file(root: &Path, relative: &str) {
        let target = root.join(relative);
        if let Some(parent_dir) = target.parent() {
            std::fs::create_dir_all(parent_dir).expect("create parent");
        }
        std::fs::write(target, "test").expect("write file");
    }

    /// Runs the walker and returns the results as root-relative strings with
    /// forward-slash separators.
    fn collect_relative(root: &Path, options: &WalkOptions) -> Vec<String> {
        let mut relative = Vec::new();
        for found in collect_files_with_options(root, options) {
            let stripped = found.strip_prefix(root).expect("under root");
            relative.push(stripped.to_string_lossy().replace('\\', "/"));
        }
        relative
    }

    /// Converts string literals into the owned strings `WalkOptions` expects.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| (*item).to_string()).collect()
    }

    /// Extension exclusion ignores case and an optional leading dot.
    #[test]
    fn excludes_extensions_case_insensitively() {
        let dir = TempDir::new().expect("tempdir");
        for relative in ["src/main.rs", "logs/events.JSONL", "README.md"] {
            write_file(dir.path(), relative);
        }

        let options = WalkOptions {
            exclude_extensions: strings(&["jsonl", ".md"]),
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["src/main.rs"]);
    }

    /// Directory patterns, globs, and `!` negations all apply.
    #[test]
    fn excludes_gitignore_style_patterns() {
        let dir = TempDir::new().expect("tempdir");
        for relative in [
            "src/main.rs",
            "generated/schema.rs",
            "notes/keep.md",
            "notes/drop.md",
        ] {
            write_file(dir.path(), relative);
        }

        let options = WalkOptions {
            ignore_patterns: strings(&["generated/", "*.md", "!notes/keep.md"]),
            ..WalkOptions::default()
        };
        let files = collect_relative(dir.path(), &options);

        assert_eq!(files, ["notes/keep.md", "src/main.rs"]);
    }

    /// Walking a relative root with custom patterns must not panic.
    #[test]
    fn relative_roots_with_ignore_patterns_do_not_panic() {
        let dir = tempfile::Builder::new()
            .prefix("ripvec-walk-test-")
            .tempdir_in(".")
            .expect("tempdir in current directory");
        // Only the final component is kept, so the root is relative to the
        // current directory.
        let dir_name = dir.path().file_name().expect("tempdir file name");
        let root = PathBuf::from(dir_name);
        write_file(&root, "src/main.rs");
        write_file(&root, "notes/drop.md");

        let options = WalkOptions {
            ignore_patterns: strings(&["*.md"]),
            ..WalkOptions::default()
        };
        let files = collect_relative(&root, &options);

        assert_eq!(files, ["src/main.rs"]);
    }
}