1use std::collections::HashSet;
6use std::fmt;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use crate::{WalkerDirEntry, WalkerError, WalkerFs};
11use crate::glob_path::GlobPath;
12use crate::ignore::IgnoreFilter;
13use crate::filter::IncludeExclude;
14
/// Selects which kinds of directory entries a walk yields.
///
/// The derived `Default` leaves both flags `false`, which yields nothing;
/// use one of the preset constructors for a useful selection.
#[derive(Debug, Clone, Copy, Default)]
pub struct EntryTypes {
    /// Yield regular (non-directory) entries.
    pub files: bool,
    /// Yield directory entries.
    pub dirs: bool,
}

impl EntryTypes {
    /// Preset that yields files but not directories.
    pub fn files_only() -> Self {
        Self {
            dirs: false,
            ..Self::all()
        }
    }

    /// Preset that yields directories but not files.
    pub fn dirs_only() -> Self {
        Self {
            files: false,
            ..Self::all()
        }
    }

    /// Preset that yields both files and directories.
    pub fn all() -> Self {
        Self {
            files: true,
            dirs: true,
        }
    }
}
49
50pub type ErrorCallback = Arc<dyn Fn(&Path, &WalkerError) + Send + Sync>;
55
56pub struct WalkOptions {
58 pub max_depth: Option<usize>,
60 pub min_depth: Option<usize>,
64 pub max_filesize: Option<u64>,
68 pub entry_types: EntryTypes,
70 pub respect_gitignore: bool,
72 pub include_hidden: bool,
74 pub filter: IncludeExclude,
76 pub follow_symlinks: bool,
80 pub on_error: Option<ErrorCallback>,
83 pub types: Option<Arc<ignore::types::Types>>,
87}
88
89impl fmt::Debug for WalkOptions {
90 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91 f.debug_struct("WalkOptions")
92 .field("max_depth", &self.max_depth)
93 .field("min_depth", &self.min_depth)
94 .field("max_filesize", &self.max_filesize)
95 .field("entry_types", &self.entry_types)
96 .field("respect_gitignore", &self.respect_gitignore)
97 .field("include_hidden", &self.include_hidden)
98 .field("filter", &self.filter)
99 .field("follow_symlinks", &self.follow_symlinks)
100 .field("on_error", &self.on_error.as_ref().map(|_| "..."))
101 .field("types", &self.types.as_ref().map(|_| "..."))
102 .finish()
103 }
104}
105
106impl Clone for WalkOptions {
107 fn clone(&self) -> Self {
108 Self {
109 max_depth: self.max_depth,
110 min_depth: self.min_depth,
111 max_filesize: self.max_filesize,
112 entry_types: self.entry_types,
113 respect_gitignore: self.respect_gitignore,
114 include_hidden: self.include_hidden,
115 filter: self.filter.clone(),
116 follow_symlinks: self.follow_symlinks,
117 on_error: self.on_error.clone(),
118 types: self.types.clone(),
119 }
120 }
121}
122
123impl Default for WalkOptions {
124 fn default() -> Self {
125 Self {
126 max_depth: None,
127 min_depth: None,
128 max_filesize: None,
129 entry_types: EntryTypes::files_only(),
130 respect_gitignore: true,
131 include_hidden: false,
132 filter: IncludeExclude::new(),
133 follow_symlinks: false,
134 on_error: None,
135 types: None,
136 }
137 }
138}
139
140pub struct FileWalker<'a, F: WalkerFs> {
153 fs: &'a F,
154 root: PathBuf,
155 pattern: Option<GlobPath>,
156 options: WalkOptions,
157 ignore_filter: Option<IgnoreFilter>,
158}
159
160impl<'a, F: WalkerFs> FileWalker<'a, F> {
161 pub fn new(fs: &'a F, root: impl AsRef<Path>) -> Self {
163 Self {
164 fs,
165 root: root.as_ref().to_path_buf(),
166 pattern: None,
167 options: WalkOptions::default(),
168 ignore_filter: None,
169 }
170 }
171
172 pub fn with_pattern(mut self, pattern: GlobPath) -> Self {
174 self.pattern = Some(pattern);
175 self
176 }
177
178 pub fn with_options(mut self, options: WalkOptions) -> Self {
180 self.options = options;
181 self
182 }
183
184 pub fn with_ignore(mut self, filter: IgnoreFilter) -> Self {
186 self.ignore_filter = Some(filter);
187 self
188 }
189
190 pub async fn collect(mut self) -> Result<Vec<PathBuf>, crate::WalkerError> {
192 let base_filter = if self.options.respect_gitignore {
194 let mut filter = self
195 .ignore_filter
196 .take()
197 .unwrap_or_else(IgnoreFilter::with_defaults);
198
199 let gitignore_path = self.root.join(".gitignore");
201 if self.fs.exists(&gitignore_path).await {
202 match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
203 Ok(gitignore) => filter.merge(&gitignore),
204 Err(err) => {
205 if let Some(ref cb) = self.options.on_error {
206 cb(&gitignore_path, &err);
207 }
208 }
209 }
210 }
211 Some(filter)
212 } else {
213 self.ignore_filter.take()
214 };
215
216 let mut results = Vec::new();
217 let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
219 if self.options.follow_symlinks {
220 visited_dirs.insert(self.root.clone());
221 }
222 let mut stack = vec![(self.root.clone(), 0usize, base_filter.clone())];
224
225 while let Some((dir, depth, current_filter)) = stack.pop() {
226 if let Some(max) = self.options.max_depth
228 && depth > max {
229 continue;
230 }
231
232 let entries = match self.fs.list_dir(&dir).await {
234 Ok(entries) => entries,
235 Err(err) => {
236 if let Some(ref cb) = self.options.on_error {
237 cb(&dir, &err);
238 }
239 continue;
240 }
241 };
242
243 let mut entries: Vec<_> = entries
245 .into_iter()
246 .map(|e| {
247 let name = e.name().to_string();
248 let is_dir = e.is_dir();
249 let is_symlink = e.is_symlink();
250 (name, is_dir, is_symlink)
251 })
252 .collect();
253 entries.sort_by(|a, b| a.0.cmp(&b.0));
254
255 let mut dirs_to_push = Vec::new();
258
259 for (entry_name, entry_is_dir, entry_is_symlink) in entries {
260 let full_path = dir.join(&entry_name);
261
262 if !self.options.include_hidden
269 && self.pattern.is_none()
270 && entry_name.starts_with('.')
271 {
272 continue;
273 }
274
275 if let Some(ref filter) = current_filter {
277 let relative = self.relative_path(&full_path);
278 if filter.is_ignored(&relative, entry_is_dir) {
279 continue;
280 }
281 }
282
283 if let Some(ref types) = self.options.types
287 && types.matched(&full_path, entry_is_dir).is_ignore() {
288 continue;
289 }
290
291 if !self.options.filter.is_empty() {
293 let relative = self.relative_path(&full_path);
294 if self.options.filter.should_exclude(&relative) {
295 continue;
296 }
297 if let Some(name) = full_path.file_name()
299 && self
300 .options
301 .filter
302 .should_exclude(Path::new(name))
303 {
304 continue;
305 }
306 }
307
308 if entry_is_dir {
309 if entry_is_symlink && !self.options.follow_symlinks {
311 if self.options.entry_types.files
313 && self.matches_pattern(&full_path)
314 && self.depth_yields(depth)
315 && self.size_within_limit(self.fs, &full_path).await
316 {
317 results.push(full_path);
318 }
319 continue;
320 }
321
322 if entry_is_symlink && self.options.follow_symlinks {
324 let canonical = self.fs.canonicalize(&full_path).await;
325 if !visited_dirs.insert(canonical) {
326 if let Some(ref cb) = self.options.on_error {
328 cb(
329 &full_path,
330 &WalkerError::SymlinkCycle(full_path.display().to_string()),
331 );
332 }
333 continue;
334 }
335 }
336
337 let child_filter = if self.options.respect_gitignore {
339 let gitignore_path = full_path.join(".gitignore");
340 if self.fs.exists(&gitignore_path).await {
341 match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
342 Ok(nested_gitignore) => {
343 current_filter
345 .as_ref()
346 .map(|f| f.merged_with(&nested_gitignore))
347 .or(Some(nested_gitignore))
348 }
349 Err(err) => {
350 if let Some(ref cb) = self.options.on_error {
351 cb(&gitignore_path, &err);
352 }
353 current_filter.clone()
354 }
355 }
356 } else {
357 current_filter.clone()
358 }
359 } else {
360 current_filter.clone()
361 };
362
363 let should_recurse = match &self.pattern {
368 None => true,
369 Some(pat) => {
370 let relative = self.relative_path(&full_path);
371 pat.could_descend(&relative, self.options.include_hidden)
372 }
373 };
374
375 if should_recurse {
376 dirs_to_push.push((full_path.clone(), depth + 1, child_filter));
377 }
378
379 if self.options.entry_types.dirs
381 && self.matches_pattern(&full_path)
382 && self.depth_yields(depth)
383 {
384 results.push(full_path);
385 }
386 } else {
387 if self.options.entry_types.files
389 && self.matches_pattern(&full_path)
390 && self.depth_yields(depth)
391 && self.size_within_limit(self.fs, &full_path).await
392 {
393 results.push(full_path);
394 }
395 }
396 }
397
398 dirs_to_push.reverse();
401 stack.extend(dirs_to_push);
402 }
403
404 Ok(results)
405 }
406
407 fn relative_path(&self, full_path: &Path) -> PathBuf {
408 full_path
409 .strip_prefix(&self.root)
410 .map(|p| p.to_path_buf())
411 .unwrap_or_else(|_| full_path.to_path_buf())
412 }
413
414 fn matches_pattern(&self, path: &Path) -> bool {
415 match &self.pattern {
416 Some(pattern) => {
417 let relative = self.relative_path(path);
418 pattern.matches_walk(&relative, self.options.include_hidden)
419 }
420 None => true,
421 }
422 }
423
424 fn depth_yields(&self, depth: usize) -> bool {
427 match self.options.min_depth {
428 None | Some(0) => true,
429 Some(min) => depth >= min,
430 }
431 }
432
433 async fn size_within_limit(&self, fs: &F, path: &Path) -> bool {
437 let Some(limit) = self.options.max_filesize else {
438 return true;
439 };
440 match fs.file_size(path).await {
441 Some(size) => size <= limit,
442 None => true,
443 }
444 }
445}
446
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{WalkerDirEntry, WalkerError, WalkerFs};
    use std::collections::{HashMap, HashSet};
    use std::sync::{Arc, Mutex};
    use tokio::sync::RwLock;

    /// Directory entry produced by the in-memory test filesystems.
    struct MemEntry {
        name: String,
        is_dir: bool,
        is_symlink: bool,
    }

    impl WalkerDirEntry for MemEntry {
        fn name(&self) -> &str {
            &self.name
        }

        fn is_dir(&self) -> bool {
            self.is_dir
        }

        fn is_file(&self) -> bool {
            !self.is_dir
        }

        fn is_symlink(&self) -> bool {
            self.is_symlink
        }
    }

    /// Minimal in-memory filesystem: file contents, a set of directories and
    /// a map from symlinked directories to their targets.
    struct MemoryFs {
        files: Arc<RwLock<HashMap<PathBuf, Vec<u8>>>>,
        dirs: Arc<RwLock<HashSet<PathBuf>>>,
        symlinks: Arc<RwLock<HashMap<PathBuf, PathBuf>>>,
    }

    impl MemoryFs {
        /// Creates a filesystem that contains only the root directory `/`.
        fn new() -> Self {
            let root_only: HashSet<PathBuf> = std::iter::once(PathBuf::from("/")).collect();
            Self {
                files: Arc::new(RwLock::new(HashMap::new())),
                dirs: Arc::new(RwLock::new(root_only)),
                symlinks: Arc::new(RwLock::new(HashMap::new())),
            }
        }

        /// Stores `content` at `path`, creating every parent directory.
        async fn add_file(&self, path: &str, content: &[u8]) {
            let file_path = PathBuf::from(path);
            if let Some(parent) = file_path.parent() {
                self.ensure_dirs(parent).await;
            }
            self.files
                .write()
                .await
                .insert(file_path, content.to_vec());
        }

        /// Registers `path` and all of its ancestors as directories.
        async fn add_dir(&self, path: &str) {
            self.ensure_dirs(&PathBuf::from(path)).await;
        }

        /// Registers `link` as a directory that is a symlink to `target`.
        async fn add_dir_symlink(&self, link: &str, target: &str) {
            let link_path = PathBuf::from(link);
            if let Some(parent) = link_path.parent() {
                self.ensure_dirs(parent).await;
            }
            self.dirs.write().await.insert(link_path.clone());
            self.symlinks
                .write()
                .await
                .insert(link_path, PathBuf::from(target));
        }

        /// Rebuilds `path` component by component, replacing the prefix with
        /// the link target whenever the prefix built so far is a symlink.
        fn resolve_path(path: &Path, symlinks: &HashMap<PathBuf, PathBuf>) -> PathBuf {
            path.components()
                .fold(PathBuf::new(), |mut prefix, component| {
                    prefix.push(component);
                    match symlinks.get(&prefix) {
                        Some(target) => target.clone(),
                        None => prefix,
                    }
                })
        }

        /// Inserts every component-wise prefix of `path` into the directory set.
        async fn ensure_dirs(&self, path: &Path) {
            let mut dirs = self.dirs.write().await;
            dirs.extend(path.components().scan(PathBuf::new(), |prefix, component| {
                prefix.push(component);
                Some(prefix.clone())
            }));
        }
    }

    #[async_trait::async_trait]
    impl WalkerFs for MemoryFs {
        type DirEntry = MemEntry;

        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
            let symlinks = self.symlinks.read().await;
            // Listing a symlinked directory lists its target.
            let target = Self::resolve_path(path, &symlinks);

            let files = self.files.read().await;
            let dirs = self.dirs.read().await;

            let mut entries = Vec::new();
            let mut seen = HashSet::new();

            let child_files = files
                .keys()
                .filter(|candidate| candidate.parent() == Some(target.as_path()));
            for file_path in child_files {
                let Some(name) = file_path.file_name() else {
                    continue;
                };
                let name = name.to_string_lossy().to_string();
                if seen.insert(name.clone()) {
                    entries.push(MemEntry {
                        name,
                        is_dir: false,
                        is_symlink: false,
                    });
                }
            }

            let child_dirs = dirs.iter().filter(|candidate| {
                candidate.as_path() != target.as_path()
                    && candidate.parent() == Some(target.as_path())
            });
            for dir_path in child_dirs {
                let Some(name) = dir_path.file_name() else {
                    continue;
                };
                let name = name.to_string_lossy().to_string();
                if seen.insert(name.clone()) {
                    entries.push(MemEntry {
                        name,
                        is_dir: true,
                        is_symlink: symlinks.contains_key(dir_path),
                    });
                }
            }

            Ok(entries)
        }

        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
            match self.files.read().await.get(path) {
                Some(content) => Ok(content.clone()),
                None => Err(WalkerError::NotFound(path.display().to_string())),
            }
        }

        async fn is_dir(&self, path: &Path) -> bool {
            self.dirs.read().await.contains(path)
        }

        async fn exists(&self, path: &Path) -> bool {
            if self.files.read().await.contains_key(path) {
                return true;
            }
            self.dirs.read().await.contains(path)
        }

        async fn canonicalize(&self, path: &Path) -> PathBuf {
            let symlinks = self.symlinks.read().await;
            Self::resolve_path(path, &symlinks)
        }
    }

    /// Shared handle to the `(path, message)` pairs recorded by the callback
    /// that `error_recorder` returns.
    type ErrorLog = Arc<Mutex<Vec<(PathBuf, String)>>>;

    /// Builds an error callback together with the log it appends to.
    fn error_recorder() -> (ErrorLog, ErrorCallback) {
        let log: ErrorLog = Arc::new(Mutex::new(Vec::new()));
        let sink = Arc::clone(&log);
        let callback: ErrorCallback = Arc::new(move |path: &Path, err: &WalkerError| {
            sink.lock()
                .unwrap()
                .push((path.to_path_buf(), err.to_string()));
        });
        (log, callback)
    }

    /// Default options with gitignore handling switched off.
    fn without_gitignore() -> WalkOptions {
        WalkOptions {
            respect_gitignore: false,
            ..Default::default()
        }
    }

    /// Walks `fs` from `/` with the given options and optional glob pattern.
    async fn walk<F: WalkerFs>(
        fs: &F,
        pattern: Option<&str>,
        options: WalkOptions,
    ) -> Vec<PathBuf> {
        let mut walker = FileWalker::new(fs, "/").with_options(options);
        if let Some(glob) = pattern {
            walker = walker.with_pattern(GlobPath::new(glob).unwrap());
        }
        walker.collect().await.unwrap()
    }

    /// `true` when some path ends with the given path suffix.
    fn has(paths: &[PathBuf], suffix: &str) -> bool {
        paths.iter().any(|p| p.ends_with(suffix))
    }

    /// `true` when the textual form of some path contains `needle`.
    fn mentions(paths: &[PathBuf], needle: &str) -> bool {
        paths.iter().any(|p| p.to_string_lossy().contains(needle))
    }

    /// Builds the small project tree shared by several tests.
    async fn make_test_fs() -> MemoryFs {
        let fs = MemoryFs::new();

        for dir in ["/src", "/src/lib", "/test", "/.git", "/node_modules"] {
            fs.add_dir(dir).await;
        }

        let files: [(&str, &[u8]); 8] = [
            ("/src/main.rs", b"fn main() {}"),
            ("/src/lib.rs", b"pub mod lib;"),
            ("/src/lib/utils.rs", b"pub fn util() {}"),
            ("/test/main_test.rs", b"#[test]"),
            ("/README.md", b"# Test"),
            ("/.hidden", b"secret"),
            ("/.git/config", b"[core]"),
            ("/node_modules/pkg.json", b"{}"),
        ];
        for (path, content) in files {
            fs.add_file(path, content).await;
        }

        fs
    }

    #[tokio::test]
    async fn test_walk_all_files() {
        let fs = make_test_fs().await;
        let options = WalkOptions {
            include_hidden: true,
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "main.rs"));
        assert!(has(&files, "lib.rs"));
        assert!(has(&files, "README.md"));
        assert!(has(&files, ".hidden"));
    }

    #[tokio::test]
    async fn test_walk_with_pattern() {
        let fs = make_test_fs().await;

        let files = walk(&fs, Some("**/*.rs"), without_gitignore()).await;

        assert!(has(&files, "main.rs"));
        assert!(has(&files, "lib.rs"));
        assert!(has(&files, "utils.rs"));
        assert!(!has(&files, "README.md"));
    }

    #[tokio::test]
    async fn test_walk_respects_gitignore() {
        let fs = make_test_fs().await;
        let options = WalkOptions {
            respect_gitignore: true,
            ..Default::default()
        };

        let files = walk(&fs, None, options).await;

        assert!(!mentions(&files, ".git"));
        assert!(!mentions(&files, "node_modules"));

        assert!(has(&files, "main.rs"));
    }

    #[tokio::test]
    async fn test_walk_hides_dotfiles() {
        let fs = make_test_fs().await;
        let options = WalkOptions {
            include_hidden: false,
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(!has(&files, ".hidden"));
        assert!(has(&files, "main.rs"));
    }

    #[tokio::test]
    async fn test_dot_pattern_matches_dotfiles() {
        let fs = MemoryFs::new();
        for path in ["/.gitignore", "/.env", "/visible.txt"] {
            fs.add_file(path, b"x").await;
        }

        let files = walk(&fs, Some(".*"), without_gitignore()).await;

        assert!(has(&files, ".gitignore"));
        assert!(has(&files, ".env"));
        assert!(!has(&files, "visible.txt"));
    }

    #[tokio::test]
    async fn test_star_skips_dotfiles() {
        let fs = MemoryFs::new();
        for path in ["/.env", "/visible.txt"] {
            fs.add_file(path, b"x").await;
        }
        let options = WalkOptions {
            entry_types: EntryTypes::all(),
            ..without_gitignore()
        };

        let files = walk(&fs, Some("*"), options).await;

        assert!(!has(&files, ".env"));
        assert!(has(&files, "visible.txt"));
    }

    #[tokio::test]
    async fn test_literal_dotdir_is_traversed() {
        let fs = MemoryFs::new();
        for path in ["/.github/workflows/ci.yml", "/.github/.secret"] {
            fs.add_file(path, b"x").await;
        }

        let files = walk(&fs, Some(".github/**/*.yml"), without_gitignore()).await;

        assert!(has(&files, "ci.yml"));
    }

    #[tokio::test]
    async fn test_dotdir_star_excludes_nested_dotfiles() {
        let fs = MemoryFs::new();
        for path in ["/.github/config.yml", "/.github/.secret"] {
            fs.add_file(path, b"x").await;
        }
        let options = WalkOptions {
            entry_types: EntryTypes::all(),
            ..without_gitignore()
        };

        let files = walk(&fs, Some(".github/*"), options).await;

        assert!(has(&files, "config.yml"));
        assert!(!has(&files, ".secret"));
    }

    #[tokio::test]
    async fn test_globstar_skips_dotdirs_without_dotglob() {
        let fs = MemoryFs::new();
        for path in ["/.github/buried.rs", "/top.rs"] {
            fs.add_file(path, b"x").await;
        }

        let files = walk(&fs, Some("**/*.rs"), without_gitignore()).await;

        assert!(has(&files, "top.rs"));
        assert!(!has(&files, "buried.rs"));
    }

    #[tokio::test]
    async fn test_globstar_then_explicit_dotfile() {
        let fs = MemoryFs::new();
        for path in ["/.env", "/sub/.env", "/.hidden/.env", "/sub/visible.txt"] {
            fs.add_file(path, b"x").await;
        }

        let files = walk(&fs, Some("**/.env"), without_gitignore()).await;

        let env_count = files.iter().filter(|p| p.ends_with(".env")).count();
        assert_eq!(env_count, 2, "{files:?}");
        assert!(files.contains(&PathBuf::from("/.env")));
        assert!(files.contains(&PathBuf::from("/sub/.env")));
        assert!(!files.iter().any(|p| p.starts_with("/.hidden")));
    }

    #[tokio::test]
    async fn test_globstar_then_explicit_dotdir() {
        let fs = MemoryFs::new();
        for path in ["/.github/ci.yml", "/sub/.github/release.yml"] {
            fs.add_file(path, b"x").await;
        }

        let files = walk(&fs, Some("**/.github/*.yml"), without_gitignore()).await;

        assert!(has(&files, "ci.yml"), "{files:?}");
        assert!(has(&files, "release.yml"), "{files:?}");
    }

    #[tokio::test]
    async fn test_include_hidden_acts_like_dotglob() {
        let fs = MemoryFs::new();
        fs.add_file("/.github/buried.rs", b"x").await;
        let options = WalkOptions {
            include_hidden: true,
            ..without_gitignore()
        };

        let files = walk(&fs, Some("**/*.rs"), options).await;

        assert!(has(&files, "buried.rs"));
    }

    #[tokio::test]
    async fn test_walk_max_depth() {
        let fs = make_test_fs().await;
        let options = WalkOptions {
            max_depth: Some(1),
            include_hidden: true,
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        // Root-level file.
        assert!(has(&files, "README.md"));
        // One level down is still listed.
        assert!(has(&files, "main.rs"));
        // Two levels down is beyond the limit.
        assert!(!has(&files, "utils.rs"));
    }

    #[tokio::test]
    async fn test_walk_directories() {
        let fs = make_test_fs().await;
        let options = WalkOptions {
            entry_types: EntryTypes::dirs_only(),
            ..without_gitignore()
        };

        let dirs = walk(&fs, None, options).await;

        assert!(has(&dirs, "src"));
        assert!(has(&dirs, "lib"));
        assert!(!has(&dirs, "main.rs"));
    }

    #[tokio::test]
    async fn test_walk_with_filter() {
        let fs = make_test_fs().await;

        let mut filter = IncludeExclude::new();
        filter.exclude("*_test.rs");
        let options = WalkOptions {
            filter,
            ..without_gitignore()
        };

        let files = walk(&fs, Some("**/*.rs"), options).await;

        assert!(has(&files, "main.rs"));
        assert!(!has(&files, "main_test.rs"));
    }

    #[tokio::test]
    async fn test_walk_nested_gitignore() {
        let fs = MemoryFs::new();

        fs.add_dir("/src").await;
        fs.add_dir("/src/subdir").await;

        let files: [(&str, &[u8]); 7] = [
            ("/root.rs", b"root"),
            ("/src/main.rs", b"main"),
            ("/src/ignored.log", b"log"),
            ("/src/subdir/util.rs", b"util"),
            ("/src/subdir/local_ignore.txt", b"ignored"),
            ("/.gitignore", b"*.log"),
            ("/src/subdir/.gitignore", b"*.txt"),
        ];
        for (path, content) in files {
            fs.add_file(path, content).await;
        }

        let options = WalkOptions {
            respect_gitignore: true,
            include_hidden: true,
            ..Default::default()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "root.rs"));
        assert!(has(&files, "main.rs"));
        assert!(has(&files, "util.rs"));

        assert!(!has(&files, "ignored.log"));
        assert!(!has(&files, "local_ignore.txt"));
    }

    /// `MemoryFs` wrapper that reports sizes from an explicit table, so the
    /// size limit can be tested without large payloads.
    struct SizedFs {
        inner: MemoryFs,
        sizes: HashMap<PathBuf, u64>,
    }

    #[async_trait::async_trait]
    impl WalkerFs for SizedFs {
        type DirEntry = MemEntry;

        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
            self.inner.list_dir(path).await
        }

        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
            self.inner.read_file(path).await
        }

        async fn is_dir(&self, path: &Path) -> bool {
            self.inner.is_dir(path).await
        }

        async fn exists(&self, path: &Path) -> bool {
            self.inner.exists(path).await
        }

        async fn file_size(&self, path: &Path) -> Option<u64> {
            self.sizes.get(path).copied()
        }
    }

    #[tokio::test]
    async fn test_walk_max_filesize_skips_large_files() {
        let inner = MemoryFs::new();
        inner.add_file("/small.txt", b"tiny").await;
        inner.add_file("/big.bin", b"larger payload").await;

        let mut sizes = HashMap::new();
        sizes.insert(PathBuf::from("/small.txt"), 1_024);
        sizes.insert(PathBuf::from("/big.bin"), 2 * 1_048_576);
        let fs = SizedFs { inner, sizes };

        let options = WalkOptions {
            max_filesize: Some(1_048_576),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "small.txt"));
        assert!(!has(&files, "big.bin"));
    }

    #[tokio::test]
    async fn test_walk_max_filesize_unknown_size_yields() {
        let fs = MemoryFs::new();
        fs.add_file("/unknown.txt", b"x").await;
        let options = WalkOptions {
            max_filesize: Some(0),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "unknown.txt"));
    }

    #[tokio::test]
    async fn test_walk_min_depth_skips_root_files() {
        let fs = MemoryFs::new();
        fs.add_file("/at_root.txt", b"r").await;
        fs.add_dir("/sub").await;
        fs.add_file("/sub/nested.txt", b"n").await;
        fs.add_dir("/sub/deeper").await;
        fs.add_file("/sub/deeper/deep.txt", b"d").await;
        let options = WalkOptions {
            min_depth: Some(1),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(!has(&files, "at_root.txt"));
        assert!(has(&files, "nested.txt"));
        assert!(has(&files, "deep.txt"));
    }

    /// Builds a type matcher from the default definitions after applying
    /// `configure` to the builder.
    fn build_types(
        configure: impl FnOnce(&mut ignore::types::TypesBuilder),
    ) -> Arc<ignore::types::Types> {
        let mut builder = ignore::types::TypesBuilder::new();
        builder.add_defaults();
        configure(&mut builder);
        Arc::new(builder.build().expect("types build"))
    }

    #[tokio::test]
    async fn test_walk_types_select_only_rust() {
        let fs = MemoryFs::new();
        fs.add_file("/src/main.rs", b"r").await;
        fs.add_file("/src/main.py", b"p").await;
        fs.add_file("/src/main.js", b"j").await;
        fs.add_file("/README.md", b"m").await;

        let types = build_types(|builder| {
            builder.select("rust");
        });
        let options = WalkOptions {
            types: Some(types),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "main.rs"));
        assert!(!has(&files, "main.py"));
        assert!(!has(&files, "main.js"));
        assert!(!has(&files, "README.md"));
    }

    #[tokio::test]
    async fn test_walk_types_negate_excludes() {
        let fs = MemoryFs::new();
        fs.add_file("/src/main.rs", b"r").await;
        fs.add_file("/src/main.py", b"p").await;
        fs.add_file("/README.md", b"m").await;

        let types = build_types(|builder| {
            builder.negate("rust");
        });
        let options = WalkOptions {
            types: Some(types),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(!has(&files, "main.rs"));
        assert!(has(&files, "main.py"));
        assert!(has(&files, "README.md"));
    }

    #[tokio::test]
    async fn test_walk_min_depth_still_descends() {
        let fs = MemoryFs::new();
        fs.add_dir("/level1").await;
        fs.add_dir("/level1/level2").await;
        fs.add_file("/level1/level2/found.txt", b"f").await;
        let options = WalkOptions {
            min_depth: Some(2),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "found.txt"));
    }

    #[tokio::test]
    async fn test_walk_error_callback() {
        /// `MemoryFs` wrapper whose `list_dir` fails for selected paths.
        struct ErrorFs {
            inner: MemoryFs,
            error_paths: Vec<PathBuf>,
        }

        #[async_trait::async_trait]
        impl WalkerFs for ErrorFs {
            type DirEntry = MemEntry;

            async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
                let denied = self.error_paths.iter().any(|p| p == path);
                if denied {
                    return Err(WalkerError::PermissionDenied(path.display().to_string()));
                }
                self.inner.list_dir(path).await
            }

            async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
                self.inner.read_file(path).await
            }

            async fn is_dir(&self, path: &Path) -> bool {
                self.inner.is_dir(path).await
            }

            async fn exists(&self, path: &Path) -> bool {
                self.inner.exists(path).await
            }
        }

        let inner = MemoryFs::new();
        inner.add_dir("/readable").await;
        inner.add_dir("/forbidden").await;
        inner.add_file("/readable/ok.txt", b"ok").await;
        inner.add_file("/forbidden/secret.txt", b"secret").await;

        let fs = ErrorFs {
            inner,
            error_paths: vec![PathBuf::from("/forbidden")],
        };

        let (log, callback) = error_recorder();
        let options = WalkOptions {
            include_hidden: true,
            on_error: Some(callback),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "ok.txt"));
        assert!(!has(&files, "secret.txt"));

        let errors = log.lock().unwrap();
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].0, PathBuf::from("/forbidden"));
        assert!(errors[0].1.contains("permission denied"));
    }

    #[tokio::test]
    async fn test_walk_deterministic_order() {
        let fs = MemoryFs::new();

        // Deliberately registered out of alphabetical order.
        for dir in ["/charlie", "/alpha", "/bravo"] {
            fs.add_dir(dir).await;
        }
        fs.add_file("/charlie/c.txt", b"c").await;
        fs.add_file("/alpha/a.txt", b"a").await;
        fs.add_file("/bravo/b.txt", b"b").await;

        let files = walk(&fs, None, without_gitignore()).await;

        assert_eq!(files.len(), 3);
        assert!(files[0].ends_with("alpha/a.txt"));
        assert!(files[1].ends_with("bravo/b.txt"));
        assert!(files[2].ends_with("charlie/c.txt"));

        // A second walk over the same tree yields the same sequence.
        let files2 = walk(&fs, None, without_gitignore()).await;
        assert_eq!(files, files2);
    }

    #[tokio::test]
    async fn test_symlinks_not_followed_by_default() {
        let fs = MemoryFs::new();

        fs.add_dir("/real").await;
        fs.add_file("/real/data.txt", b"data").await;
        fs.add_dir_symlink("/link", "/real").await;

        let files = walk(&fs, None, without_gitignore()).await;

        assert!(has(&files, "real/data.txt"));
        // The link itself is yielded, but nothing underneath it.
        assert!(has(&files, "link"));
        assert!(!mentions(&files, "link/data"));
    }

    #[tokio::test]
    async fn test_symlinks_followed() {
        let fs = MemoryFs::new();

        fs.add_dir("/real").await;
        fs.add_file("/real/data.txt", b"data").await;
        fs.add_dir_symlink("/link", "/real").await;

        let options = WalkOptions {
            follow_symlinks: true,
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "real/data.txt"));
        assert!(has(&files, "link/data.txt"));
    }

    #[tokio::test]
    async fn test_symlink_cycle_detection() {
        let fs = MemoryFs::new();

        fs.add_dir("/a").await;
        fs.add_dir("/b").await;
        fs.add_file("/a/file_a.txt", b"a").await;
        fs.add_file("/b/file_b.txt", b"b").await;
        // Two directories that link to each other.
        fs.add_dir_symlink("/a/link_to_b", "/b").await;
        fs.add_dir_symlink("/b/link_to_a", "/a").await;

        let (log, callback) = error_recorder();
        let options = WalkOptions {
            follow_symlinks: true,
            on_error: Some(callback),
            ..without_gitignore()
        };

        let files = walk(&fs, None, options).await;

        assert!(has(&files, "file_a.txt"));
        assert!(has(&files, "file_b.txt"));

        let errors = log.lock().unwrap();
        assert!(
            errors.iter().any(|(_, msg)| msg.contains("symlink cycle")),
            "expected symlink cycle error, got: {errors:?}"
        );
    }
}