1use std::collections::HashSet;
6use std::fmt;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use crate::{WalkerDirEntry, WalkerError, WalkerFs};
11use crate::glob_path::GlobPath;
12use crate::ignore::IgnoreFilter;
13use crate::filter::IncludeExclude;
14
/// Selects which kinds of directory entries a walk yields.
///
/// The derived `Default` leaves both flags `false`, i.e. it selects nothing.
/// Equality and hashing are derived so values can be compared and used as keys.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct EntryTypes {
    /// Yield file entries.
    pub files: bool,
    /// Yield directory entries.
    pub dirs: bool,
}
23
24impl EntryTypes {
25 pub fn files_only() -> Self {
27 Self {
28 files: true,
29 dirs: false,
30 }
31 }
32
33 pub fn dirs_only() -> Self {
35 Self {
36 files: false,
37 dirs: true,
38 }
39 }
40
41 pub fn all() -> Self {
43 Self {
44 files: true,
45 dirs: true,
46 }
47 }
48}
49
/// Shared, thread-safe callback invoked with the offending path and the error
/// whenever the walker hits a non-fatal problem (unreadable directory,
/// unreadable `.gitignore`, symlink cycle) and carries on.
pub type ErrorCallback = Arc<dyn Fn(&Path, &WalkerError) + Send + Sync>;
55
/// Tunable behaviour of a [`FileWalker`] traversal.
pub struct WalkOptions {
    /// Directories deeper than this are not listed; the root is depth 0.
    pub max_depth: Option<usize>,
    /// Entries found at a depth below this are not yielded (traversal still
    /// descends through them). Entries directly under the root are at depth 0.
    pub min_depth: Option<usize>,
    /// Files whose reported size exceeds this limit are not yielded; files of
    /// unknown size are yielded.
    pub max_filesize: Option<u64>,
    /// Which kinds of entries (files and/or directories) are yielded.
    pub entry_types: EntryTypes,
    /// Load `.gitignore` files found at the root and in nested directories.
    pub respect_gitignore: bool,
    /// When `false`, entries whose name starts with `.` are skipped.
    pub include_hidden: bool,
    /// Exclusion filter checked against both the root-relative path and the
    /// bare file name of every entry.
    pub filter: IncludeExclude,
    /// Descend into symlinked directories; when `false` they are treated like
    /// files.
    pub follow_symlinks: bool,
    /// Receives non-fatal errors; when `None` they are silently dropped.
    pub on_error: Option<ErrorCallback>,
    /// Optional file-type matcher; entries it reports as ignored are skipped.
    pub types: Option<Arc<ignore::types::Types>>,
}
88
89impl fmt::Debug for WalkOptions {
90 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91 f.debug_struct("WalkOptions")
92 .field("max_depth", &self.max_depth)
93 .field("min_depth", &self.min_depth)
94 .field("max_filesize", &self.max_filesize)
95 .field("entry_types", &self.entry_types)
96 .field("respect_gitignore", &self.respect_gitignore)
97 .field("include_hidden", &self.include_hidden)
98 .field("filter", &self.filter)
99 .field("follow_symlinks", &self.follow_symlinks)
100 .field("on_error", &self.on_error.as_ref().map(|_| "..."))
101 .field("types", &self.types.as_ref().map(|_| "..."))
102 .finish()
103 }
104}
105
106impl Clone for WalkOptions {
107 fn clone(&self) -> Self {
108 Self {
109 max_depth: self.max_depth,
110 min_depth: self.min_depth,
111 max_filesize: self.max_filesize,
112 entry_types: self.entry_types,
113 respect_gitignore: self.respect_gitignore,
114 include_hidden: self.include_hidden,
115 filter: self.filter.clone(),
116 follow_symlinks: self.follow_symlinks,
117 on_error: self.on_error.clone(),
118 types: self.types.clone(),
119 }
120 }
121}
122
123impl Default for WalkOptions {
124 fn default() -> Self {
125 Self {
126 max_depth: None,
127 min_depth: None,
128 max_filesize: None,
129 entry_types: EntryTypes::files_only(),
130 respect_gitignore: true,
131 include_hidden: false,
132 filter: IncludeExclude::new(),
133 follow_symlinks: false,
134 on_error: None,
135 types: None,
136 }
137 }
138}
139
/// Asynchronous directory walker over an abstract filesystem `F`.
///
/// Built with [`FileWalker::new`], configured through the `with_*` methods and
/// consumed by [`FileWalker::collect`].
pub struct FileWalker<'a, F: WalkerFs> {
    /// Filesystem used for every listing, existence check and read.
    fs: &'a F,
    /// Directory where the walk starts; patterns and filters see paths
    /// relative to it.
    root: PathBuf,
    /// Optional glob that yielded paths must match.
    pattern: Option<GlobPath>,
    /// Traversal options.
    options: WalkOptions,
    /// Optional caller-supplied ignore filter.
    ignore_filter: Option<IgnoreFilter>,
}
159
160impl<'a, F: WalkerFs> FileWalker<'a, F> {
161 pub fn new(fs: &'a F, root: impl AsRef<Path>) -> Self {
163 Self {
164 fs,
165 root: root.as_ref().to_path_buf(),
166 pattern: None,
167 options: WalkOptions::default(),
168 ignore_filter: None,
169 }
170 }
171
172 pub fn with_pattern(mut self, pattern: GlobPath) -> Self {
174 self.pattern = Some(pattern);
175 self
176 }
177
178 pub fn with_options(mut self, options: WalkOptions) -> Self {
180 self.options = options;
181 self
182 }
183
184 pub fn with_ignore(mut self, filter: IgnoreFilter) -> Self {
186 self.ignore_filter = Some(filter);
187 self
188 }
189
190 pub async fn collect(mut self) -> Result<Vec<PathBuf>, crate::WalkerError> {
192 let base_filter = if self.options.respect_gitignore {
194 let mut filter = self
195 .ignore_filter
196 .take()
197 .unwrap_or_else(IgnoreFilter::with_defaults);
198
199 let gitignore_path = self.root.join(".gitignore");
201 if self.fs.exists(&gitignore_path).await {
202 match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
203 Ok(gitignore) => filter.merge(&gitignore),
204 Err(err) => {
205 if let Some(ref cb) = self.options.on_error {
206 cb(&gitignore_path, &err);
207 }
208 }
209 }
210 }
211 Some(filter)
212 } else {
213 self.ignore_filter.take()
214 };
215
216 let mut results = Vec::new();
217 let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
219 if self.options.follow_symlinks {
220 visited_dirs.insert(self.root.clone());
221 }
222 let mut stack = vec![(self.root.clone(), 0usize, base_filter.clone())];
224
225 while let Some((dir, depth, current_filter)) = stack.pop() {
226 if let Some(max) = self.options.max_depth
228 && depth > max {
229 continue;
230 }
231
232 let entries = match self.fs.list_dir(&dir).await {
234 Ok(entries) => entries,
235 Err(err) => {
236 if let Some(ref cb) = self.options.on_error {
237 cb(&dir, &err);
238 }
239 continue;
240 }
241 };
242
243 let mut entries: Vec<_> = entries
245 .into_iter()
246 .map(|e| {
247 let name = e.name().to_string();
248 let is_dir = e.is_dir();
249 let is_symlink = e.is_symlink();
250 (name, is_dir, is_symlink)
251 })
252 .collect();
253 entries.sort_by(|a, b| a.0.cmp(&b.0));
254
255 let mut dirs_to_push = Vec::new();
258
259 for (entry_name, entry_is_dir, entry_is_symlink) in entries {
260 let full_path = dir.join(&entry_name);
261
262 if !self.options.include_hidden && entry_name.starts_with('.') {
264 continue;
265 }
266
267 if let Some(ref filter) = current_filter {
269 let relative = self.relative_path(&full_path);
270 if filter.is_ignored(&relative, entry_is_dir) {
271 continue;
272 }
273 }
274
275 if let Some(ref types) = self.options.types
279 && types.matched(&full_path, entry_is_dir).is_ignore() {
280 continue;
281 }
282
283 if !self.options.filter.is_empty() {
285 let relative = self.relative_path(&full_path);
286 if self.options.filter.should_exclude(&relative) {
287 continue;
288 }
289 if let Some(name) = full_path.file_name()
291 && self
292 .options
293 .filter
294 .should_exclude(Path::new(name))
295 {
296 continue;
297 }
298 }
299
300 if entry_is_dir {
301 if entry_is_symlink && !self.options.follow_symlinks {
303 if self.options.entry_types.files
305 && self.matches_pattern(&full_path)
306 && self.depth_yields(depth)
307 && self.size_within_limit(self.fs, &full_path).await
308 {
309 results.push(full_path);
310 }
311 continue;
312 }
313
314 if entry_is_symlink && self.options.follow_symlinks {
316 let canonical = self.fs.canonicalize(&full_path).await;
317 if !visited_dirs.insert(canonical) {
318 if let Some(ref cb) = self.options.on_error {
320 cb(
321 &full_path,
322 &WalkerError::SymlinkCycle(full_path.display().to_string()),
323 );
324 }
325 continue;
326 }
327 }
328
329 let child_filter = if self.options.respect_gitignore {
331 let gitignore_path = full_path.join(".gitignore");
332 if self.fs.exists(&gitignore_path).await {
333 match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
334 Ok(nested_gitignore) => {
335 current_filter
337 .as_ref()
338 .map(|f| f.merged_with(&nested_gitignore))
339 .or(Some(nested_gitignore))
340 }
341 Err(err) => {
342 if let Some(ref cb) = self.options.on_error {
343 cb(&gitignore_path, &err);
344 }
345 current_filter.clone()
346 }
347 }
348 } else {
349 current_filter.clone()
350 }
351 } else {
352 current_filter.clone()
353 };
354
355 let should_recurse = match &self.pattern {
357 None => true,
358 Some(pat) => {
359 if pat.has_globstar() {
360 true
361 } else if let Some(fixed) = pat.fixed_depth() {
362 depth + 1 < fixed
363 } else {
364 true
365 }
366 }
367 };
368
369 if should_recurse {
370 dirs_to_push.push((full_path.clone(), depth + 1, child_filter));
371 }
372
373 if self.options.entry_types.dirs
375 && self.matches_pattern(&full_path)
376 && self.depth_yields(depth)
377 {
378 results.push(full_path);
379 }
380 } else {
381 if self.options.entry_types.files
383 && self.matches_pattern(&full_path)
384 && self.depth_yields(depth)
385 && self.size_within_limit(self.fs, &full_path).await
386 {
387 results.push(full_path);
388 }
389 }
390 }
391
392 dirs_to_push.reverse();
395 stack.extend(dirs_to_push);
396 }
397
398 Ok(results)
399 }
400
401 fn relative_path(&self, full_path: &Path) -> PathBuf {
402 full_path
403 .strip_prefix(&self.root)
404 .map(|p| p.to_path_buf())
405 .unwrap_or_else(|_| full_path.to_path_buf())
406 }
407
408 fn matches_pattern(&self, path: &Path) -> bool {
409 match &self.pattern {
410 Some(pattern) => {
411 let relative = self.relative_path(path);
412 pattern.matches(&relative)
413 }
414 None => true,
415 }
416 }
417
418 fn depth_yields(&self, depth: usize) -> bool {
421 match self.options.min_depth {
422 None | Some(0) => true,
423 Some(min) => depth >= min,
424 }
425 }
426
427 async fn size_within_limit(&self, fs: &F, path: &Path) -> bool {
431 let Some(limit) = self.options.max_filesize else {
432 return true;
433 };
434 match fs.file_size(path).await {
435 Some(size) => size <= limit,
436 None => true,
437 }
438 }
439}
440
441#[cfg(test)]
442mod tests {
443 use super::*;
444 use crate::{WalkerDirEntry, WalkerError, WalkerFs};
445 use std::collections::HashMap;
446 use std::sync::Arc;
447 use tokio::sync::RwLock;
448
    /// Directory entry produced by the in-memory test filesystems.
    struct MemEntry {
        /// Final path component of the entry.
        name: String,
        /// Whether the entry is listed as a directory.
        is_dir: bool,
        /// Whether the entry is registered as a symlink.
        is_symlink: bool,
    }
455
456 impl WalkerDirEntry for MemEntry {
457 fn name(&self) -> &str { &self.name }
458 fn is_dir(&self) -> bool { self.is_dir }
459 fn is_file(&self) -> bool { !self.is_dir }
460 fn is_symlink(&self) -> bool { self.is_symlink }
461 }
462
    /// Minimal in-memory filesystem used by the walker tests.
    struct MemoryFs {
        /// File contents keyed by absolute path.
        files: Arc<RwLock<HashMap<PathBuf, Vec<u8>>>>,
        /// Every known directory path (symlinked directories included).
        dirs: Arc<RwLock<std::collections::HashSet<PathBuf>>>,
        /// Directory symlinks: link path -> target path.
        symlinks: Arc<RwLock<HashMap<PathBuf, PathBuf>>>,
    }
472
473 impl MemoryFs {
474 fn new() -> Self {
475 let mut dirs = std::collections::HashSet::new();
476 dirs.insert(PathBuf::from("/"));
477 Self {
478 files: Arc::new(RwLock::new(HashMap::new())),
479 dirs: Arc::new(RwLock::new(dirs)),
480 symlinks: Arc::new(RwLock::new(HashMap::new())),
481 }
482 }
483
484 async fn add_file(&self, path: &str, content: &[u8]) {
485 let path = PathBuf::from(path);
486 if let Some(parent) = path.parent() {
488 self.ensure_dirs(parent).await;
489 }
490 self.files.write().await.insert(path, content.to_vec());
491 }
492
493 async fn add_dir(&self, path: &str) {
494 self.ensure_dirs(&PathBuf::from(path)).await;
495 }
496
497 async fn add_dir_symlink(&self, link: &str, target: &str) {
500 let link_path = PathBuf::from(link);
501 let target_path = PathBuf::from(target);
502 if let Some(parent) = link_path.parent() {
504 self.ensure_dirs(parent).await;
505 }
506 self.dirs.write().await.insert(link_path.clone());
508 self.symlinks.write().await.insert(link_path, target_path);
509 }
510
511 fn resolve_path(path: &Path, symlinks: &HashMap<PathBuf, PathBuf>) -> PathBuf {
514 let mut resolved = PathBuf::new();
515 for component in path.components() {
516 resolved.push(component);
517 if let Some(target) = symlinks.get(&resolved) {
519 resolved = target.clone();
520 }
521 }
522 resolved
523 }
524
525 async fn ensure_dirs(&self, path: &Path) {
526 let mut dirs = self.dirs.write().await;
527 let mut current = PathBuf::new();
528 for component in path.components() {
529 current.push(component);
530 dirs.insert(current.clone());
531 }
532 }
533 }
534
535 #[async_trait::async_trait]
536 impl WalkerFs for MemoryFs {
537 type DirEntry = MemEntry;
538
539 async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
540 let symlinks = self.symlinks.read().await;
541
542 let resolved = Self::resolve_path(path, &symlinks);
544
545 let files = self.files.read().await;
546 let dirs = self.dirs.read().await;
547
548 let mut entries = Vec::new();
549 let mut seen = std::collections::HashSet::new();
550
551 for file_path in files.keys() {
553 if let Some(parent) = file_path.parent() {
554 if parent == resolved {
555 if let Some(name) = file_path.file_name() {
556 let name_str = name.to_string_lossy().to_string();
557 if seen.insert(name_str.clone()) {
558 entries.push(MemEntry {
559 name: name_str,
560 is_dir: false,
561 is_symlink: false,
562 });
563 }
564 }
565 }
566 }
567 }
568
569 for dir_path in dirs.iter() {
571 if let Some(parent) = dir_path.parent() {
572 if parent == resolved && dir_path != &resolved {
573 if let Some(name) = dir_path.file_name() {
574 let name_str = name.to_string_lossy().to_string();
575 if seen.insert(name_str.clone()) {
576 let is_symlink = symlinks.contains_key(dir_path);
577 entries.push(MemEntry {
578 name: name_str,
579 is_dir: true,
580 is_symlink,
581 });
582 }
583 }
584 }
585 }
586 }
587
588 Ok(entries)
589 }
590
591 async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
592 let files = self.files.read().await;
593 files.get(path)
594 .cloned()
595 .ok_or_else(|| WalkerError::NotFound(path.display().to_string()))
596 }
597
598 async fn is_dir(&self, path: &Path) -> bool {
599 self.dirs.read().await.contains(path)
600 }
601
602 async fn exists(&self, path: &Path) -> bool {
603 self.files.read().await.contains_key(path)
604 || self.dirs.read().await.contains(path)
605 }
606
607 async fn canonicalize(&self, path: &Path) -> PathBuf {
608 let symlinks = self.symlinks.read().await;
609 Self::resolve_path(path, &symlinks)
610 }
611 }
612
613 async fn make_test_fs() -> MemoryFs {
614 let fs = MemoryFs::new();
615
616 fs.add_dir("/src").await;
617 fs.add_dir("/src/lib").await;
618 fs.add_dir("/test").await;
619 fs.add_dir("/.git").await;
620 fs.add_dir("/node_modules").await;
621
622 fs.add_file("/src/main.rs", b"fn main() {}").await;
623 fs.add_file("/src/lib.rs", b"pub mod lib;").await;
624 fs.add_file("/src/lib/utils.rs", b"pub fn util() {}").await;
625 fs.add_file("/test/main_test.rs", b"#[test]").await;
626 fs.add_file("/README.md", b"# Test").await;
627 fs.add_file("/.hidden", b"secret").await;
628 fs.add_file("/.git/config", b"[core]").await;
629 fs.add_file("/node_modules/pkg.json", b"{}").await;
630
631 fs
632 }
633
634 #[tokio::test]
635 async fn test_walk_all_files() {
636 let fs = make_test_fs().await;
637
638 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
639 respect_gitignore: false,
640 include_hidden: true,
641 ..Default::default()
642 });
643
644 let files = walker.collect().await.unwrap();
645
646 assert!(files.iter().any(|p| p.ends_with("main.rs")));
647 assert!(files.iter().any(|p| p.ends_with("lib.rs")));
648 assert!(files.iter().any(|p| p.ends_with("README.md")));
649 assert!(files.iter().any(|p| p.ends_with(".hidden")));
650 }
651
652 #[tokio::test]
653 async fn test_walk_with_pattern() {
654 let fs = make_test_fs().await;
655
656 let walker = FileWalker::new(&fs, "/")
657 .with_pattern(GlobPath::new("**/*.rs").unwrap())
658 .with_options(WalkOptions {
659 respect_gitignore: false,
660 ..Default::default()
661 });
662
663 let files = walker.collect().await.unwrap();
664
665 assert!(files.iter().any(|p| p.ends_with("main.rs")));
666 assert!(files.iter().any(|p| p.ends_with("lib.rs")));
667 assert!(files.iter().any(|p| p.ends_with("utils.rs")));
668 assert!(!files.iter().any(|p| p.ends_with("README.md")));
669 }
670
671 #[tokio::test]
672 async fn test_walk_respects_gitignore() {
673 let fs = make_test_fs().await;
674
675 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
676 respect_gitignore: true,
677 ..Default::default()
678 });
679
680 let files = walker.collect().await.unwrap();
681
682 assert!(!files
683 .iter()
684 .any(|p| p.to_string_lossy().contains(".git")));
685 assert!(!files
686 .iter()
687 .any(|p| p.to_string_lossy().contains("node_modules")));
688
689 assert!(files.iter().any(|p| p.ends_with("main.rs")));
690 }
691
692 #[tokio::test]
693 async fn test_walk_hides_dotfiles() {
694 let fs = make_test_fs().await;
695
696 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
697 include_hidden: false,
698 respect_gitignore: false,
699 ..Default::default()
700 });
701
702 let files = walker.collect().await.unwrap();
703
704 assert!(!files.iter().any(|p| p.ends_with(".hidden")));
705 assert!(files.iter().any(|p| p.ends_with("main.rs")));
706 }
707
708 #[tokio::test]
709 async fn test_walk_max_depth() {
710 let fs = make_test_fs().await;
711
712 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
713 max_depth: Some(1),
714 respect_gitignore: false,
715 include_hidden: true,
716 ..Default::default()
717 });
718
719 let files = walker.collect().await.unwrap();
720
721 assert!(files.iter().any(|p| p.ends_with("README.md")));
723 assert!(files.iter().any(|p| p.ends_with("main.rs")));
725 assert!(!files.iter().any(|p| p.ends_with("utils.rs")));
727 }
728
729 #[tokio::test]
730 async fn test_walk_directories() {
731 let fs = make_test_fs().await;
732
733 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
734 entry_types: EntryTypes::dirs_only(),
735 respect_gitignore: false,
736 ..Default::default()
737 });
738
739 let dirs = walker.collect().await.unwrap();
740
741 assert!(dirs.iter().any(|p| p.ends_with("src")));
742 assert!(dirs.iter().any(|p| p.ends_with("lib")));
743 assert!(!dirs.iter().any(|p| p.ends_with("main.rs")));
744 }
745
746 #[tokio::test]
747 async fn test_walk_with_filter() {
748 let fs = make_test_fs().await;
749
750 let mut filter = IncludeExclude::new();
751 filter.exclude("*_test.rs");
752
753 let walker = FileWalker::new(&fs, "/")
754 .with_pattern(GlobPath::new("**/*.rs").unwrap())
755 .with_options(WalkOptions {
756 filter,
757 respect_gitignore: false,
758 ..Default::default()
759 });
760
761 let files = walker.collect().await.unwrap();
762
763 assert!(files.iter().any(|p| p.ends_with("main.rs")));
764 assert!(!files.iter().any(|p| p.ends_with("main_test.rs")));
765 }
766
767 #[tokio::test]
768 async fn test_walk_nested_gitignore() {
769 let fs = MemoryFs::new();
770
771 fs.add_dir("/src").await;
772 fs.add_dir("/src/subdir").await;
773 fs.add_file("/root.rs", b"root").await;
774 fs.add_file("/src/main.rs", b"main").await;
775 fs.add_file("/src/ignored.log", b"log").await;
776 fs.add_file("/src/subdir/util.rs", b"util").await;
777 fs.add_file("/src/subdir/local_ignore.txt", b"ignored").await;
778
779 fs.add_file("/.gitignore", b"*.log").await;
780 fs.add_file("/src/subdir/.gitignore", b"*.txt").await;
781
782 let walker = FileWalker::new(&fs, "/")
783 .with_options(WalkOptions {
784 respect_gitignore: true,
785 include_hidden: true,
786 ..Default::default()
787 });
788
789 let files = walker.collect().await.unwrap();
790
791 assert!(files.iter().any(|p| p.ends_with("root.rs")));
792 assert!(files.iter().any(|p| p.ends_with("main.rs")));
793 assert!(files.iter().any(|p| p.ends_with("util.rs")));
794
795 assert!(!files.iter().any(|p| p.ends_with("ignored.log")));
796 assert!(!files.iter().any(|p| p.ends_with("local_ignore.txt")));
797 }
798
    /// Test filesystem that wraps [`MemoryFs`] and reports file sizes from an
    /// explicit table instead of from the stored contents.
    struct SizedFs {
        /// Filesystem that serves listings, reads and existence checks.
        inner: MemoryFs,
        /// Size reported for each path; paths missing here have unknown size.
        sizes: HashMap<PathBuf, u64>,
    }
805
806 #[async_trait::async_trait]
807 impl WalkerFs for SizedFs {
808 type DirEntry = MemEntry;
809 async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
810 self.inner.list_dir(path).await
811 }
812 async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
813 self.inner.read_file(path).await
814 }
815 async fn is_dir(&self, path: &Path) -> bool { self.inner.is_dir(path).await }
816 async fn exists(&self, path: &Path) -> bool { self.inner.exists(path).await }
817 async fn file_size(&self, path: &Path) -> Option<u64> {
818 self.sizes.get(path).copied()
819 }
820 }
821
822 #[tokio::test]
823 async fn test_walk_max_filesize_skips_large_files() {
824 let inner = MemoryFs::new();
825 inner.add_file("/small.txt", b"tiny").await;
826 inner.add_file("/big.bin", b"larger payload").await;
827 let mut sizes = HashMap::new();
828 sizes.insert(PathBuf::from("/small.txt"), 1_024); sizes.insert(PathBuf::from("/big.bin"), 2 * 1_048_576); let fs = SizedFs { inner, sizes };
831
832 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
833 respect_gitignore: false,
834 max_filesize: Some(1_048_576), ..Default::default()
836 });
837
838 let files = walker.collect().await.unwrap();
839
840 assert!(files.iter().any(|p| p.ends_with("small.txt")));
841 assert!(!files.iter().any(|p| p.ends_with("big.bin")));
842 }
843
844 #[tokio::test]
845 async fn test_walk_max_filesize_unknown_size_yields() {
846 let fs = MemoryFs::new();
848 fs.add_file("/unknown.txt", b"x").await;
849
850 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
851 respect_gitignore: false,
852 max_filesize: Some(0), ..Default::default()
854 });
855
856 let files = walker.collect().await.unwrap();
857 assert!(files.iter().any(|p| p.ends_with("unknown.txt")));
858 }
859
860 #[tokio::test]
861 async fn test_walk_min_depth_skips_root_files() {
862 let fs = MemoryFs::new();
863 fs.add_file("/at_root.txt", b"r").await;
864 fs.add_dir("/sub").await;
865 fs.add_file("/sub/nested.txt", b"n").await;
866 fs.add_dir("/sub/deeper").await;
867 fs.add_file("/sub/deeper/deep.txt", b"d").await;
868
869 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
870 respect_gitignore: false,
871 min_depth: Some(1), ..Default::default()
873 });
874
875 let files = walker.collect().await.unwrap();
876
877 assert!(!files.iter().any(|p| p.ends_with("at_root.txt")));
879 assert!(files.iter().any(|p| p.ends_with("nested.txt")));
881 assert!(files.iter().any(|p| p.ends_with("deep.txt")));
883 }
884
885 #[tokio::test]
886 async fn test_walk_types_select_only_rust() {
887 let fs = MemoryFs::new();
888 fs.add_file("/src/main.rs", b"r").await;
889 fs.add_file("/src/main.py", b"p").await;
890 fs.add_file("/src/main.js", b"j").await;
891 fs.add_file("/README.md", b"m").await;
892
893 let mut tb = ignore::types::TypesBuilder::new();
894 tb.add_defaults();
895 tb.select("rust");
896 let types = std::sync::Arc::new(tb.build().expect("types build"));
897
898 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
899 respect_gitignore: false,
900 types: Some(types),
901 ..Default::default()
902 });
903
904 let files = walker.collect().await.unwrap();
905
906 assert!(files.iter().any(|p| p.ends_with("main.rs")));
907 assert!(!files.iter().any(|p| p.ends_with("main.py")));
908 assert!(!files.iter().any(|p| p.ends_with("main.js")));
909 assert!(!files.iter().any(|p| p.ends_with("README.md")));
910 }
911
912 #[tokio::test]
913 async fn test_walk_types_negate_excludes() {
914 let fs = MemoryFs::new();
915 fs.add_file("/src/main.rs", b"r").await;
916 fs.add_file("/src/main.py", b"p").await;
917 fs.add_file("/README.md", b"m").await;
918
919 let mut tb = ignore::types::TypesBuilder::new();
920 tb.add_defaults();
921 tb.negate("rust");
922 let types = std::sync::Arc::new(tb.build().expect("types build"));
923
924 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
925 respect_gitignore: false,
926 types: Some(types),
927 ..Default::default()
928 });
929
930 let files = walker.collect().await.unwrap();
931
932 assert!(!files.iter().any(|p| p.ends_with("main.rs")));
934 assert!(files.iter().any(|p| p.ends_with("main.py")));
936 assert!(files.iter().any(|p| p.ends_with("README.md")));
937 }
938
939 #[tokio::test]
940 async fn test_walk_min_depth_still_descends() {
941 let fs = MemoryFs::new();
943 fs.add_dir("/level1").await;
944 fs.add_dir("/level1/level2").await;
945 fs.add_file("/level1/level2/found.txt", b"f").await;
946
947 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
948 respect_gitignore: false,
949 min_depth: Some(2),
950 ..Default::default()
951 });
952
953 let files = walker.collect().await.unwrap();
954 assert!(files.iter().any(|p| p.ends_with("found.txt")));
955 }
956
957 #[tokio::test]
958 async fn test_walk_error_callback() {
959 use std::sync::Mutex;
960
961 struct ErrorFs {
963 inner: MemoryFs,
964 error_paths: Vec<PathBuf>,
965 }
966
967 #[async_trait::async_trait]
968 impl WalkerFs for ErrorFs {
969 type DirEntry = MemEntry;
970
971 async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
972 if self.error_paths.iter().any(|p| p == path) {
973 return Err(WalkerError::PermissionDenied(path.display().to_string()));
974 }
975 self.inner.list_dir(path).await
976 }
977
978 async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
979 self.inner.read_file(path).await
980 }
981
982 async fn is_dir(&self, path: &Path) -> bool {
983 self.inner.is_dir(path).await
984 }
985
986 async fn exists(&self, path: &Path) -> bool {
987 self.inner.exists(path).await
988 }
989 }
990
991 let inner = MemoryFs::new();
992 inner.add_dir("/readable").await;
993 inner.add_dir("/forbidden").await;
994 inner.add_file("/readable/ok.txt", b"ok").await;
995 inner.add_file("/forbidden/secret.txt", b"secret").await;
996
997 let fs = ErrorFs {
998 inner,
999 error_paths: vec![PathBuf::from("/forbidden")],
1000 };
1001
1002 let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1003 let errors_cb = errors.clone();
1004
1005 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1006 respect_gitignore: false,
1007 include_hidden: true,
1008 on_error: Some(Arc::new(move |path, err| {
1009 errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1010 })),
1011 ..Default::default()
1012 });
1013
1014 let files = walker.collect().await.unwrap();
1015
1016 assert!(files.iter().any(|p| p.ends_with("ok.txt")));
1017 assert!(!files.iter().any(|p| p.ends_with("secret.txt")));
1018
1019 let errors = errors.lock().unwrap();
1020 assert_eq!(errors.len(), 1);
1021 assert_eq!(errors[0].0, PathBuf::from("/forbidden"));
1022 assert!(errors[0].1.contains("permission denied"));
1023 }
1024
1025 #[tokio::test]
1026 async fn test_walk_deterministic_order() {
1027 let fs = MemoryFs::new();
1028
1029 fs.add_dir("/charlie").await;
1031 fs.add_dir("/alpha").await;
1032 fs.add_dir("/bravo").await;
1033 fs.add_file("/charlie/c.txt", b"c").await;
1034 fs.add_file("/alpha/a.txt", b"a").await;
1035 fs.add_file("/bravo/b.txt", b"b").await;
1036
1037 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1038 respect_gitignore: false,
1039 ..Default::default()
1040 });
1041
1042 let files = walker.collect().await.unwrap();
1043
1044 assert_eq!(files.len(), 3);
1047 assert!(files[0].ends_with("alpha/a.txt"));
1048 assert!(files[1].ends_with("bravo/b.txt"));
1049 assert!(files[2].ends_with("charlie/c.txt"));
1050
1051 let walker2 = FileWalker::new(&fs, "/").with_options(WalkOptions {
1053 respect_gitignore: false,
1054 ..Default::default()
1055 });
1056 let files2 = walker2.collect().await.unwrap();
1057 assert_eq!(files, files2);
1058 }
1059
1060 #[tokio::test]
1061 async fn test_symlinks_not_followed_by_default() {
1062 let fs = MemoryFs::new();
1063
1064 fs.add_dir("/real").await;
1065 fs.add_file("/real/data.txt", b"data").await;
1066 fs.add_dir_symlink("/link", "/real").await;
1068
1069 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1070 respect_gitignore: false,
1071 ..Default::default()
1073 });
1074
1075 let files = walker.collect().await.unwrap();
1076
1077 assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1079 assert!(files.iter().any(|p| p.ends_with("link")));
1081 assert!(!files.iter().any(|p| p.to_string_lossy().contains("link/data")));
1083 }
1084
1085 #[tokio::test]
1086 async fn test_symlinks_followed() {
1087 let fs = MemoryFs::new();
1088
1089 fs.add_dir("/real").await;
1090 fs.add_file("/real/data.txt", b"data").await;
1091 fs.add_dir_symlink("/link", "/real").await;
1093
1094 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1095 respect_gitignore: false,
1096 follow_symlinks: true,
1097 ..Default::default()
1098 });
1099
1100 let files = walker.collect().await.unwrap();
1101
1102 assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1104 assert!(files.iter().any(|p| p.ends_with("link/data.txt")));
1105 }
1106
1107 #[tokio::test]
1108 async fn test_symlink_cycle_detection() {
1109 use std::sync::Mutex;
1110
1111 let fs = MemoryFs::new();
1112
1113 fs.add_dir("/a").await;
1115 fs.add_dir("/b").await;
1116 fs.add_file("/a/file_a.txt", b"a").await;
1117 fs.add_file("/b/file_b.txt", b"b").await;
1118 fs.add_dir_symlink("/a/link_to_b", "/b").await;
1120 fs.add_dir_symlink("/b/link_to_a", "/a").await;
1121
1122 let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1123 let errors_cb = errors.clone();
1124
1125 let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1126 respect_gitignore: false,
1127 follow_symlinks: true,
1128 on_error: Some(Arc::new(move |path, err| {
1129 errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1130 })),
1131 ..Default::default()
1132 });
1133
1134 let files = walker.collect().await.unwrap();
1135
1136 assert!(files.iter().any(|p| p.ends_with("file_a.txt")));
1138 assert!(files.iter().any(|p| p.ends_with("file_b.txt")));
1139
1140 let errors = errors.lock().unwrap();
1142 assert!(
1143 errors.iter().any(|(_, msg)| msg.contains("symlink cycle")),
1144 "expected symlink cycle error, got: {errors:?}"
1145 );
1146
1147 }
1149}