Skip to main content

kaish_glob/
walker.rs

1//! Core async file walker, generic over `WalkerFs`.
2//!
3//! Provides recursive directory traversal with filtering support.
4
5use std::collections::HashSet;
6use std::fmt;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use crate::{WalkerDirEntry, WalkerError, WalkerFs};
11use crate::glob_path::GlobPath;
12use crate::ignore::IgnoreFilter;
13use crate::filter::IncludeExclude;
14
/// Types of entries to include in walk results.
///
/// Note that the derived `Default` selects *nothing* (`files` and `dirs` are
/// both `false`). Use [`EntryTypes::files_only`], [`EntryTypes::dirs_only`]
/// or [`EntryTypes::all`] to obtain a selection that yields entries.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct EntryTypes {
    /// Include regular files.
    pub files: bool,
    /// Include directories.
    pub dirs: bool,
}

impl EntryTypes {
    /// Include only files.
    #[must_use]
    pub const fn files_only() -> Self {
        Self {
            files: true,
            dirs: false,
        }
    }

    /// Include only directories.
    #[must_use]
    pub const fn dirs_only() -> Self {
        Self {
            files: false,
            dirs: true,
        }
    }

    /// Include both files and directories.
    #[must_use]
    pub const fn all() -> Self {
        Self {
            files: true,
            dirs: true,
        }
    }
}
49
50/// Callback invoked when a non-fatal error occurs during walking.
51///
52/// Receives the path where the error occurred and the error itself.
53/// This allows callers to log or collect errors without aborting the walk.
54pub type ErrorCallback = Arc<dyn Fn(&Path, &WalkerError) + Send + Sync>;
55
56/// Options for file walking.
57pub struct WalkOptions {
58    /// Maximum depth to recurse (None = unlimited).
59    pub max_depth: Option<usize>,
60    /// Suppress yielding entries whose containing directory is at depth less
61    /// than this. Descent is unaffected — deeper entries are still found.
62    /// `None` and `Some(0)` are equivalent (yield everything).
63    pub min_depth: Option<usize>,
64    /// Skip files whose size exceeds this many bytes. Files for which the
65    /// underlying `WalkerFs::file_size` returns `None` (size unknown) are
66    /// always yielded regardless of the limit.
67    pub max_filesize: Option<u64>,
68    /// Types of entries to include.
69    pub entry_types: EntryTypes,
70    /// Respect .gitignore files and default ignores.
71    pub respect_gitignore: bool,
72    /// Include hidden files (starting with .).
73    pub include_hidden: bool,
74    /// Include/exclude filters.
75    pub filter: IncludeExclude,
76    /// Follow symbolic links into directories (default `false`).
77    /// When false, symlink directories are yielded as files rather than recursed.
78    /// When true, cycle detection prevents infinite loops.
79    pub follow_symlinks: bool,
80    /// Optional callback for non-fatal errors (unreadable dirs, bad .gitignore).
81    /// Default `None` silently skips errors (preserving original behavior).
82    pub on_error: Option<ErrorCallback>,
83    /// File-type filter using ripgrep's `ignore::types::Types`.
84    /// Builds e.g. with `TypesBuilder::new().add_defaults().select("rust")`.
85    /// Pure path-name matching — no I/O.
86    pub types: Option<Arc<ignore::types::Types>>,
87}
88
89impl fmt::Debug for WalkOptions {
90    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91        f.debug_struct("WalkOptions")
92            .field("max_depth", &self.max_depth)
93            .field("min_depth", &self.min_depth)
94            .field("max_filesize", &self.max_filesize)
95            .field("entry_types", &self.entry_types)
96            .field("respect_gitignore", &self.respect_gitignore)
97            .field("include_hidden", &self.include_hidden)
98            .field("filter", &self.filter)
99            .field("follow_symlinks", &self.follow_symlinks)
100            .field("on_error", &self.on_error.as_ref().map(|_| "..."))
101            .field("types", &self.types.as_ref().map(|_| "..."))
102            .finish()
103    }
104}
105
106impl Clone for WalkOptions {
107    fn clone(&self) -> Self {
108        Self {
109            max_depth: self.max_depth,
110            min_depth: self.min_depth,
111            max_filesize: self.max_filesize,
112            entry_types: self.entry_types,
113            respect_gitignore: self.respect_gitignore,
114            include_hidden: self.include_hidden,
115            filter: self.filter.clone(),
116            follow_symlinks: self.follow_symlinks,
117            on_error: self.on_error.clone(),
118            types: self.types.clone(),
119        }
120    }
121}
122
123impl Default for WalkOptions {
124    fn default() -> Self {
125        Self {
126            max_depth: None,
127            min_depth: None,
128            max_filesize: None,
129            entry_types: EntryTypes::files_only(),
130            respect_gitignore: true,
131            include_hidden: false,
132            filter: IncludeExclude::new(),
133            follow_symlinks: false,
134            on_error: None,
135            types: None,
136        }
137    }
138}
139
140/// Async file walker, generic over any `WalkerFs` implementation.
141///
142/// # Examples
143/// ```ignore
144/// use kaish_glob::{FileWalker, WalkOptions, GlobPath};
145///
146/// let walker = FileWalker::new(&my_fs, "src")
147///     .with_pattern(GlobPath::new("**/*.rs").unwrap())
148///     .with_options(WalkOptions::default());
149///
150/// let files = walker.collect().await?;
151/// ```
152pub struct FileWalker<'a, F: WalkerFs> {
153    fs: &'a F,
154    root: PathBuf,
155    pattern: Option<GlobPath>,
156    options: WalkOptions,
157    ignore_filter: Option<IgnoreFilter>,
158}
159
160impl<'a, F: WalkerFs> FileWalker<'a, F> {
161    /// Create a new file walker starting at the given root.
162    pub fn new(fs: &'a F, root: impl AsRef<Path>) -> Self {
163        Self {
164            fs,
165            root: root.as_ref().to_path_buf(),
166            pattern: None,
167            options: WalkOptions::default(),
168            ignore_filter: None,
169        }
170    }
171
172    /// Set a glob pattern to filter results.
173    pub fn with_pattern(mut self, pattern: GlobPath) -> Self {
174        self.pattern = Some(pattern);
175        self
176    }
177
178    /// Set walk options.
179    pub fn with_options(mut self, options: WalkOptions) -> Self {
180        self.options = options;
181        self
182    }
183
184    /// Set the ignore filter explicitly.
185    pub fn with_ignore(mut self, filter: IgnoreFilter) -> Self {
186        self.ignore_filter = Some(filter);
187        self
188    }
189
190    /// Collect all matching paths.
191    pub async fn collect(mut self) -> Result<Vec<PathBuf>, crate::WalkerError> {
192        // Set up base ignore filter
193        let base_filter = if self.options.respect_gitignore {
194            let mut filter = self
195                .ignore_filter
196                .take()
197                .unwrap_or_else(IgnoreFilter::with_defaults);
198
199            // Try to load .gitignore from root
200            let gitignore_path = self.root.join(".gitignore");
201            if self.fs.exists(&gitignore_path).await {
202                match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
203                    Ok(gitignore) => filter.merge(&gitignore),
204                    Err(err) => {
205                        if let Some(ref cb) = self.options.on_error {
206                            cb(&gitignore_path, &err);
207                        }
208                    }
209                }
210            }
211            Some(filter)
212        } else {
213            self.ignore_filter.take()
214        };
215
216        let mut results = Vec::new();
217        // Track visited directories for symlink cycle detection (only when following symlinks)
218        let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
219        if self.options.follow_symlinks {
220            visited_dirs.insert(self.root.clone());
221        }
222        // Stack carries: (directory, depth, ignore_filter for this dir)
223        let mut stack = vec![(self.root.clone(), 0usize, base_filter.clone())];
224
225        while let Some((dir, depth, current_filter)) = stack.pop() {
226            // Check max depth
227            if let Some(max) = self.options.max_depth
228                && depth > max {
229                    continue;
230                }
231
232            // List directory contents
233            let entries = match self.fs.list_dir(&dir).await {
234                Ok(entries) => entries,
235                Err(err) => {
236                    if let Some(ref cb) = self.options.on_error {
237                        cb(&dir, &err);
238                    }
239                    continue;
240                }
241            };
242
243            // Sort entries by name for deterministic traversal order
244            let mut entries: Vec<_> = entries
245                .into_iter()
246                .map(|e| {
247                    let name = e.name().to_string();
248                    let is_dir = e.is_dir();
249                    let is_symlink = e.is_symlink();
250                    (name, is_dir, is_symlink)
251                })
252                .collect();
253            entries.sort_by(|a, b| a.0.cmp(&b.0));
254
255            // Collect directories to push in reverse order so alphabetically-first
256            // directories are popped first from the LIFO stack.
257            let mut dirs_to_push = Vec::new();
258
259            for (entry_name, entry_is_dir, entry_is_symlink) in entries {
260                let full_path = dir.join(&entry_name);
261
262                // Check hidden files
263                if !self.options.include_hidden && entry_name.starts_with('.') {
264                    continue;
265                }
266
267                // Check ignore filter
268                if let Some(ref filter) = current_filter {
269                    let relative = self.relative_path(&full_path);
270                    if filter.is_ignored(&relative, entry_is_dir) {
271                        continue;
272                    }
273                }
274
275                // Check type filter (-tjs / -Trust style filename matching).
276                // `Types::matched` returns Match::None for directories, so dirs
277                // always pass through and we can still recurse into them.
278                if let Some(ref types) = self.options.types
279                    && types.matched(&full_path, entry_is_dir).is_ignore() {
280                        continue;
281                    }
282
283                // Check include/exclude filter
284                if !self.options.filter.is_empty() {
285                    let relative = self.relative_path(&full_path);
286                    if self.options.filter.should_exclude(&relative) {
287                        continue;
288                    }
289                    // Also check filename only for patterns like "*_test.rs"
290                    if let Some(name) = full_path.file_name()
291                        && self
292                            .options
293                            .filter
294                            .should_exclude(Path::new(name))
295                        {
296                            continue;
297                        }
298                }
299
300                if entry_is_dir {
301                    // Symlink directory handling
302                    if entry_is_symlink && !self.options.follow_symlinks {
303                        // Don't recurse into symlink dirs — yield as a file entry
304                        if self.options.entry_types.files
305                            && self.matches_pattern(&full_path)
306                            && self.depth_yields(depth)
307                            && self.size_within_limit(self.fs, &full_path).await
308                        {
309                            results.push(full_path);
310                        }
311                        continue;
312                    }
313
314                    // Cycle detection when following symlinks
315                    if entry_is_symlink && self.options.follow_symlinks {
316                        let canonical = self.fs.canonicalize(&full_path).await;
317                        if !visited_dirs.insert(canonical) {
318                            // Already visited this real directory — symlink cycle
319                            if let Some(ref cb) = self.options.on_error {
320                                cb(
321                                    &full_path,
322                                    &WalkerError::SymlinkCycle(full_path.display().to_string()),
323                                );
324                            }
325                            continue;
326                        }
327                    }
328
329                    // Check for nested .gitignore in this directory
330                    let child_filter = if self.options.respect_gitignore {
331                        let gitignore_path = full_path.join(".gitignore");
332                        if self.fs.exists(&gitignore_path).await {
333                            match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
334                                Ok(nested_gitignore) => {
335                                    // Merge with parent filter
336                                    current_filter
337                                        .as_ref()
338                                        .map(|f| f.merged_with(&nested_gitignore))
339                                        .or(Some(nested_gitignore))
340                                }
341                                Err(err) => {
342                                    if let Some(ref cb) = self.options.on_error {
343                                        cb(&gitignore_path, &err);
344                                    }
345                                    current_filter.clone()
346                                }
347                            }
348                        } else {
349                            current_filter.clone()
350                        }
351                    } else {
352                        current_filter.clone()
353                    };
354
355                    // Only recurse if the pattern requires it
356                    let should_recurse = match &self.pattern {
357                        None => true,
358                        Some(pat) => {
359                            if pat.has_globstar() {
360                                true
361                            } else if let Some(fixed) = pat.fixed_depth() {
362                                depth + 1 < fixed
363                            } else {
364                                true
365                            }
366                        }
367                    };
368
369                    if should_recurse {
370                        dirs_to_push.push((full_path.clone(), depth + 1, child_filter));
371                    }
372
373                    // Yield directory if wanted
374                    if self.options.entry_types.dirs
375                        && self.matches_pattern(&full_path)
376                        && self.depth_yields(depth)
377                    {
378                        results.push(full_path);
379                    }
380                } else {
381                    // Yield file if wanted
382                    if self.options.entry_types.files
383                        && self.matches_pattern(&full_path)
384                        && self.depth_yields(depth)
385                        && self.size_within_limit(self.fs, &full_path).await
386                    {
387                        results.push(full_path);
388                    }
389                }
390            }
391
392            // Push directories in reverse order so alphabetically-first dirs
393            // are popped first from the LIFO stack.
394            dirs_to_push.reverse();
395            stack.extend(dirs_to_push);
396        }
397
398        Ok(results)
399    }
400
401    fn relative_path(&self, full_path: &Path) -> PathBuf {
402        full_path
403            .strip_prefix(&self.root)
404            .map(|p| p.to_path_buf())
405            .unwrap_or_else(|_| full_path.to_path_buf())
406    }
407
408    fn matches_pattern(&self, path: &Path) -> bool {
409        match &self.pattern {
410            Some(pattern) => {
411                let relative = self.relative_path(path);
412                pattern.matches(&relative)
413            }
414            None => true,
415        }
416    }
417
418    /// Whether an entry at the given containing-directory depth should be
419    /// yielded under the current `min_depth` setting.
420    fn depth_yields(&self, depth: usize) -> bool {
421        match self.options.min_depth {
422            None | Some(0) => true,
423            Some(min) => depth >= min,
424        }
425    }
426
427    /// Whether a file at `path` is within the configured `max_filesize`.
428    /// Files whose size cannot be determined (`file_size` returns `None`)
429    /// are always considered within the limit.
430    async fn size_within_limit(&self, fs: &F, path: &Path) -> bool {
431        let Some(limit) = self.options.max_filesize else {
432            return true;
433        };
434        match fs.file_size(path).await {
435            Some(size) => size <= limit,
436            None => true,
437        }
438    }
439}
440
441#[cfg(test)]
442mod tests {
443    use super::*;
444    use crate::{WalkerDirEntry, WalkerError, WalkerFs};
445    use std::collections::HashMap;
446    use std::sync::Arc;
447    use tokio::sync::RwLock;
448
449    /// Simple in-memory dir entry for testing.
450    struct MemEntry {
451        name: String,
452        is_dir: bool,
453        is_symlink: bool,
454    }
455
456    impl WalkerDirEntry for MemEntry {
457        fn name(&self) -> &str { &self.name }
458        fn is_dir(&self) -> bool { self.is_dir }
459        fn is_file(&self) -> bool { !self.is_dir }
460        fn is_symlink(&self) -> bool { self.is_symlink }
461    }
462
463    /// In-memory filesystem for testing the walker.
464    ///
465    /// Supports files, directories, and symbolic links (directory symlinks).
466    struct MemoryFs {
467        files: Arc<RwLock<HashMap<PathBuf, Vec<u8>>>>,
468        dirs: Arc<RwLock<std::collections::HashSet<PathBuf>>>,
469        /// Symlink path → target path (for directory symlinks)
470        symlinks: Arc<RwLock<HashMap<PathBuf, PathBuf>>>,
471    }
472
473    impl MemoryFs {
474        fn new() -> Self {
475            let mut dirs = std::collections::HashSet::new();
476            dirs.insert(PathBuf::from("/"));
477            Self {
478                files: Arc::new(RwLock::new(HashMap::new())),
479                dirs: Arc::new(RwLock::new(dirs)),
480                symlinks: Arc::new(RwLock::new(HashMap::new())),
481            }
482        }
483
484        async fn add_file(&self, path: &str, content: &[u8]) {
485            let path = PathBuf::from(path);
486            // Ensure parent dirs exist
487            if let Some(parent) = path.parent() {
488                self.ensure_dirs(parent).await;
489            }
490            self.files.write().await.insert(path, content.to_vec());
491        }
492
493        async fn add_dir(&self, path: &str) {
494            self.ensure_dirs(&PathBuf::from(path)).await;
495        }
496
497        /// Add a directory symlink: `link` points to `target`.
498        /// The symlink appears as a directory entry and is listed under its parent.
499        async fn add_dir_symlink(&self, link: &str, target: &str) {
500            let link_path = PathBuf::from(link);
501            let target_path = PathBuf::from(target);
502            // Ensure parent of link exists
503            if let Some(parent) = link_path.parent() {
504                self.ensure_dirs(parent).await;
505            }
506            // Register as a directory so it appears in listings
507            self.dirs.write().await.insert(link_path.clone());
508            self.symlinks.write().await.insert(link_path, target_path);
509        }
510
511        /// Resolve symlinks in a path by checking each prefix component.
512        /// This mimics how a real filesystem resolves intermediate symlinks.
513        fn resolve_path(path: &Path, symlinks: &HashMap<PathBuf, PathBuf>) -> PathBuf {
514            let mut resolved = PathBuf::new();
515            for component in path.components() {
516                resolved.push(component);
517                // Check if the current prefix is a symlink and resolve it
518                if let Some(target) = symlinks.get(&resolved) {
519                    resolved = target.clone();
520                }
521            }
522            resolved
523        }
524
525        async fn ensure_dirs(&self, path: &Path) {
526            let mut dirs = self.dirs.write().await;
527            let mut current = PathBuf::new();
528            for component in path.components() {
529                current.push(component);
530                dirs.insert(current.clone());
531            }
532        }
533    }
534
535    #[async_trait::async_trait]
536    impl WalkerFs for MemoryFs {
537        type DirEntry = MemEntry;
538
539        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
540            let symlinks = self.symlinks.read().await;
541
542            // Resolve symlinks in the path: check each prefix to see if it's a symlink
543            let resolved = Self::resolve_path(path, &symlinks);
544
545            let files = self.files.read().await;
546            let dirs = self.dirs.read().await;
547
548            let mut entries = Vec::new();
549            let mut seen = std::collections::HashSet::new();
550
551            // Find files directly under this dir
552            for file_path in files.keys() {
553                if let Some(parent) = file_path.parent() {
554                    if parent == resolved {
555                        if let Some(name) = file_path.file_name() {
556                            let name_str = name.to_string_lossy().to_string();
557                            if seen.insert(name_str.clone()) {
558                                entries.push(MemEntry {
559                                    name: name_str,
560                                    is_dir: false,
561                                    is_symlink: false,
562                                });
563                            }
564                        }
565                    }
566                }
567            }
568
569            // Find subdirs directly under this dir
570            for dir_path in dirs.iter() {
571                if let Some(parent) = dir_path.parent() {
572                    if parent == resolved && dir_path != &resolved {
573                        if let Some(name) = dir_path.file_name() {
574                            let name_str = name.to_string_lossy().to_string();
575                            if seen.insert(name_str.clone()) {
576                                let is_symlink = symlinks.contains_key(dir_path);
577                                entries.push(MemEntry {
578                                    name: name_str,
579                                    is_dir: true,
580                                    is_symlink,
581                                });
582                            }
583                        }
584                    }
585                }
586            }
587
588            Ok(entries)
589        }
590
591        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
592            let files = self.files.read().await;
593            files.get(path)
594                .cloned()
595                .ok_or_else(|| WalkerError::NotFound(path.display().to_string()))
596        }
597
598        async fn is_dir(&self, path: &Path) -> bool {
599            self.dirs.read().await.contains(path)
600        }
601
602        async fn exists(&self, path: &Path) -> bool {
603            self.files.read().await.contains_key(path)
604                || self.dirs.read().await.contains(path)
605        }
606
607        async fn canonicalize(&self, path: &Path) -> PathBuf {
608            let symlinks = self.symlinks.read().await;
609            Self::resolve_path(path, &symlinks)
610        }
611    }
612
613    async fn make_test_fs() -> MemoryFs {
614        let fs = MemoryFs::new();
615
616        fs.add_dir("/src").await;
617        fs.add_dir("/src/lib").await;
618        fs.add_dir("/test").await;
619        fs.add_dir("/.git").await;
620        fs.add_dir("/node_modules").await;
621
622        fs.add_file("/src/main.rs", b"fn main() {}").await;
623        fs.add_file("/src/lib.rs", b"pub mod lib;").await;
624        fs.add_file("/src/lib/utils.rs", b"pub fn util() {}").await;
625        fs.add_file("/test/main_test.rs", b"#[test]").await;
626        fs.add_file("/README.md", b"# Test").await;
627        fs.add_file("/.hidden", b"secret").await;
628        fs.add_file("/.git/config", b"[core]").await;
629        fs.add_file("/node_modules/pkg.json", b"{}").await;
630
631        fs
632    }
633
634    #[tokio::test]
635    async fn test_walk_all_files() {
636        let fs = make_test_fs().await;
637
638        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
639            respect_gitignore: false,
640            include_hidden: true,
641            ..Default::default()
642        });
643
644        let files = walker.collect().await.unwrap();
645
646        assert!(files.iter().any(|p| p.ends_with("main.rs")));
647        assert!(files.iter().any(|p| p.ends_with("lib.rs")));
648        assert!(files.iter().any(|p| p.ends_with("README.md")));
649        assert!(files.iter().any(|p| p.ends_with(".hidden")));
650    }
651
652    #[tokio::test]
653    async fn test_walk_with_pattern() {
654        let fs = make_test_fs().await;
655
656        let walker = FileWalker::new(&fs, "/")
657            .with_pattern(GlobPath::new("**/*.rs").unwrap())
658            .with_options(WalkOptions {
659                respect_gitignore: false,
660                ..Default::default()
661            });
662
663        let files = walker.collect().await.unwrap();
664
665        assert!(files.iter().any(|p| p.ends_with("main.rs")));
666        assert!(files.iter().any(|p| p.ends_with("lib.rs")));
667        assert!(files.iter().any(|p| p.ends_with("utils.rs")));
668        assert!(!files.iter().any(|p| p.ends_with("README.md")));
669    }
670
671    #[tokio::test]
672    async fn test_walk_respects_gitignore() {
673        let fs = make_test_fs().await;
674
675        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
676            respect_gitignore: true,
677            ..Default::default()
678        });
679
680        let files = walker.collect().await.unwrap();
681
682        assert!(!files
683            .iter()
684            .any(|p| p.to_string_lossy().contains(".git")));
685        assert!(!files
686            .iter()
687            .any(|p| p.to_string_lossy().contains("node_modules")));
688
689        assert!(files.iter().any(|p| p.ends_with("main.rs")));
690    }
691
692    #[tokio::test]
693    async fn test_walk_hides_dotfiles() {
694        let fs = make_test_fs().await;
695
696        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
697            include_hidden: false,
698            respect_gitignore: false,
699            ..Default::default()
700        });
701
702        let files = walker.collect().await.unwrap();
703
704        assert!(!files.iter().any(|p| p.ends_with(".hidden")));
705        assert!(files.iter().any(|p| p.ends_with("main.rs")));
706    }
707
708    #[tokio::test]
709    async fn test_walk_max_depth() {
710        let fs = make_test_fs().await;
711
712        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
713            max_depth: Some(1),
714            respect_gitignore: false,
715            include_hidden: true,
716            ..Default::default()
717        });
718
719        let files = walker.collect().await.unwrap();
720
721        // Files at depth 1 (directly under /)
722        assert!(files.iter().any(|p| p.ends_with("README.md")));
723        // Files at depth 2 (under /src)
724        assert!(files.iter().any(|p| p.ends_with("main.rs")));
725        // Files at depth 3 (under /src/lib) should NOT be present
726        assert!(!files.iter().any(|p| p.ends_with("utils.rs")));
727    }
728
729    #[tokio::test]
730    async fn test_walk_directories() {
731        let fs = make_test_fs().await;
732
733        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
734            entry_types: EntryTypes::dirs_only(),
735            respect_gitignore: false,
736            ..Default::default()
737        });
738
739        let dirs = walker.collect().await.unwrap();
740
741        assert!(dirs.iter().any(|p| p.ends_with("src")));
742        assert!(dirs.iter().any(|p| p.ends_with("lib")));
743        assert!(!dirs.iter().any(|p| p.ends_with("main.rs")));
744    }
745
746    #[tokio::test]
747    async fn test_walk_with_filter() {
748        let fs = make_test_fs().await;
749
750        let mut filter = IncludeExclude::new();
751        filter.exclude("*_test.rs");
752
753        let walker = FileWalker::new(&fs, "/")
754            .with_pattern(GlobPath::new("**/*.rs").unwrap())
755            .with_options(WalkOptions {
756                filter,
757                respect_gitignore: false,
758                ..Default::default()
759            });
760
761        let files = walker.collect().await.unwrap();
762
763        assert!(files.iter().any(|p| p.ends_with("main.rs")));
764        assert!(!files.iter().any(|p| p.ends_with("main_test.rs")));
765    }
766
767    #[tokio::test]
768    async fn test_walk_nested_gitignore() {
769        let fs = MemoryFs::new();
770
771        fs.add_dir("/src").await;
772        fs.add_dir("/src/subdir").await;
773        fs.add_file("/root.rs", b"root").await;
774        fs.add_file("/src/main.rs", b"main").await;
775        fs.add_file("/src/ignored.log", b"log").await;
776        fs.add_file("/src/subdir/util.rs", b"util").await;
777        fs.add_file("/src/subdir/local_ignore.txt", b"ignored").await;
778
779        fs.add_file("/.gitignore", b"*.log").await;
780        fs.add_file("/src/subdir/.gitignore", b"*.txt").await;
781
782        let walker = FileWalker::new(&fs, "/")
783            .with_options(WalkOptions {
784                respect_gitignore: true,
785                include_hidden: true,
786                ..Default::default()
787            });
788
789        let files = walker.collect().await.unwrap();
790
791        assert!(files.iter().any(|p| p.ends_with("root.rs")));
792        assert!(files.iter().any(|p| p.ends_with("main.rs")));
793        assert!(files.iter().any(|p| p.ends_with("util.rs")));
794
795        assert!(!files.iter().any(|p| p.ends_with("ignored.log")));
796        assert!(!files.iter().any(|p| p.ends_with("local_ignore.txt")));
797    }
798
799    /// FS that reports a stub file size for every file.
800    /// Used for max_filesize tests.
801    struct SizedFs {
802        inner: MemoryFs,
803        sizes: HashMap<PathBuf, u64>,
804    }
805
806    #[async_trait::async_trait]
807    impl WalkerFs for SizedFs {
808        type DirEntry = MemEntry;
809        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
810            self.inner.list_dir(path).await
811        }
812        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
813            self.inner.read_file(path).await
814        }
815        async fn is_dir(&self, path: &Path) -> bool { self.inner.is_dir(path).await }
816        async fn exists(&self, path: &Path) -> bool { self.inner.exists(path).await }
817        async fn file_size(&self, path: &Path) -> Option<u64> {
818            self.sizes.get(path).copied()
819        }
820    }
821
822    #[tokio::test]
823    async fn test_walk_max_filesize_skips_large_files() {
824        let inner = MemoryFs::new();
825        inner.add_file("/small.txt", b"tiny").await;
826        inner.add_file("/big.bin", b"larger payload").await;
827        let mut sizes = HashMap::new();
828        sizes.insert(PathBuf::from("/small.txt"), 1_024); // 1 KB
829        sizes.insert(PathBuf::from("/big.bin"), 2 * 1_048_576); // 2 MB
830        let fs = SizedFs { inner, sizes };
831
832        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
833            respect_gitignore: false,
834            max_filesize: Some(1_048_576), // 1 MB cap
835            ..Default::default()
836        });
837
838        let files = walker.collect().await.unwrap();
839
840        assert!(files.iter().any(|p| p.ends_with("small.txt")));
841        assert!(!files.iter().any(|p| p.ends_with("big.bin")));
842    }
843
844    #[tokio::test]
845    async fn test_walk_max_filesize_unknown_size_yields() {
846        // file_size returning None means "unknown" — must NOT be skipped.
847        let fs = MemoryFs::new();
848        fs.add_file("/unknown.txt", b"x").await;
849
850        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
851            respect_gitignore: false,
852            max_filesize: Some(0), // even with zero cap, unknown sizes pass
853            ..Default::default()
854        });
855
856        let files = walker.collect().await.unwrap();
857        assert!(files.iter().any(|p| p.ends_with("unknown.txt")));
858    }
859
860    #[tokio::test]
861    async fn test_walk_min_depth_skips_root_files() {
862        let fs = MemoryFs::new();
863        fs.add_file("/at_root.txt", b"r").await;
864        fs.add_dir("/sub").await;
865        fs.add_file("/sub/nested.txt", b"n").await;
866        fs.add_dir("/sub/deeper").await;
867        fs.add_file("/sub/deeper/deep.txt", b"d").await;
868
869        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
870            respect_gitignore: false,
871            min_depth: Some(1), // skip yields when containing dir is at depth < 1
872            ..Default::default()
873        });
874
875        let files = walker.collect().await.unwrap();
876
877        // /at_root.txt is at depth 0 (containing dir = root, depth 0) — skipped.
878        assert!(!files.iter().any(|p| p.ends_with("at_root.txt")));
879        // /sub/nested.txt is at depth 1 — yielded.
880        assert!(files.iter().any(|p| p.ends_with("nested.txt")));
881        // /sub/deeper/deep.txt is at depth 2 — yielded.
882        assert!(files.iter().any(|p| p.ends_with("deep.txt")));
883    }
884
885    #[tokio::test]
886    async fn test_walk_types_select_only_rust() {
887        let fs = MemoryFs::new();
888        fs.add_file("/src/main.rs", b"r").await;
889        fs.add_file("/src/main.py", b"p").await;
890        fs.add_file("/src/main.js", b"j").await;
891        fs.add_file("/README.md", b"m").await;
892
893        let mut tb = ignore::types::TypesBuilder::new();
894        tb.add_defaults();
895        tb.select("rust");
896        let types = std::sync::Arc::new(tb.build().expect("types build"));
897
898        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
899            respect_gitignore: false,
900            types: Some(types),
901            ..Default::default()
902        });
903
904        let files = walker.collect().await.unwrap();
905
906        assert!(files.iter().any(|p| p.ends_with("main.rs")));
907        assert!(!files.iter().any(|p| p.ends_with("main.py")));
908        assert!(!files.iter().any(|p| p.ends_with("main.js")));
909        assert!(!files.iter().any(|p| p.ends_with("README.md")));
910    }
911
912    #[tokio::test]
913    async fn test_walk_types_negate_excludes() {
914        let fs = MemoryFs::new();
915        fs.add_file("/src/main.rs", b"r").await;
916        fs.add_file("/src/main.py", b"p").await;
917        fs.add_file("/README.md", b"m").await;
918
919        let mut tb = ignore::types::TypesBuilder::new();
920        tb.add_defaults();
921        tb.negate("rust");
922        let types = std::sync::Arc::new(tb.build().expect("types build"));
923
924        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
925            respect_gitignore: false,
926            types: Some(types),
927            ..Default::default()
928        });
929
930        let files = walker.collect().await.unwrap();
931
932        // Rust files excluded.
933        assert!(!files.iter().any(|p| p.ends_with("main.rs")));
934        // Other files yielded.
935        assert!(files.iter().any(|p| p.ends_with("main.py")));
936        assert!(files.iter().any(|p| p.ends_with("README.md")));
937    }
938
939    #[tokio::test]
940    async fn test_walk_min_depth_still_descends() {
941        // min_depth must NOT prevent descent — only suppress yields above the threshold.
942        let fs = MemoryFs::new();
943        fs.add_dir("/level1").await;
944        fs.add_dir("/level1/level2").await;
945        fs.add_file("/level1/level2/found.txt", b"f").await;
946
947        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
948            respect_gitignore: false,
949            min_depth: Some(2),
950            ..Default::default()
951        });
952
953        let files = walker.collect().await.unwrap();
954        assert!(files.iter().any(|p| p.ends_with("found.txt")));
955    }
956
957    #[tokio::test]
958    async fn test_walk_error_callback() {
959        use std::sync::Mutex;
960
961        /// Filesystem that returns errors for specific directories.
962        struct ErrorFs {
963            inner: MemoryFs,
964            error_paths: Vec<PathBuf>,
965        }
966
967        #[async_trait::async_trait]
968        impl WalkerFs for ErrorFs {
969            type DirEntry = MemEntry;
970
971            async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
972                if self.error_paths.iter().any(|p| p == path) {
973                    return Err(WalkerError::PermissionDenied(path.display().to_string()));
974                }
975                self.inner.list_dir(path).await
976            }
977
978            async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
979                self.inner.read_file(path).await
980            }
981
982            async fn is_dir(&self, path: &Path) -> bool {
983                self.inner.is_dir(path).await
984            }
985
986            async fn exists(&self, path: &Path) -> bool {
987                self.inner.exists(path).await
988            }
989        }
990
991        let inner = MemoryFs::new();
992        inner.add_dir("/readable").await;
993        inner.add_dir("/forbidden").await;
994        inner.add_file("/readable/ok.txt", b"ok").await;
995        inner.add_file("/forbidden/secret.txt", b"secret").await;
996
997        let fs = ErrorFs {
998            inner,
999            error_paths: vec![PathBuf::from("/forbidden")],
1000        };
1001
1002        let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1003        let errors_cb = errors.clone();
1004
1005        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1006            respect_gitignore: false,
1007            include_hidden: true,
1008            on_error: Some(Arc::new(move |path, err| {
1009                errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1010            })),
1011            ..Default::default()
1012        });
1013
1014        let files = walker.collect().await.unwrap();
1015
1016        assert!(files.iter().any(|p| p.ends_with("ok.txt")));
1017        assert!(!files.iter().any(|p| p.ends_with("secret.txt")));
1018
1019        let errors = errors.lock().unwrap();
1020        assert_eq!(errors.len(), 1);
1021        assert_eq!(errors[0].0, PathBuf::from("/forbidden"));
1022        assert!(errors[0].1.contains("permission denied"));
1023    }
1024
1025    #[tokio::test]
1026    async fn test_walk_deterministic_order() {
1027        let fs = MemoryFs::new();
1028
1029        // Add directories and files in non-alphabetical order
1030        fs.add_dir("/charlie").await;
1031        fs.add_dir("/alpha").await;
1032        fs.add_dir("/bravo").await;
1033        fs.add_file("/charlie/c.txt", b"c").await;
1034        fs.add_file("/alpha/a.txt", b"a").await;
1035        fs.add_file("/bravo/b.txt", b"b").await;
1036
1037        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1038            respect_gitignore: false,
1039            ..Default::default()
1040        });
1041
1042        let files = walker.collect().await.unwrap();
1043
1044        // Results should be in alphabetical traversal order:
1045        // alpha/a.txt, bravo/b.txt, charlie/c.txt
1046        assert_eq!(files.len(), 3);
1047        assert!(files[0].ends_with("alpha/a.txt"));
1048        assert!(files[1].ends_with("bravo/b.txt"));
1049        assert!(files[2].ends_with("charlie/c.txt"));
1050
1051        // Run again to verify determinism
1052        let walker2 = FileWalker::new(&fs, "/").with_options(WalkOptions {
1053            respect_gitignore: false,
1054            ..Default::default()
1055        });
1056        let files2 = walker2.collect().await.unwrap();
1057        assert_eq!(files, files2);
1058    }
1059
1060    #[tokio::test]
1061    async fn test_symlinks_not_followed_by_default() {
1062        let fs = MemoryFs::new();
1063
1064        fs.add_dir("/real").await;
1065        fs.add_file("/real/data.txt", b"data").await;
1066        // /link → /real (symlink directory)
1067        fs.add_dir_symlink("/link", "/real").await;
1068
1069        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1070            respect_gitignore: false,
1071            // follow_symlinks defaults to false
1072            ..Default::default()
1073        });
1074
1075        let files = walker.collect().await.unwrap();
1076
1077        // /real/data.txt should be found
1078        assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1079        // /link should be yielded as a file entry (not recursed)
1080        assert!(files.iter().any(|p| p.ends_with("link")));
1081        // Should NOT find files under /link/ since we don't follow
1082        assert!(!files.iter().any(|p| p.to_string_lossy().contains("link/data")));
1083    }
1084
1085    #[tokio::test]
1086    async fn test_symlinks_followed() {
1087        let fs = MemoryFs::new();
1088
1089        fs.add_dir("/real").await;
1090        fs.add_file("/real/data.txt", b"data").await;
1091        // /link → /real
1092        fs.add_dir_symlink("/link", "/real").await;
1093
1094        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1095            respect_gitignore: false,
1096            follow_symlinks: true,
1097            ..Default::default()
1098        });
1099
1100        let files = walker.collect().await.unwrap();
1101
1102        // Both the real path and symlinked path should have data.txt
1103        assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1104        assert!(files.iter().any(|p| p.ends_with("link/data.txt")));
1105    }
1106
1107    #[tokio::test]
1108    async fn test_symlink_cycle_detection() {
1109        use std::sync::Mutex;
1110
1111        let fs = MemoryFs::new();
1112
1113        // Create a cycle: /a → /b, /b → /a
1114        fs.add_dir("/a").await;
1115        fs.add_dir("/b").await;
1116        fs.add_file("/a/file_a.txt", b"a").await;
1117        fs.add_file("/b/file_b.txt", b"b").await;
1118        // /a/link_to_b → /b, /b/link_to_a → /a
1119        fs.add_dir_symlink("/a/link_to_b", "/b").await;
1120        fs.add_dir_symlink("/b/link_to_a", "/a").await;
1121
1122        let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1123        let errors_cb = errors.clone();
1124
1125        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1126            respect_gitignore: false,
1127            follow_symlinks: true,
1128            on_error: Some(Arc::new(move |path, err| {
1129                errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1130            })),
1131            ..Default::default()
1132        });
1133
1134        let files = walker.collect().await.unwrap();
1135
1136        // Real files should be found
1137        assert!(files.iter().any(|p| p.ends_with("file_a.txt")));
1138        assert!(files.iter().any(|p| p.ends_with("file_b.txt")));
1139
1140        // Cycle should be detected and reported
1141        let errors = errors.lock().unwrap();
1142        assert!(
1143            errors.iter().any(|(_, msg)| msg.contains("symlink cycle")),
1144            "expected symlink cycle error, got: {errors:?}"
1145        );
1146
1147        // Walk should terminate (not infinite loop) — the fact we got here proves it
1148    }
1149}