Skip to main content

kaish_glob/
walker.rs

1//! Core async file walker, generic over `WalkerFs`.
2//!
3//! Provides recursive directory traversal with filtering support.
4
5use std::collections::HashSet;
6use std::fmt;
7use std::path::{Path, PathBuf};
8use std::sync::Arc;
9
10use crate::{WalkerDirEntry, WalkerError, WalkerFs};
11use crate::glob_path::GlobPath;
12use crate::ignore::IgnoreFilter;
13use crate::filter::IncludeExclude;
14
/// Types of entries to include in walk results.
///
/// Note that the derived `Default` selects *nothing* (`files: false`,
/// `dirs: false`). Use [`EntryTypes::files_only`], [`EntryTypes::dirs_only`]
/// or [`EntryTypes::all`] to obtain a value that actually yields entries.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash)]
pub struct EntryTypes {
    /// Include regular files.
    pub files: bool,
    /// Include directories.
    pub dirs: bool,
}

impl EntryTypes {
    /// Include only files.
    pub const fn files_only() -> Self {
        Self {
            files: true,
            dirs: false,
        }
    }

    /// Include only directories.
    pub const fn dirs_only() -> Self {
        Self {
            files: false,
            dirs: true,
        }
    }

    /// Include both files and directories.
    pub const fn all() -> Self {
        Self {
            files: true,
            dirs: true,
        }
    }
}
49
50/// Callback invoked when a non-fatal error occurs during walking.
51///
52/// Receives the path where the error occurred and the error itself.
53/// This allows callers to log or collect errors without aborting the walk.
54pub type ErrorCallback = Arc<dyn Fn(&Path, &WalkerError) + Send + Sync>;
55
56/// Options for file walking.
57pub struct WalkOptions {
58    /// Maximum depth to recurse (None = unlimited).
59    pub max_depth: Option<usize>,
60    /// Suppress yielding entries whose containing directory is at depth less
61    /// than this. Descent is unaffected — deeper entries are still found.
62    /// `None` and `Some(0)` are equivalent (yield everything).
63    pub min_depth: Option<usize>,
64    /// Skip files whose size exceeds this many bytes. Files for which the
65    /// underlying `WalkerFs::file_size` returns `None` (size unknown) are
66    /// always yielded regardless of the limit.
67    pub max_filesize: Option<u64>,
68    /// Types of entries to include.
69    pub entry_types: EntryTypes,
70    /// Respect .gitignore files and default ignores.
71    pub respect_gitignore: bool,
72    /// Include hidden files (starting with .).
73    pub include_hidden: bool,
74    /// Include/exclude filters.
75    pub filter: IncludeExclude,
76    /// Follow symbolic links into directories (default `false`).
77    /// When false, symlink directories are yielded as files rather than recursed.
78    /// When true, cycle detection prevents infinite loops.
79    pub follow_symlinks: bool,
80    /// Optional callback for non-fatal errors (unreadable dirs, bad .gitignore).
81    /// Default `None` silently skips errors (preserving original behavior).
82    pub on_error: Option<ErrorCallback>,
83    /// File-type filter using ripgrep's `ignore::types::Types`.
84    /// Builds e.g. with `TypesBuilder::new().add_defaults().select("rust")`.
85    /// Pure path-name matching — no I/O.
86    pub types: Option<Arc<ignore::types::Types>>,
87}
88
89impl fmt::Debug for WalkOptions {
90    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91        f.debug_struct("WalkOptions")
92            .field("max_depth", &self.max_depth)
93            .field("min_depth", &self.min_depth)
94            .field("max_filesize", &self.max_filesize)
95            .field("entry_types", &self.entry_types)
96            .field("respect_gitignore", &self.respect_gitignore)
97            .field("include_hidden", &self.include_hidden)
98            .field("filter", &self.filter)
99            .field("follow_symlinks", &self.follow_symlinks)
100            .field("on_error", &self.on_error.as_ref().map(|_| "..."))
101            .field("types", &self.types.as_ref().map(|_| "..."))
102            .finish()
103    }
104}
105
106impl Clone for WalkOptions {
107    fn clone(&self) -> Self {
108        Self {
109            max_depth: self.max_depth,
110            min_depth: self.min_depth,
111            max_filesize: self.max_filesize,
112            entry_types: self.entry_types,
113            respect_gitignore: self.respect_gitignore,
114            include_hidden: self.include_hidden,
115            filter: self.filter.clone(),
116            follow_symlinks: self.follow_symlinks,
117            on_error: self.on_error.clone(),
118            types: self.types.clone(),
119        }
120    }
121}
122
123impl Default for WalkOptions {
124    fn default() -> Self {
125        Self {
126            max_depth: None,
127            min_depth: None,
128            max_filesize: None,
129            entry_types: EntryTypes::files_only(),
130            respect_gitignore: true,
131            include_hidden: false,
132            filter: IncludeExclude::new(),
133            follow_symlinks: false,
134            on_error: None,
135            types: None,
136        }
137    }
138}
139
140/// Async file walker, generic over any `WalkerFs` implementation.
141///
142/// # Examples
143/// ```ignore
144/// use kaish_glob::{FileWalker, WalkOptions, GlobPath};
145///
146/// let walker = FileWalker::new(&my_fs, "src")
147///     .with_pattern(GlobPath::new("**/*.rs").unwrap())
148///     .with_options(WalkOptions::default());
149///
150/// let files = walker.collect().await?;
151/// ```
152pub struct FileWalker<'a, F: WalkerFs> {
153    fs: &'a F,
154    root: PathBuf,
155    pattern: Option<GlobPath>,
156    options: WalkOptions,
157    ignore_filter: Option<IgnoreFilter>,
158}
159
160impl<'a, F: WalkerFs> FileWalker<'a, F> {
161    /// Create a new file walker starting at the given root.
162    pub fn new(fs: &'a F, root: impl AsRef<Path>) -> Self {
163        Self {
164            fs,
165            root: root.as_ref().to_path_buf(),
166            pattern: None,
167            options: WalkOptions::default(),
168            ignore_filter: None,
169        }
170    }
171
172    /// Set a glob pattern to filter results.
173    pub fn with_pattern(mut self, pattern: GlobPath) -> Self {
174        self.pattern = Some(pattern);
175        self
176    }
177
178    /// Set walk options.
179    pub fn with_options(mut self, options: WalkOptions) -> Self {
180        self.options = options;
181        self
182    }
183
184    /// Set the ignore filter explicitly.
185    pub fn with_ignore(mut self, filter: IgnoreFilter) -> Self {
186        self.ignore_filter = Some(filter);
187        self
188    }
189
190    /// Collect all matching paths.
191    pub async fn collect(mut self) -> Result<Vec<PathBuf>, crate::WalkerError> {
192        // Set up base ignore filter
193        let base_filter = if self.options.respect_gitignore {
194            let mut filter = self
195                .ignore_filter
196                .take()
197                .unwrap_or_else(IgnoreFilter::with_defaults);
198
199            // Try to load .gitignore from root
200            let gitignore_path = self.root.join(".gitignore");
201            if self.fs.exists(&gitignore_path).await {
202                match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
203                    Ok(gitignore) => filter.merge(&gitignore),
204                    Err(err) => {
205                        if let Some(ref cb) = self.options.on_error {
206                            cb(&gitignore_path, &err);
207                        }
208                    }
209                }
210            }
211            Some(filter)
212        } else {
213            self.ignore_filter.take()
214        };
215
216        let mut results = Vec::new();
217        // Track visited directories for symlink cycle detection (only when following symlinks)
218        let mut visited_dirs: HashSet<PathBuf> = HashSet::new();
219        if self.options.follow_symlinks {
220            visited_dirs.insert(self.root.clone());
221        }
222        // Stack carries: (directory, depth, ignore_filter for this dir)
223        let mut stack = vec![(self.root.clone(), 0usize, base_filter.clone())];
224
225        while let Some((dir, depth, current_filter)) = stack.pop() {
226            // Check max depth
227            if let Some(max) = self.options.max_depth
228                && depth > max {
229                    continue;
230                }
231
232            // List directory contents
233            let entries = match self.fs.list_dir(&dir).await {
234                Ok(entries) => entries,
235                Err(err) => {
236                    if let Some(ref cb) = self.options.on_error {
237                        cb(&dir, &err);
238                    }
239                    continue;
240                }
241            };
242
243            // Sort entries by name for deterministic traversal order
244            let mut entries: Vec<_> = entries
245                .into_iter()
246                .map(|e| {
247                    let name = e.name().to_string();
248                    let is_dir = e.is_dir();
249                    let is_symlink = e.is_symlink();
250                    (name, is_dir, is_symlink)
251                })
252                .collect();
253            entries.sort_by(|a, b| a.0.cmp(&b.0));
254
255            // Collect directories to push in reverse order so alphabetically-first
256            // directories are popped first from the LIFO stack.
257            let mut dirs_to_push = Vec::new();
258
259            for (entry_name, entry_is_dir, entry_is_symlink) in entries {
260                let full_path = dir.join(&entry_name);
261
262                // Hidden-file rule (bash, no `dotglob`). With a glob pattern the
263                // leading-dot decision is made per-component by `matches_pattern`
264                // (yield) and `could_descend` (traversal) below: `*` skips
265                // dotfiles while `.*`/`.github`/`**/.env` reach them. With no
266                // pattern — a plain recursive walk — hide dot entries unless
267                // `include_hidden`.
268                if !self.options.include_hidden
269                    && self.pattern.is_none()
270                    && entry_name.starts_with('.')
271                {
272                    continue;
273                }
274
275                // Check ignore filter
276                if let Some(ref filter) = current_filter {
277                    let relative = self.relative_path(&full_path);
278                    if filter.is_ignored(&relative, entry_is_dir) {
279                        continue;
280                    }
281                }
282
283                // Check type filter (-tjs / -Trust style filename matching).
284                // `Types::matched` returns Match::None for directories, so dirs
285                // always pass through and we can still recurse into them.
286                if let Some(ref types) = self.options.types
287                    && types.matched(&full_path, entry_is_dir).is_ignore() {
288                        continue;
289                    }
290
291                // Check include/exclude filter
292                if !self.options.filter.is_empty() {
293                    let relative = self.relative_path(&full_path);
294                    if self.options.filter.should_exclude(&relative) {
295                        continue;
296                    }
297                    // Also check filename only for patterns like "*_test.rs"
298                    if let Some(name) = full_path.file_name()
299                        && self
300                            .options
301                            .filter
302                            .should_exclude(Path::new(name))
303                        {
304                            continue;
305                        }
306                }
307
308                if entry_is_dir {
309                    // Symlink directory handling
310                    if entry_is_symlink && !self.options.follow_symlinks {
311                        // Don't recurse into symlink dirs — yield as a file entry
312                        if self.options.entry_types.files
313                            && self.matches_pattern(&full_path)
314                            && self.depth_yields(depth)
315                            && self.size_within_limit(self.fs, &full_path).await
316                        {
317                            results.push(full_path);
318                        }
319                        continue;
320                    }
321
322                    // Cycle detection when following symlinks
323                    if entry_is_symlink && self.options.follow_symlinks {
324                        let canonical = self.fs.canonicalize(&full_path).await;
325                        if !visited_dirs.insert(canonical) {
326                            // Already visited this real directory — symlink cycle
327                            if let Some(ref cb) = self.options.on_error {
328                                cb(
329                                    &full_path,
330                                    &WalkerError::SymlinkCycle(full_path.display().to_string()),
331                                );
332                            }
333                            continue;
334                        }
335                    }
336
337                    // Check for nested .gitignore in this directory
338                    let child_filter = if self.options.respect_gitignore {
339                        let gitignore_path = full_path.join(".gitignore");
340                        if self.fs.exists(&gitignore_path).await {
341                            match IgnoreFilter::from_gitignore(&gitignore_path, self.fs).await {
342                                Ok(nested_gitignore) => {
343                                    // Merge with parent filter
344                                    current_filter
345                                        .as_ref()
346                                        .map(|f| f.merged_with(&nested_gitignore))
347                                        .or(Some(nested_gitignore))
348                                }
349                                Err(err) => {
350                                    if let Some(ref cb) = self.options.on_error {
351                                        cb(&gitignore_path, &err);
352                                    }
353                                    current_filter.clone()
354                                }
355                            }
356                        } else {
357                            current_filter.clone()
358                        }
359                    } else {
360                        current_filter.clone()
361                    };
362
363                    // Only recurse if some entry beneath this directory could
364                    // still match. `could_descend` honours the leading-dot rule,
365                    // so `**` enters visible dirs but not hidden ones (without
366                    // dotglob), while an explicitly named `.github` is entered.
367                    let should_recurse = match &self.pattern {
368                        None => true,
369                        Some(pat) => {
370                            let relative = self.relative_path(&full_path);
371                            pat.could_descend(&relative, self.options.include_hidden)
372                        }
373                    };
374
375                    if should_recurse {
376                        dirs_to_push.push((full_path.clone(), depth + 1, child_filter));
377                    }
378
379                    // Yield directory if wanted
380                    if self.options.entry_types.dirs
381                        && self.matches_pattern(&full_path)
382                        && self.depth_yields(depth)
383                    {
384                        results.push(full_path);
385                    }
386                } else {
387                    // Yield file if wanted
388                    if self.options.entry_types.files
389                        && self.matches_pattern(&full_path)
390                        && self.depth_yields(depth)
391                        && self.size_within_limit(self.fs, &full_path).await
392                    {
393                        results.push(full_path);
394                    }
395                }
396            }
397
398            // Push directories in reverse order so alphabetically-first dirs
399            // are popped first from the LIFO stack.
400            dirs_to_push.reverse();
401            stack.extend(dirs_to_push);
402        }
403
404        Ok(results)
405    }
406
407    fn relative_path(&self, full_path: &Path) -> PathBuf {
408        full_path
409            .strip_prefix(&self.root)
410            .map(|p| p.to_path_buf())
411            .unwrap_or_else(|_| full_path.to_path_buf())
412    }
413
414    fn matches_pattern(&self, path: &Path) -> bool {
415        match &self.pattern {
416            Some(pattern) => {
417                let relative = self.relative_path(path);
418                pattern.matches_walk(&relative, self.options.include_hidden)
419            }
420            None => true,
421        }
422    }
423
424    /// Whether an entry at the given containing-directory depth should be
425    /// yielded under the current `min_depth` setting.
426    fn depth_yields(&self, depth: usize) -> bool {
427        match self.options.min_depth {
428            None | Some(0) => true,
429            Some(min) => depth >= min,
430        }
431    }
432
433    /// Whether a file at `path` is within the configured `max_filesize`.
434    /// Files whose size cannot be determined (`file_size` returns `None`)
435    /// are always considered within the limit.
436    async fn size_within_limit(&self, fs: &F, path: &Path) -> bool {
437        let Some(limit) = self.options.max_filesize else {
438            return true;
439        };
440        match fs.file_size(path).await {
441            Some(size) => size <= limit,
442            None => true,
443        }
444    }
445}
446
447#[cfg(test)]
448mod tests {
449    use super::*;
450    use crate::{WalkerDirEntry, WalkerError, WalkerFs};
451    use std::collections::HashMap;
452    use std::sync::Arc;
453    use tokio::sync::RwLock;
454
455    /// Simple in-memory dir entry for testing.
456    struct MemEntry {
457        name: String,
458        is_dir: bool,
459        is_symlink: bool,
460    }
461
462    impl WalkerDirEntry for MemEntry {
463        fn name(&self) -> &str { &self.name }
464        fn is_dir(&self) -> bool { self.is_dir }
465        fn is_file(&self) -> bool { !self.is_dir }
466        fn is_symlink(&self) -> bool { self.is_symlink }
467    }
468
469    /// In-memory filesystem for testing the walker.
470    ///
471    /// Supports files, directories, and symbolic links (directory symlinks).
472    struct MemoryFs {
473        files: Arc<RwLock<HashMap<PathBuf, Vec<u8>>>>,
474        dirs: Arc<RwLock<std::collections::HashSet<PathBuf>>>,
475        /// Symlink path → target path (for directory symlinks)
476        symlinks: Arc<RwLock<HashMap<PathBuf, PathBuf>>>,
477    }
478
479    impl MemoryFs {
480        fn new() -> Self {
481            let mut dirs = std::collections::HashSet::new();
482            dirs.insert(PathBuf::from("/"));
483            Self {
484                files: Arc::new(RwLock::new(HashMap::new())),
485                dirs: Arc::new(RwLock::new(dirs)),
486                symlinks: Arc::new(RwLock::new(HashMap::new())),
487            }
488        }
489
490        async fn add_file(&self, path: &str, content: &[u8]) {
491            let path = PathBuf::from(path);
492            // Ensure parent dirs exist
493            if let Some(parent) = path.parent() {
494                self.ensure_dirs(parent).await;
495            }
496            self.files.write().await.insert(path, content.to_vec());
497        }
498
499        async fn add_dir(&self, path: &str) {
500            self.ensure_dirs(&PathBuf::from(path)).await;
501        }
502
503        /// Add a directory symlink: `link` points to `target`.
504        /// The symlink appears as a directory entry and is listed under its parent.
505        async fn add_dir_symlink(&self, link: &str, target: &str) {
506            let link_path = PathBuf::from(link);
507            let target_path = PathBuf::from(target);
508            // Ensure parent of link exists
509            if let Some(parent) = link_path.parent() {
510                self.ensure_dirs(parent).await;
511            }
512            // Register as a directory so it appears in listings
513            self.dirs.write().await.insert(link_path.clone());
514            self.symlinks.write().await.insert(link_path, target_path);
515        }
516
517        /// Resolve symlinks in a path by checking each prefix component.
518        /// This mimics how a real filesystem resolves intermediate symlinks.
519        fn resolve_path(path: &Path, symlinks: &HashMap<PathBuf, PathBuf>) -> PathBuf {
520            let mut resolved = PathBuf::new();
521            for component in path.components() {
522                resolved.push(component);
523                // Check if the current prefix is a symlink and resolve it
524                if let Some(target) = symlinks.get(&resolved) {
525                    resolved = target.clone();
526                }
527            }
528            resolved
529        }
530
531        async fn ensure_dirs(&self, path: &Path) {
532            let mut dirs = self.dirs.write().await;
533            let mut current = PathBuf::new();
534            for component in path.components() {
535                current.push(component);
536                dirs.insert(current.clone());
537            }
538        }
539    }
540
541    #[async_trait::async_trait]
542    impl WalkerFs for MemoryFs {
543        type DirEntry = MemEntry;
544
545        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
546            let symlinks = self.symlinks.read().await;
547
548            // Resolve symlinks in the path: check each prefix to see if it's a symlink
549            let resolved = Self::resolve_path(path, &symlinks);
550
551            let files = self.files.read().await;
552            let dirs = self.dirs.read().await;
553
554            let mut entries = Vec::new();
555            let mut seen = std::collections::HashSet::new();
556
557            // Find files directly under this dir
558            for file_path in files.keys() {
559                if let Some(parent) = file_path.parent() {
560                    if parent == resolved {
561                        if let Some(name) = file_path.file_name() {
562                            let name_str = name.to_string_lossy().to_string();
563                            if seen.insert(name_str.clone()) {
564                                entries.push(MemEntry {
565                                    name: name_str,
566                                    is_dir: false,
567                                    is_symlink: false,
568                                });
569                            }
570                        }
571                    }
572                }
573            }
574
575            // Find subdirs directly under this dir
576            for dir_path in dirs.iter() {
577                if let Some(parent) = dir_path.parent() {
578                    if parent == resolved && dir_path != &resolved {
579                        if let Some(name) = dir_path.file_name() {
580                            let name_str = name.to_string_lossy().to_string();
581                            if seen.insert(name_str.clone()) {
582                                let is_symlink = symlinks.contains_key(dir_path);
583                                entries.push(MemEntry {
584                                    name: name_str,
585                                    is_dir: true,
586                                    is_symlink,
587                                });
588                            }
589                        }
590                    }
591                }
592            }
593
594            Ok(entries)
595        }
596
597        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
598            let files = self.files.read().await;
599            files.get(path)
600                .cloned()
601                .ok_or_else(|| WalkerError::NotFound(path.display().to_string()))
602        }
603
604        async fn is_dir(&self, path: &Path) -> bool {
605            self.dirs.read().await.contains(path)
606        }
607
608        async fn exists(&self, path: &Path) -> bool {
609            self.files.read().await.contains_key(path)
610                || self.dirs.read().await.contains(path)
611        }
612
613        async fn canonicalize(&self, path: &Path) -> PathBuf {
614            let symlinks = self.symlinks.read().await;
615            Self::resolve_path(path, &symlinks)
616        }
617    }
618
619    async fn make_test_fs() -> MemoryFs {
620        let fs = MemoryFs::new();
621
622        fs.add_dir("/src").await;
623        fs.add_dir("/src/lib").await;
624        fs.add_dir("/test").await;
625        fs.add_dir("/.git").await;
626        fs.add_dir("/node_modules").await;
627
628        fs.add_file("/src/main.rs", b"fn main() {}").await;
629        fs.add_file("/src/lib.rs", b"pub mod lib;").await;
630        fs.add_file("/src/lib/utils.rs", b"pub fn util() {}").await;
631        fs.add_file("/test/main_test.rs", b"#[test]").await;
632        fs.add_file("/README.md", b"# Test").await;
633        fs.add_file("/.hidden", b"secret").await;
634        fs.add_file("/.git/config", b"[core]").await;
635        fs.add_file("/node_modules/pkg.json", b"{}").await;
636
637        fs
638    }
639
640    #[tokio::test]
641    async fn test_walk_all_files() {
642        let fs = make_test_fs().await;
643
644        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
645            respect_gitignore: false,
646            include_hidden: true,
647            ..Default::default()
648        });
649
650        let files = walker.collect().await.unwrap();
651
652        assert!(files.iter().any(|p| p.ends_with("main.rs")));
653        assert!(files.iter().any(|p| p.ends_with("lib.rs")));
654        assert!(files.iter().any(|p| p.ends_with("README.md")));
655        assert!(files.iter().any(|p| p.ends_with(".hidden")));
656    }
657
658    #[tokio::test]
659    async fn test_walk_with_pattern() {
660        let fs = make_test_fs().await;
661
662        let walker = FileWalker::new(&fs, "/")
663            .with_pattern(GlobPath::new("**/*.rs").unwrap())
664            .with_options(WalkOptions {
665                respect_gitignore: false,
666                ..Default::default()
667            });
668
669        let files = walker.collect().await.unwrap();
670
671        assert!(files.iter().any(|p| p.ends_with("main.rs")));
672        assert!(files.iter().any(|p| p.ends_with("lib.rs")));
673        assert!(files.iter().any(|p| p.ends_with("utils.rs")));
674        assert!(!files.iter().any(|p| p.ends_with("README.md")));
675    }
676
677    #[tokio::test]
678    async fn test_walk_respects_gitignore() {
679        let fs = make_test_fs().await;
680
681        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
682            respect_gitignore: true,
683            ..Default::default()
684        });
685
686        let files = walker.collect().await.unwrap();
687
688        assert!(!files
689            .iter()
690            .any(|p| p.to_string_lossy().contains(".git")));
691        assert!(!files
692            .iter()
693            .any(|p| p.to_string_lossy().contains("node_modules")));
694
695        assert!(files.iter().any(|p| p.ends_with("main.rs")));
696    }
697
698    #[tokio::test]
699    async fn test_walk_hides_dotfiles() {
700        let fs = make_test_fs().await;
701
702        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
703            include_hidden: false,
704            respect_gitignore: false,
705            ..Default::default()
706        });
707
708        let files = walker.collect().await.unwrap();
709
710        assert!(!files.iter().any(|p| p.ends_with(".hidden")));
711        assert!(files.iter().any(|p| p.ends_with("main.rs")));
712    }
713
714    #[tokio::test]
715    async fn test_dot_pattern_matches_dotfiles() {
716        // `.*` explicitly names a leading dot, so it matches dotfiles (bash).
717        let fs = MemoryFs::new();
718        fs.add_file("/.gitignore", b"x").await;
719        fs.add_file("/.env", b"x").await;
720        fs.add_file("/visible.txt", b"x").await;
721
722        let walker = FileWalker::new(&fs, "/")
723            .with_pattern(GlobPath::new(".*").unwrap())
724            .with_options(WalkOptions {
725                respect_gitignore: false,
726                ..Default::default()
727            });
728        let files = walker.collect().await.unwrap();
729
730        assert!(files.iter().any(|p| p.ends_with(".gitignore")));
731        assert!(files.iter().any(|p| p.ends_with(".env")));
732        assert!(!files.iter().any(|p| p.ends_with("visible.txt")));
733    }
734
735    #[tokio::test]
736    async fn test_star_skips_dotfiles() {
737        // A bare `*` never matches a leading dot without dotglob.
738        let fs = MemoryFs::new();
739        fs.add_file("/.env", b"x").await;
740        fs.add_file("/visible.txt", b"x").await;
741
742        let walker = FileWalker::new(&fs, "/")
743            .with_pattern(GlobPath::new("*").unwrap())
744            .with_options(WalkOptions {
745                respect_gitignore: false,
746                entry_types: EntryTypes::all(),
747                ..Default::default()
748            });
749        let files = walker.collect().await.unwrap();
750
751        assert!(!files.iter().any(|p| p.ends_with(".env")));
752        assert!(files.iter().any(|p| p.ends_with("visible.txt")));
753    }
754
755    #[tokio::test]
756    async fn test_literal_dotdir_is_traversed() {
757        // An explicitly named `.github` directory is descended into.
758        let fs = MemoryFs::new();
759        fs.add_file("/.github/workflows/ci.yml", b"x").await;
760        fs.add_file("/.github/.secret", b"x").await;
761
762        let walker = FileWalker::new(&fs, "/")
763            .with_pattern(GlobPath::new(".github/**/*.yml").unwrap())
764            .with_options(WalkOptions {
765                respect_gitignore: false,
766                ..Default::default()
767            });
768        let files = walker.collect().await.unwrap();
769
770        assert!(files.iter().any(|p| p.ends_with("ci.yml")));
771    }
772
773    #[tokio::test]
774    async fn test_dotdir_star_excludes_nested_dotfiles() {
775        // `.github/*` reaches into the named dot dir, but `*` still skips the
776        // dot-prefixed children inside it.
777        let fs = MemoryFs::new();
778        fs.add_file("/.github/config.yml", b"x").await;
779        fs.add_file("/.github/.secret", b"x").await;
780
781        let walker = FileWalker::new(&fs, "/")
782            .with_pattern(GlobPath::new(".github/*").unwrap())
783            .with_options(WalkOptions {
784                respect_gitignore: false,
785                entry_types: EntryTypes::all(),
786                ..Default::default()
787            });
788        let files = walker.collect().await.unwrap();
789
790        assert!(files.iter().any(|p| p.ends_with("config.yml")));
791        assert!(!files.iter().any(|p| p.ends_with(".secret")));
792    }
793
794    #[tokio::test]
795    async fn test_globstar_skips_dotdirs_without_dotglob() {
796        // `**` does not descend into hidden directories without dotglob.
797        let fs = MemoryFs::new();
798        fs.add_file("/.github/buried.rs", b"x").await;
799        fs.add_file("/top.rs", b"x").await;
800
801        let walker = FileWalker::new(&fs, "/")
802            .with_pattern(GlobPath::new("**/*.rs").unwrap())
803            .with_options(WalkOptions {
804                respect_gitignore: false,
805                ..Default::default()
806            });
807        let files = walker.collect().await.unwrap();
808
809        assert!(files.iter().any(|p| p.ends_with("top.rs")));
810        assert!(!files.iter().any(|p| p.ends_with("buried.rs")));
811    }
812
813    #[tokio::test]
814    async fn test_globstar_then_explicit_dotfile() {
815        // `**/.env` reaches a dotfile at the root and inside visible dirs, but
816        // not inside a hidden dir (which `**` cannot traverse without dotglob).
817        let fs = MemoryFs::new();
818        fs.add_file("/.env", b"x").await;
819        fs.add_file("/sub/.env", b"x").await;
820        fs.add_file("/.hidden/.env", b"x").await;
821        fs.add_file("/sub/visible.txt", b"x").await;
822
823        let walker = FileWalker::new(&fs, "/")
824            .with_pattern(GlobPath::new("**/.env").unwrap())
825            .with_options(WalkOptions {
826                respect_gitignore: false,
827                ..Default::default()
828            });
829        let files = walker.collect().await.unwrap();
830
831        assert_eq!(files.iter().filter(|p| p.ends_with(".env")).count(), 2, "{files:?}");
832        assert!(files.iter().any(|p| p == &PathBuf::from("/.env")));
833        assert!(files.iter().any(|p| p == &PathBuf::from("/sub/.env")));
834        assert!(!files.iter().any(|p| p.starts_with("/.hidden")));
835    }
836
837    #[tokio::test]
838    async fn test_globstar_then_explicit_dotdir() {
839        // `**/.github/*.yml` enters the named dot dir at any depth.
840        let fs = MemoryFs::new();
841        fs.add_file("/.github/ci.yml", b"x").await;
842        fs.add_file("/sub/.github/release.yml", b"x").await;
843
844        let walker = FileWalker::new(&fs, "/")
845            .with_pattern(GlobPath::new("**/.github/*.yml").unwrap())
846            .with_options(WalkOptions {
847                respect_gitignore: false,
848                ..Default::default()
849            });
850        let files = walker.collect().await.unwrap();
851
852        assert!(files.iter().any(|p| p.ends_with("ci.yml")), "{files:?}");
853        assert!(files.iter().any(|p| p.ends_with("release.yml")), "{files:?}");
854    }
855
856    #[tokio::test]
857    async fn test_include_hidden_acts_like_dotglob() {
858        // include_hidden == dotglob: `**` then reaches hidden directories.
859        let fs = MemoryFs::new();
860        fs.add_file("/.github/buried.rs", b"x").await;
861
862        let walker = FileWalker::new(&fs, "/")
863            .with_pattern(GlobPath::new("**/*.rs").unwrap())
864            .with_options(WalkOptions {
865                respect_gitignore: false,
866                include_hidden: true,
867                ..Default::default()
868            });
869        let files = walker.collect().await.unwrap();
870
871        assert!(files.iter().any(|p| p.ends_with("buried.rs")));
872    }
873
874    #[tokio::test]
875    async fn test_walk_max_depth() {
876        let fs = make_test_fs().await;
877
878        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
879            max_depth: Some(1),
880            respect_gitignore: false,
881            include_hidden: true,
882            ..Default::default()
883        });
884
885        let files = walker.collect().await.unwrap();
886
887        // Files at depth 1 (directly under /)
888        assert!(files.iter().any(|p| p.ends_with("README.md")));
889        // Files at depth 2 (under /src)
890        assert!(files.iter().any(|p| p.ends_with("main.rs")));
891        // Files at depth 3 (under /src/lib) should NOT be present
892        assert!(!files.iter().any(|p| p.ends_with("utils.rs")));
893    }
894
895    #[tokio::test]
896    async fn test_walk_directories() {
897        let fs = make_test_fs().await;
898
899        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
900            entry_types: EntryTypes::dirs_only(),
901            respect_gitignore: false,
902            ..Default::default()
903        });
904
905        let dirs = walker.collect().await.unwrap();
906
907        assert!(dirs.iter().any(|p| p.ends_with("src")));
908        assert!(dirs.iter().any(|p| p.ends_with("lib")));
909        assert!(!dirs.iter().any(|p| p.ends_with("main.rs")));
910    }
911
912    #[tokio::test]
913    async fn test_walk_with_filter() {
914        let fs = make_test_fs().await;
915
916        let mut filter = IncludeExclude::new();
917        filter.exclude("*_test.rs");
918
919        let walker = FileWalker::new(&fs, "/")
920            .with_pattern(GlobPath::new("**/*.rs").unwrap())
921            .with_options(WalkOptions {
922                filter,
923                respect_gitignore: false,
924                ..Default::default()
925            });
926
927        let files = walker.collect().await.unwrap();
928
929        assert!(files.iter().any(|p| p.ends_with("main.rs")));
930        assert!(!files.iter().any(|p| p.ends_with("main_test.rs")));
931    }
932
933    #[tokio::test]
934    async fn test_walk_nested_gitignore() {
935        let fs = MemoryFs::new();
936
937        fs.add_dir("/src").await;
938        fs.add_dir("/src/subdir").await;
939        fs.add_file("/root.rs", b"root").await;
940        fs.add_file("/src/main.rs", b"main").await;
941        fs.add_file("/src/ignored.log", b"log").await;
942        fs.add_file("/src/subdir/util.rs", b"util").await;
943        fs.add_file("/src/subdir/local_ignore.txt", b"ignored").await;
944
945        fs.add_file("/.gitignore", b"*.log").await;
946        fs.add_file("/src/subdir/.gitignore", b"*.txt").await;
947
948        let walker = FileWalker::new(&fs, "/")
949            .with_options(WalkOptions {
950                respect_gitignore: true,
951                include_hidden: true,
952                ..Default::default()
953            });
954
955        let files = walker.collect().await.unwrap();
956
957        assert!(files.iter().any(|p| p.ends_with("root.rs")));
958        assert!(files.iter().any(|p| p.ends_with("main.rs")));
959        assert!(files.iter().any(|p| p.ends_with("util.rs")));
960
961        assert!(!files.iter().any(|p| p.ends_with("ignored.log")));
962        assert!(!files.iter().any(|p| p.ends_with("local_ignore.txt")));
963    }
964
965    /// FS that reports a stub file size for every file.
966    /// Used for max_filesize tests.
967    struct SizedFs {
968        inner: MemoryFs,
969        sizes: HashMap<PathBuf, u64>,
970    }
971
972    #[async_trait::async_trait]
973    impl WalkerFs for SizedFs {
974        type DirEntry = MemEntry;
975        async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
976            self.inner.list_dir(path).await
977        }
978        async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
979            self.inner.read_file(path).await
980        }
981        async fn is_dir(&self, path: &Path) -> bool { self.inner.is_dir(path).await }
982        async fn exists(&self, path: &Path) -> bool { self.inner.exists(path).await }
983        async fn file_size(&self, path: &Path) -> Option<u64> {
984            self.sizes.get(path).copied()
985        }
986    }
987
988    #[tokio::test]
989    async fn test_walk_max_filesize_skips_large_files() {
990        let inner = MemoryFs::new();
991        inner.add_file("/small.txt", b"tiny").await;
992        inner.add_file("/big.bin", b"larger payload").await;
993        let mut sizes = HashMap::new();
994        sizes.insert(PathBuf::from("/small.txt"), 1_024); // 1 KB
995        sizes.insert(PathBuf::from("/big.bin"), 2 * 1_048_576); // 2 MB
996        let fs = SizedFs { inner, sizes };
997
998        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
999            respect_gitignore: false,
1000            max_filesize: Some(1_048_576), // 1 MB cap
1001            ..Default::default()
1002        });
1003
1004        let files = walker.collect().await.unwrap();
1005
1006        assert!(files.iter().any(|p| p.ends_with("small.txt")));
1007        assert!(!files.iter().any(|p| p.ends_with("big.bin")));
1008    }
1009
1010    #[tokio::test]
1011    async fn test_walk_max_filesize_unknown_size_yields() {
1012        // file_size returning None means "unknown" — must NOT be skipped.
1013        let fs = MemoryFs::new();
1014        fs.add_file("/unknown.txt", b"x").await;
1015
1016        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1017            respect_gitignore: false,
1018            max_filesize: Some(0), // even with zero cap, unknown sizes pass
1019            ..Default::default()
1020        });
1021
1022        let files = walker.collect().await.unwrap();
1023        assert!(files.iter().any(|p| p.ends_with("unknown.txt")));
1024    }
1025
1026    #[tokio::test]
1027    async fn test_walk_min_depth_skips_root_files() {
1028        let fs = MemoryFs::new();
1029        fs.add_file("/at_root.txt", b"r").await;
1030        fs.add_dir("/sub").await;
1031        fs.add_file("/sub/nested.txt", b"n").await;
1032        fs.add_dir("/sub/deeper").await;
1033        fs.add_file("/sub/deeper/deep.txt", b"d").await;
1034
1035        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1036            respect_gitignore: false,
1037            min_depth: Some(1), // skip yields when containing dir is at depth < 1
1038            ..Default::default()
1039        });
1040
1041        let files = walker.collect().await.unwrap();
1042
1043        // /at_root.txt is at depth 0 (containing dir = root, depth 0) — skipped.
1044        assert!(!files.iter().any(|p| p.ends_with("at_root.txt")));
1045        // /sub/nested.txt is at depth 1 — yielded.
1046        assert!(files.iter().any(|p| p.ends_with("nested.txt")));
1047        // /sub/deeper/deep.txt is at depth 2 — yielded.
1048        assert!(files.iter().any(|p| p.ends_with("deep.txt")));
1049    }
1050
1051    #[tokio::test]
1052    async fn test_walk_types_select_only_rust() {
1053        let fs = MemoryFs::new();
1054        fs.add_file("/src/main.rs", b"r").await;
1055        fs.add_file("/src/main.py", b"p").await;
1056        fs.add_file("/src/main.js", b"j").await;
1057        fs.add_file("/README.md", b"m").await;
1058
1059        let mut tb = ignore::types::TypesBuilder::new();
1060        tb.add_defaults();
1061        tb.select("rust");
1062        let types = std::sync::Arc::new(tb.build().expect("types build"));
1063
1064        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1065            respect_gitignore: false,
1066            types: Some(types),
1067            ..Default::default()
1068        });
1069
1070        let files = walker.collect().await.unwrap();
1071
1072        assert!(files.iter().any(|p| p.ends_with("main.rs")));
1073        assert!(!files.iter().any(|p| p.ends_with("main.py")));
1074        assert!(!files.iter().any(|p| p.ends_with("main.js")));
1075        assert!(!files.iter().any(|p| p.ends_with("README.md")));
1076    }
1077
1078    #[tokio::test]
1079    async fn test_walk_types_negate_excludes() {
1080        let fs = MemoryFs::new();
1081        fs.add_file("/src/main.rs", b"r").await;
1082        fs.add_file("/src/main.py", b"p").await;
1083        fs.add_file("/README.md", b"m").await;
1084
1085        let mut tb = ignore::types::TypesBuilder::new();
1086        tb.add_defaults();
1087        tb.negate("rust");
1088        let types = std::sync::Arc::new(tb.build().expect("types build"));
1089
1090        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1091            respect_gitignore: false,
1092            types: Some(types),
1093            ..Default::default()
1094        });
1095
1096        let files = walker.collect().await.unwrap();
1097
1098        // Rust files excluded.
1099        assert!(!files.iter().any(|p| p.ends_with("main.rs")));
1100        // Other files yielded.
1101        assert!(files.iter().any(|p| p.ends_with("main.py")));
1102        assert!(files.iter().any(|p| p.ends_with("README.md")));
1103    }
1104
1105    #[tokio::test]
1106    async fn test_walk_min_depth_still_descends() {
1107        // min_depth must NOT prevent descent — only suppress yields above the threshold.
1108        let fs = MemoryFs::new();
1109        fs.add_dir("/level1").await;
1110        fs.add_dir("/level1/level2").await;
1111        fs.add_file("/level1/level2/found.txt", b"f").await;
1112
1113        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1114            respect_gitignore: false,
1115            min_depth: Some(2),
1116            ..Default::default()
1117        });
1118
1119        let files = walker.collect().await.unwrap();
1120        assert!(files.iter().any(|p| p.ends_with("found.txt")));
1121    }
1122
1123    #[tokio::test]
1124    async fn test_walk_error_callback() {
1125        use std::sync::Mutex;
1126
1127        /// Filesystem that returns errors for specific directories.
1128        struct ErrorFs {
1129            inner: MemoryFs,
1130            error_paths: Vec<PathBuf>,
1131        }
1132
1133        #[async_trait::async_trait]
1134        impl WalkerFs for ErrorFs {
1135            type DirEntry = MemEntry;
1136
1137            async fn list_dir(&self, path: &Path) -> Result<Vec<MemEntry>, WalkerError> {
1138                if self.error_paths.iter().any(|p| p == path) {
1139                    return Err(WalkerError::PermissionDenied(path.display().to_string()));
1140                }
1141                self.inner.list_dir(path).await
1142            }
1143
1144            async fn read_file(&self, path: &Path) -> Result<Vec<u8>, WalkerError> {
1145                self.inner.read_file(path).await
1146            }
1147
1148            async fn is_dir(&self, path: &Path) -> bool {
1149                self.inner.is_dir(path).await
1150            }
1151
1152            async fn exists(&self, path: &Path) -> bool {
1153                self.inner.exists(path).await
1154            }
1155        }
1156
1157        let inner = MemoryFs::new();
1158        inner.add_dir("/readable").await;
1159        inner.add_dir("/forbidden").await;
1160        inner.add_file("/readable/ok.txt", b"ok").await;
1161        inner.add_file("/forbidden/secret.txt", b"secret").await;
1162
1163        let fs = ErrorFs {
1164            inner,
1165            error_paths: vec![PathBuf::from("/forbidden")],
1166        };
1167
1168        let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1169        let errors_cb = errors.clone();
1170
1171        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1172            respect_gitignore: false,
1173            include_hidden: true,
1174            on_error: Some(Arc::new(move |path, err| {
1175                errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1176            })),
1177            ..Default::default()
1178        });
1179
1180        let files = walker.collect().await.unwrap();
1181
1182        assert!(files.iter().any(|p| p.ends_with("ok.txt")));
1183        assert!(!files.iter().any(|p| p.ends_with("secret.txt")));
1184
1185        let errors = errors.lock().unwrap();
1186        assert_eq!(errors.len(), 1);
1187        assert_eq!(errors[0].0, PathBuf::from("/forbidden"));
1188        assert!(errors[0].1.contains("permission denied"));
1189    }
1190
1191    #[tokio::test]
1192    async fn test_walk_deterministic_order() {
1193        let fs = MemoryFs::new();
1194
1195        // Add directories and files in non-alphabetical order
1196        fs.add_dir("/charlie").await;
1197        fs.add_dir("/alpha").await;
1198        fs.add_dir("/bravo").await;
1199        fs.add_file("/charlie/c.txt", b"c").await;
1200        fs.add_file("/alpha/a.txt", b"a").await;
1201        fs.add_file("/bravo/b.txt", b"b").await;
1202
1203        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1204            respect_gitignore: false,
1205            ..Default::default()
1206        });
1207
1208        let files = walker.collect().await.unwrap();
1209
1210        // Results should be in alphabetical traversal order:
1211        // alpha/a.txt, bravo/b.txt, charlie/c.txt
1212        assert_eq!(files.len(), 3);
1213        assert!(files[0].ends_with("alpha/a.txt"));
1214        assert!(files[1].ends_with("bravo/b.txt"));
1215        assert!(files[2].ends_with("charlie/c.txt"));
1216
1217        // Run again to verify determinism
1218        let walker2 = FileWalker::new(&fs, "/").with_options(WalkOptions {
1219            respect_gitignore: false,
1220            ..Default::default()
1221        });
1222        let files2 = walker2.collect().await.unwrap();
1223        assert_eq!(files, files2);
1224    }
1225
1226    #[tokio::test]
1227    async fn test_symlinks_not_followed_by_default() {
1228        let fs = MemoryFs::new();
1229
1230        fs.add_dir("/real").await;
1231        fs.add_file("/real/data.txt", b"data").await;
1232        // /link → /real (symlink directory)
1233        fs.add_dir_symlink("/link", "/real").await;
1234
1235        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1236            respect_gitignore: false,
1237            // follow_symlinks defaults to false
1238            ..Default::default()
1239        });
1240
1241        let files = walker.collect().await.unwrap();
1242
1243        // /real/data.txt should be found
1244        assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1245        // /link should be yielded as a file entry (not recursed)
1246        assert!(files.iter().any(|p| p.ends_with("link")));
1247        // Should NOT find files under /link/ since we don't follow
1248        assert!(!files.iter().any(|p| p.to_string_lossy().contains("link/data")));
1249    }
1250
1251    #[tokio::test]
1252    async fn test_symlinks_followed() {
1253        let fs = MemoryFs::new();
1254
1255        fs.add_dir("/real").await;
1256        fs.add_file("/real/data.txt", b"data").await;
1257        // /link → /real
1258        fs.add_dir_symlink("/link", "/real").await;
1259
1260        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1261            respect_gitignore: false,
1262            follow_symlinks: true,
1263            ..Default::default()
1264        });
1265
1266        let files = walker.collect().await.unwrap();
1267
1268        // Both the real path and symlinked path should have data.txt
1269        assert!(files.iter().any(|p| p.ends_with("real/data.txt")));
1270        assert!(files.iter().any(|p| p.ends_with("link/data.txt")));
1271    }
1272
1273    #[tokio::test]
1274    async fn test_symlink_cycle_detection() {
1275        use std::sync::Mutex;
1276
1277        let fs = MemoryFs::new();
1278
1279        // Create a cycle: /a → /b, /b → /a
1280        fs.add_dir("/a").await;
1281        fs.add_dir("/b").await;
1282        fs.add_file("/a/file_a.txt", b"a").await;
1283        fs.add_file("/b/file_b.txt", b"b").await;
1284        // /a/link_to_b → /b, /b/link_to_a → /a
1285        fs.add_dir_symlink("/a/link_to_b", "/b").await;
1286        fs.add_dir_symlink("/b/link_to_a", "/a").await;
1287
1288        let errors: Arc<Mutex<Vec<(PathBuf, String)>>> = Arc::new(Mutex::new(Vec::new()));
1289        let errors_cb = errors.clone();
1290
1291        let walker = FileWalker::new(&fs, "/").with_options(WalkOptions {
1292            respect_gitignore: false,
1293            follow_symlinks: true,
1294            on_error: Some(Arc::new(move |path, err| {
1295                errors_cb.lock().unwrap().push((path.to_path_buf(), err.to_string()));
1296            })),
1297            ..Default::default()
1298        });
1299
1300        let files = walker.collect().await.unwrap();
1301
1302        // Real files should be found
1303        assert!(files.iter().any(|p| p.ends_with("file_a.txt")));
1304        assert!(files.iter().any(|p| p.ends_with("file_b.txt")));
1305
1306        // Cycle should be detected and reported
1307        let errors = errors.lock().unwrap();
1308        assert!(
1309            errors.iter().any(|(_, msg)| msg.contains("symlink cycle")),
1310            "expected symlink cycle error, got: {errors:?}"
1311        );
1312
1313        // Walk should terminate (not infinite loop) — the fact we got here proves it
1314    }
1315}