// dirwalk 1.1.1
//
// Platform-optimized recursive directory walker with metadata
// Documentation
//! Recursive walk engine.
//!
//! [`WalkBuilder`] is the main entry point. It supports two execution modes:
//!
//! - **Sequential** — BFS via [`WalkIter`], using a `VecDeque` work queue.
//!   Filters are applied inline during enumeration.
//! - **Parallel** — Rayon-based. Directory scanning is split across a
//!   configurable thread pool (see [`crate::Threads`]).
//!
//! Symlink loop detection uses a `HashSet<(device, inode)>` on Unix and file
//! ID on Windows.

mod iter;
mod parallel;

use crate::entry::Entry;
use crate::error::Error;
use crate::filter::Filter;
use crate::sort::{self, Sort};
use crate::threads::Threads;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::{Duration, Instant};

pub use iter::WalkIter;

/// Describes the kind of storage the walk will run against.
///
/// The walker may use this to adjust OS-level I/O behavior for the target
/// medium. Where a hint has no effect (platform or storage type), it is
/// silently ignored.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StorageHint {
    /// Locally attached storage such as an SSD or HDD. This is the default.
    #[default]
    Local,
    /// Network or otherwise high-latency storage (SMB, NFS, NAS, slow USB).
    ///
    /// Enables larger OS-level fetch buffers where available
    /// (e.g. `FIND_FIRST_EX_LARGE_FETCH` on Windows).
    Network,
}

/// Configures and executes a recursive directory walk.
///
/// Every setter consumes the builder and returns it, so calls are chained.
/// Finish with [`WalkBuilder::build`] to collect everything into a
/// [`WalkResult`], or with [`WalkBuilder::iter`] to obtain a [`WalkIter`].
///
/// ```no_run
/// # use dirwalk::WalkBuilder;
/// let result = WalkBuilder::new("/some/path")
///     .max_depth(3)
///     .hidden(true)
///     .build()
///     .unwrap();
/// ```
pub struct WalkBuilder {
    /// Path the walk starts from, copied from the argument to `new`.
    root: PathBuf,
    /// Optional depth limit; `None` is turned into `u32::MAX` when the
    /// iterator is configured.
    max_depth: Option<u32>,
    /// Whether hidden entries are included. Defaults to `false`.
    include_hidden: bool,
    /// Whether symbolic links are followed. Defaults to `false`.
    follow_links: bool,
    /// Whether `build` fills in `WalkResult::stats`. Not forwarded to the iterator.
    compute_stats: bool,
    /// Entry filter configured by `extensions`, `glob`, `min_size`,
    /// `max_size` and `gitignore`.
    filter: Filter,
    /// Sort mode applied by `build`. Not forwarded to the iterator.
    sort: Option<Sort>,
    /// Passed to the sorter by `build`; when set without a sort mode,
    /// `build` sorts by name. Not forwarded to the iterator.
    dirs_first: bool,
    /// Thread specification forwarded to the iterator. Defaults to `Threads::from(0)`.
    threads: Threads,
    /// Optional caller-supplied rayon pool forwarded to the iterator.
    pool: Option<Arc<rayon::ThreadPool>>,
    /// Storage medium hint forwarded to the iterator.
    storage_hint: StorageHint,
}

impl WalkBuilder {
    /// Creates a builder for a walk rooted at `root`.
    ///
    /// Defaults: no depth limit, hidden entries excluded, symbolic links not
    /// followed, no statistics, an empty filter, no sorting,
    /// `Threads::from(0)`, no shared thread pool and [`StorageHint::Local`].
    #[must_use]
    pub fn new(root: impl AsRef<Path>) -> Self {
        Self {
            root: root.as_ref().to_path_buf(),
            max_depth: None,
            include_hidden: false,
            follow_links: false,
            compute_stats: false,
            filter: Filter::new(),
            sort: None,
            dirs_first: false,
            threads: Threads::from(0),
            pool: None,
            storage_hint: StorageHint::Local,
        }
    }

    /// Limits how deep the walk descends.
    ///
    /// When this is never called the limit is `u32::MAX`.
    #[must_use]
    pub fn max_depth(mut self, depth: u32) -> Self {
        self.max_depth = Some(depth);
        self
    }

    /// Sets whether hidden entries are included. Defaults to `false`.
    #[must_use]
    pub fn hidden(mut self, include: bool) -> Self {
        self.include_hidden = include;
        self
    }

    /// Sets whether symbolic links are followed. Defaults to `false`.
    #[must_use]
    pub fn follow_links(mut self, follow: bool) -> Self {
        self.follow_links = follow;
        self
    }

    /// Sets whether [`WalkBuilder::build`] fills in [`WalkResult::stats`].
    ///
    /// This setting is only consulted by `build`; it is not part of the
    /// configuration handed to [`WalkBuilder::iter`].
    #[must_use]
    pub fn stats(mut self, compute: bool) -> Self {
        self.compute_stats = compute;
        self
    }

    /// Restricts the walk to the given file extensions.
    ///
    /// Each extension is lowercased before it is handed to the filter.
    #[must_use]
    pub fn extensions(mut self, exts: impl IntoIterator<Item = impl AsRef<str>>) -> Self {
        self.filter.set_extensions(
            exts.into_iter()
                .map(|s| s.as_ref().to_lowercase())
                .collect(),
        );
        self
    }

    /// Sets a glob pattern on the filter.
    ///
    /// # Errors
    ///
    /// Returns the [`globset::Error`] reported by the filter when `pattern`
    /// is rejected.
    pub fn glob(mut self, pattern: &str) -> Result<Self, globset::Error> {
        self.filter.set_glob(pattern)?;
        Ok(self)
    }

    /// Sets the filter's minimum size (presumably in bytes — confirm against `Filter`).
    #[must_use]
    pub fn min_size(mut self, size: u64) -> Self {
        self.filter.set_min_size(size);
        self
    }

    /// Sets the filter's maximum size (presumably in bytes — confirm against `Filter`).
    #[must_use]
    pub fn max_size(mut self, size: u64) -> Self {
        self.filter.set_max_size(size);
        self
    }

    /// Enables or disables the filter's gitignore handling.
    #[must_use]
    pub fn gitignore(mut self, enabled: bool) -> Self {
        self.filter.set_gitignore(enabled);
        self
    }

    /// Sets the sort mode applied to the collected entries by
    /// [`WalkBuilder::build`].
    ///
    /// Sorting is not part of the configuration handed to
    /// [`WalkBuilder::iter`].
    #[must_use]
    pub fn sort(mut self, sort: Sort) -> Self {
        self.sort = Some(sort);
        self
    }

    /// Sets the `dirs_first` flag that [`WalkBuilder::build`] passes to the
    /// sorter.
    ///
    /// When enabled without an explicit sort mode, `build` sorts by
    /// `Sort::Name`. Not part of the configuration handed to
    /// [`WalkBuilder::iter`].
    #[must_use]
    pub fn dirs_first(mut self, enabled: bool) -> Self {
        self.dirs_first = enabled;
        self
    }

    /// Sets the thread specification used for the walk (see [`crate::Threads`]).
    #[must_use]
    pub fn threads(mut self, spec: impl Into<Threads>) -> Self {
        self.threads = spec.into();
        self
    }

    /// Hint about the storage medium being walked.
    ///
    /// Defaults to [`StorageHint::Local`]. See [`StorageHint`] for details.
    #[must_use]
    pub fn storage_hint(mut self, hint: StorageHint) -> Self {
        self.storage_hint = hint;
        self
    }

    /// Provide a pre-built rayon thread pool for parallel walks.
    /// Eliminates per-walk pool creation overhead — useful when calling in a tight loop
    /// (e.g. benchmarks). Has no effect on single-threaded walks.
    /// When a pool is provided, the pool's own thread count is used; `.threads()` is ignored.
    #[must_use]
    pub fn pool(mut self, pool: Arc<rayon::ThreadPool>) -> Self {
        self.pool = Some(pool);
        self
    }

    /// Runs the walk to completion and collects the outcome.
    ///
    /// Successful items end up in [`WalkResult::entries`] and failed items in
    /// [`WalkResult::errors`]. Entries are sorted when a sort mode or
    /// `dirs_first` was requested. [`WalkResult::stats`] is `Some` only when
    /// statistics were enabled; its duration covers the whole call,
    /// including sorting.
    ///
    /// # Errors
    ///
    /// Returns an error when the underlying iterator cannot be created.
    /// Errors produced while iterating do not abort the walk.
    pub fn build(self) -> Result<WalkResult, Error> {
        let start = Instant::now();
        // Copy out the settings that are still needed once `iter()` has consumed `self`.
        let sort = self.sort;
        let dirs_first = self.dirs_first;
        let compute_stats = self.compute_stats;

        let mut iter = self.iter()?;

        let mut entries = Vec::new();
        let mut errors = Vec::new();

        // Iterate through `&mut iter` so its counters stay readable afterwards.
        for item in &mut iter {
            match item {
                Ok(entry) => entries.push(entry),
                Err(e) => errors.push(e),
            }
        }

        // --dirs-first without --sort: use name order as a stable baseline
        let sort_mode = sort.or_else(|| dirs_first.then_some(Sort::Name));
        if let Some(sort_mode) = sort_mode {
            sort::sort_entries(&mut entries, sort_mode, dirs_first);
        }

        let stats = compute_stats.then(|| Stats {
            file_count: iter.file_count,
            dir_count: iter.dir_count,
            total_size: iter.total_size,
            duration: start.elapsed(),
        });

        Ok(WalkResult {
            entries,
            stats,
            errors,
        })
    }

    /// Consumes the builder and returns the underlying [`WalkIter`].
    ///
    /// Only the root, depth limit, hidden/symlink flags, filter, thread
    /// settings, pool and storage hint are forwarded. The `sort`,
    /// `dirs_first` and `stats` settings are used by [`WalkBuilder::build`]
    /// alone and have no effect here.
    ///
    /// # Errors
    ///
    /// Returns whatever error `WalkIter::new` reports for this configuration.
    pub fn iter(self) -> Result<WalkIter, Error> {
        WalkIter::new(iter::WalkConfig {
            root: self.root,
            // No explicit limit means "as deep as a u32 can express".
            max_depth: self.max_depth.unwrap_or(u32::MAX),
            include_hidden: self.include_hidden,
            follow_links: self.follow_links,
            filter: self.filter,
            threads: self.threads,
            pool: self.pool,
            storage_hint: self.storage_hint,
        })
    }
}

/// Everything collected by [`WalkBuilder::build`].
#[must_use]
pub struct WalkResult {
    /// Entries the walk yielded successfully, sorted when a sort mode or
    /// `dirs_first` was requested on the builder.
    pub entries: Vec<Entry>,
    /// Aggregate statistics; `Some` only when statistics were enabled on the builder.
    pub stats: Option<Stats>,
    /// Errors the walk yielded while iterating; these do not abort the walk.
    pub errors: Vec<Error>,
}

/// Aggregate counters for a completed walk.
///
/// Produced by `WalkBuilder::build` when statistics are enabled.
#[must_use]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Stats {
    /// Value of the walk iterator's `file_count` counter after the walk.
    pub file_count: u64,
    /// Value of the walk iterator's `dir_count` counter after the walk.
    pub dir_count: u64,
    /// Value of the walk iterator's `total_size` counter after the walk
    /// (presumably bytes — confirm against `WalkIter`).
    pub total_size: u64,
    /// Wall-clock time from the start of `build` until the statistics were assembled.
    pub duration: Duration,
}