dirwalk 1.1.1

Platform-optimized recursive directory walker with metadata
Documentation
use crate::entry::Entry;
use crate::error::Error;
use crate::filter::Filter;
use crate::scan::scan_dir_prefixed;
use crate::threads::Threads;
use crate::walk::StorageHint;
use std::collections::{HashSet, VecDeque};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;

/// Settings consumed by `WalkIter::new` to drive a directory walk.
pub(crate) struct WalkConfig {
    /// Directory where the walk starts; it is the first directory scanned and
    /// the prefix stripped from absolute paths to build relative prefixes.
    pub root: PathBuf,
    /// Depth limit for descent. In the sequential walk an entry is queued for
    /// scanning only while its own depth (entries directly under `root` have
    /// depth 1) is strictly below this value.
    pub max_depth: u32,
    /// When `false`, the sequential walk skips entries flagged `is_hidden`
    /// (they are neither yielded nor descended into).
    pub include_hidden: bool,
    /// When `true`, symlink entries are also candidates for descent, and a
    /// visited set of file ids is kept so the same target is not queued twice.
    pub follow_links: bool,
    /// Entry filter; consulted per entry only when it reports
    /// `has_any_filter()`. Non-matching entries are not yielded.
    pub filter: Filter,
    /// Thread setting; when it resolves to more than one thread the parallel
    /// walk is used instead of the sequential one.
    pub threads: Threads,
    /// Optional shared rayon thread pool, handed to the parallel walk.
    pub pool: Option<Arc<rayon::ThreadPool>>,
    /// Hint forwarded to the directory scanner and to the parallel walk.
    pub storage_hint: StorageHint,
}

/// Iterator over the entries of a directory walk, yielding
/// `Result<Entry, Error>` items.
///
/// Running totals are updated as entries are yielded and can be read at any
/// time through `stats()`.
pub struct WalkIter {
    /// Walk state: either a lazy sequential traversal or the pre-collected
    /// results of a parallel walk.
    inner: WalkIterInner,
    /// Reference instant from which `stats()` computes the elapsed duration.
    start: Instant,
    /// Number of non-directory entries yielded so far.
    pub(crate) file_count: u64,
    /// Number of directory entries yielded so far.
    pub(crate) dir_count: u64,
    /// Sum of `size` over the non-directory entries yielded so far.
    pub(crate) total_size: u64,
}

/// Internal state of a `WalkIter`, one variant per walking strategy.
enum WalkIterInner {
    /// Lazy single-threaded traversal: one directory is scanned at a time and
    /// its entries are handed out before the next queued directory is scanned.
    Sequential {
        /// Directories still waiting to be scanned, paired with their depth.
        /// Filled with `push_back` and drained with `pop_front` (FIFO order).
        queue: VecDeque<(PathBuf, u32)>,
        /// Entries of the directory currently being drained.
        buffer: std::vec::IntoIter<Entry>,
        /// Path of the directory that `buffer` was scanned from.
        buffer_abs: PathBuf,
        /// Depth of the directory that `buffer` was scanned from (root is 0).
        buffer_depth: u32,
        /// File ids already queued; only consulted when `follow_links` is set.
        visited: HashSet<(u64, u64)>,
        /// Walk settings, boxed to keep this variant small.
        config: Box<WalkConfig>,
        /// Cached result of `config.filter.has_any_filter()`.
        has_filter: bool,
    },
    /// Results of a parallel walk that already ran to completion.
    Parallel {
        /// Entries collected by the parallel walk.
        entries: std::vec::IntoIter<Entry>,
        /// Errors collected by the parallel walk; the iterator yields all of
        /// these before it yields any entry.
        errors: std::vec::IntoIter<Error>,
    },
}

impl WalkIter {
    /// Builds a walker from `config`.
    ///
    /// If `config.threads` resolves to more than one thread the whole walk is
    /// delegated to [`Self::new_parallel`]. Otherwise the root directory is
    /// scanned right away and every other directory is scanned lazily while
    /// the iterator is advanced.
    ///
    /// # Errors
    ///
    /// Returns the error produced by scanning the root directory (sequential
    /// mode) or by the parallel walk (parallel mode).
    pub(crate) fn new(config: WalkConfig) -> Result<Self, Error> {
        let resolved = config.threads.resolve();
        if resolved > 1 {
            return Self::new_parallel(config, resolved);
        }

        // Start the clock before the root scan so that `stats().duration`
        // covers it, consistent with the parallel constructor which also
        // captures `start` before doing any work.
        let start = Instant::now();

        let root = &config.root;

        // When following links, remember the root's identity so a link that
        // points back at the root is not queued again.
        let mut visited = HashSet::new();
        if config.follow_links {
            if let Some(id) = file_id(root) {
                visited.insert(id);
            }
        }

        // Root entries get an empty relative prefix.
        let buffer = scan_dir_prefixed(root, "", config.storage_hint)?.into_iter();
        let has_filter = config.filter.has_any_filter();
        let buffer_abs = root.clone();

        Ok(Self {
            inner: WalkIterInner::Sequential {
                queue: VecDeque::new(),
                buffer,
                buffer_abs,
                buffer_depth: 0,
                visited,
                config: Box::new(config),
                has_filter,
            },
            start,
            file_count: 0,
            dir_count: 0,
            total_size: 0,
        })
    }

    /// Runs the complete walk on `threads` threads and wraps the collected
    /// entries and errors in an iterator.
    ///
    /// The start instant is taken before the walk runs, so the reported
    /// duration includes the walk itself.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `parallel_walk`.
    fn new_parallel(config: WalkConfig, threads: usize) -> Result<Self, Error> {
        use crate::walk::parallel::{ParallelConfig, parallel_walk};

        let has_filter = config.filter.has_any_filter();
        let par_config = ParallelConfig {
            max_depth: config.max_depth,
            include_hidden: config.include_hidden,
            follow_links: config.follow_links,
            filter: config.filter,
            has_filter,
            storage_hint: config.storage_hint,
        };

        let start = Instant::now();
        let (entries, errors) = parallel_walk(&config.root, &par_config, threads, config.pool)?;

        Ok(Self {
            inner: WalkIterInner::Parallel {
                entries: entries.into_iter(),
                errors: errors.into_iter(),
            },
            start,
            file_count: 0,
            dir_count: 0,
            total_size: 0,
        })
    }

    /// Returns a snapshot of the running totals for the entries yielded so
    /// far, together with the time elapsed since the walker's start instant.
    pub fn stats(&self) -> super::Stats {
        super::Stats {
            file_count: self.file_count,
            dir_count: self.dir_count,
            total_size: self.total_size,
            duration: self.start.elapsed(),
        }
    }
}

impl Iterator for WalkIter {
    type Item = Result<Entry, Error>;

    /// Produces the next walk result and keeps the running totals up to date.
    ///
    /// * Parallel mode: every collected error is yielded first, then the
    ///   collected entries.
    /// * Sequential mode: entries of the current directory are handed out one
    ///   by one; when they run out the next queued directory is scanned. A
    ///   failed scan is yielded as an `Err` and the walk continues with the
    ///   remaining queue on the following call.
    fn next(&mut self) -> Option<Self::Item> {
        match &mut self.inner {
            WalkIterInner::Parallel { entries, errors } => {
                // Errors take precedence over entries.
                if let Some(err) = errors.next() {
                    return Some(Err(err));
                }

                let found = entries.next()?;
                if found.is_dir {
                    self.dir_count += 1;
                } else {
                    self.file_count += 1;
                    self.total_size += found.size;
                }
                Some(Ok(found))
            }
            WalkIterInner::Sequential {
                queue,
                buffer,
                buffer_abs,
                buffer_depth,
                visited,
                config,
                has_filter,
            } => loop {
                let mut entry = match buffer.next() {
                    Some(entry) => entry,
                    None => {
                        // Current directory exhausted: refill from the next
                        // queued directory, or finish when the queue is empty.
                        let (dir_abs, dir_depth) = queue.pop_front()?;
                        let prefix = dir_abs
                            .strip_prefix(&config.root)
                            .expect("abs_path is always under config.root")
                            .to_string_lossy();

                        match scan_dir_prefixed(&dir_abs, &prefix, config.storage_hint) {
                            Ok(listing) => {
                                *buffer = listing.into_iter();
                                *buffer_abs = dir_abs;
                                *buffer_depth = dir_depth;
                                continue;
                            }
                            Err(err) => return Some(Err(err)),
                        }
                    }
                };

                // Hidden entries are dropped entirely (not yielded, not descended).
                if entry.is_hidden && !config.include_hidden {
                    continue;
                }

                let depth = *buffer_depth + 1;
                entry.depth = depth;

                let descendable = entry.is_dir || (config.follow_links && entry.is_symlink);
                if descendable && depth < config.max_depth {
                    let child_abs = buffer_abs.join(entry.name());

                    // The visited set is only consulted when following links;
                    // an entry whose id cannot be read is treated as unseen.
                    let seen_before = if config.follow_links {
                        match file_id(&child_abs) {
                            Some(id) => !visited.insert(id),
                            None => false,
                        }
                    } else {
                        false
                    };

                    if !seen_before {
                        queue.push_back((child_abs, depth));
                    }
                }

                // Filtering happens after queuing, so a filtered-out directory
                // is still descended into.
                if *has_filter && !config.filter.matches(&entry, buffer_abs) {
                    continue;
                }

                if entry.is_dir {
                    self.dir_count += 1;
                } else {
                    self.file_count += 1;
                    self.total_size += entry.size;
                }

                return Some(Ok(entry));
            },
        }
    }
}

/// Returns a `(volume serial number, file index)` pair identifying the file or
/// directory at `path`, or `None` when the path cannot be opened or its
/// information cannot be queried.
///
/// The walker uses this pair as the key of its visited set when following
/// links.
#[cfg(target_os = "windows")]
fn file_id(path: &Path) -> Option<(u64, u64)> {
    use std::os::windows::ffi::OsStrExt;
    use windows::Win32::Foundation::CloseHandle;
    use windows::Win32::Storage::FileSystem::{
        BY_HANDLE_FILE_INFORMATION, CreateFileW, FILE_FLAG_BACKUP_SEMANTICS, FILE_SHARE_DELETE,
        FILE_SHARE_READ, FILE_SHARE_WRITE, GetFileInformationByHandle, OPEN_EXISTING,
    };
    use windows::core::PCWSTR;

    // NUL-terminated UTF-16 copy of the path for the wide-character API.
    let wide: Vec<u16> = path
        .as_os_str()
        .encode_wide()
        .chain(std::iter::once(0))
        .collect();

    // SAFETY: `wide` is NUL-terminated and stays alive for the whole
    // `CreateFileW` call; `info` is a local out-parameter that is only read
    // after `GetFileInformationByHandle` reports success; the handle is closed
    // exactly once and never used afterwards.
    unsafe {
        // Desired access is 0 and all sharing modes are allowed.
        // NOTE(review): per the CreateFileW documentation,
        // FILE_FLAG_BACKUP_SEMANTICS is what permits opening a directory
        // handle — confirm against the docs before changing the flags.
        let handle = CreateFileW(
            PCWSTR(wide.as_ptr()),
            0,
            FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
            None,
            OPEN_EXISTING,
            FILE_FLAG_BACKUP_SEMANTICS,
            None,
        )
        .ok()?;

        let mut info: BY_HANDLE_FILE_INFORMATION = std::mem::zeroed();
        let result = GetFileInformationByHandle(handle, &mut info);
        // Close the handle before inspecting `result` so the early return
        // below cannot leak it; a close failure is deliberately ignored.
        let _ = CloseHandle(handle);
        result.ok()?;

        let volume = info.dwVolumeSerialNumber as u64;
        // Combine the two 32-bit halves into one 64-bit file index.
        let index = ((info.nFileIndexHigh as u64) << 32) | (info.nFileIndexLow as u64);
        Some((volume, index))
    }
}

/// Returns the `(device, inode)` pair of the file or directory at `path`, or
/// `None` when its metadata cannot be read.
///
/// The metadata is obtained with `std::fs::metadata`.
#[cfg(not(target_os = "windows"))]
fn file_id(path: &Path) -> Option<(u64, u64)> {
    use std::os::unix::fs::MetadataExt;

    match std::fs::metadata(path) {
        Ok(stat) => Some((stat.dev(), stat.ino())),
        Err(_) => None,
    }
}

#[cfg(test)]
mod tests {
    use crate::walk::WalkBuilder;

    /// The parallel walk runs to completion while the iterator is being
    /// constructed, and the start instant is taken before it runs, so the
    /// reported duration must be strictly positive.
    #[test]
    fn parallel_walk_duration_is_nonzero() {
        let walker = WalkBuilder::new(".")
            .threads(2usize)
            .iter()
            .expect("parallel walk should succeed");

        let elapsed = walker.stats().duration;
        assert!(
            !elapsed.is_zero(),
            "parallel walk duration was zero — start is recorded after walk completed"
        );
    }
}