// dirwalk 1.1.1
//
// Platform-optimized recursive directory walker with metadata.
// Documentation
//! Entry filtering, applied inline during the walk.
//!
//! - **Extensions** — case-insensitive match against a set.
//! - **Glob** — [`globset`] pattern on filename.
//! - **Size** — min/max byte thresholds (files only).
//! - **Hidden** — `.`-prefixed (Unix) or `FILE_ATTRIBUTE_HIDDEN` (Windows).
//! - **Gitignore** — `.gitignore` hierarchy via the [`ignore`] crate. Off by default.

use crate::entry::Entry;
use globset::{Glob, GlobMatcher};
use ignore::gitignore::Gitignore;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

thread_local! {
    /// Per-thread cache of parsed `.gitignore` matchers, keyed by the directory that was
    /// probed. A `None` value records that the directory has no `.gitignore` file, so the
    /// filesystem is not asked again.
    static GITIGNORE_CACHE: std::cell::RefCell<HashMap<PathBuf, Option<Gitignore>>> =
        Default::default();
    /// Per-thread cache mapping a directory to the nearest directory at or above it that
    /// contains a `.git` entry. A `None` value records that no such directory was found.
    static GIT_ROOT_CACHE: std::cell::RefCell<HashMap<PathBuf, Option<PathBuf>>> =
        Default::default();
}

/// Returns the nearest directory at or above `dir` that contains a `.git` entry, or `None`
/// when no ancestor has one.
///
/// `.git` is probed with `Path::exists`, so either a directory or a file of that name counts.
///
/// Results are memoised in the thread-local `GIT_ROOT_CACHE`. The answer for a directory
/// without its own `.git` is by definition the answer for its parent, so the upward walk
/// stops as soon as it reaches an ancestor whose answer is already cached, and every
/// directory probed along the way is cached too. During a recursive walk this means a newly
/// visited directory costs one `.git` probe instead of one per ancestor.
fn find_git_root(dir: &Path) -> Option<PathBuf> {
    GIT_ROOT_CACHE.with(|cache| {
        let mut cache = cache.borrow_mut();

        let mut root: Option<PathBuf> = None;
        // Number of leading ancestors (starting at `dir`) that had no cached answer.
        let mut uncached = 0usize;

        for ancestor in dir.ancestors() {
            if let Some(known) = cache.get(ancestor) {
                // Nothing between `dir` and here had a `.git`, so this answer applies to all.
                root = known.clone();
                break;
            }
            uncached += 1;
            if ancestor.join(".git").exists() {
                root = Some(ancestor.to_path_buf());
                break;
            }
        }

        // Record the shared answer for every directory that was actually probed.
        for ancestor in dir.ancestors().take(uncached) {
            cache.insert(ancestor.to_path_buf(), root.clone());
        }

        root
    })
}

/// Reports whether `entry`, which lives in the directory `abs_dir`, is excluded by a
/// `.gitignore` file.
///
/// `.gitignore` files are consulted starting in `abs_dir` and moving up through its parents.
/// The walk ends after the directory `find_git_root` reports for `abs_dir`, or at the top
/// of the path when it reports none.
///
/// The first file containing a pattern that matches the entry decides the outcome: an
/// ignore pattern yields `true`, a negated (`!`) pattern yields `false`. Because nearer files
/// are consulted first, they take precedence over files higher up, matching Git's rule that
/// lower-level `.gitignore` files override higher-level ones.
///
/// Parsed matchers (or the absence of a `.gitignore`) are cached per directory in the
/// thread-local `GITIGNORE_CACHE`. Parse errors reported by `Gitignore::new` are discarded
/// and the matcher it returns alongside them is still used.
fn is_gitignored(entry: &Entry, abs_dir: &Path) -> bool {
    let git_root = find_git_root(abs_dir);
    let abs_path = abs_dir.join(entry.name());

    GITIGNORE_CACHE.with(|cache| {
        let mut cache = cache.borrow_mut();
        let mut dir = Some(abs_dir);
        while let Some(d) = dir {
            let gi = cache.entry(d.to_path_buf()).or_insert_with(|| {
                let gitignore_path = d.join(".gitignore");
                if gitignore_path.exists() {
                    // Best effort: keep whatever patterns parsed, drop the error report.
                    let (gi, _err) = Gitignore::new(&gitignore_path);
                    Some(gi)
                } else {
                    None
                }
            });
            if let Some(gi) = gi {
                let verdict = gi.matched(&abs_path, entry.is_dir);
                if verdict.is_ignore() {
                    return true;
                }
                if verdict.is_whitelist() {
                    // An explicit `!pattern` at this level re-includes the entry; ignore
                    // rules in parent directories must not override it.
                    return false;
                }
            }
            if git_root.as_deref() == Some(d) {
                break;
            }
            dir = d.parent();
        }
        false
    })
}

/// Criteria an entry must satisfy to be kept during a walk.
///
/// The default value has no criteria set. Each criterion is enabled through the matching
/// `set_*` method and evaluated by `Filter::matches`.
#[derive(Debug, Clone, Default)]
pub struct Filter {
    /// Accepted file extensions; `None` disables the extension check.
    extensions: Option<Vec<String>>,
    /// Compiled glob tested against the entry name; `None` disables the glob check.
    glob: Option<GlobMatcher>,
    /// Smallest accepted file size in bytes (inclusive); `None` means no lower bound.
    min_size: Option<u64>,
    /// Largest accepted file size in bytes (inclusive); `None` means no upper bound.
    max_size: Option<u64>,
    /// Whether entries excluded by `.gitignore` files are rejected. `false` by default.
    gitignore: bool,
}

impl Filter {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn has_any_filter(&self) -> bool {
        self.extensions.is_some()
            || self.glob.is_some()
            || self.min_size.is_some()
            || self.max_size.is_some()
            || self.gitignore
    }

    pub fn set_extensions(&mut self, exts: Vec<String>) {
        self.extensions = Some(exts);
    }

    pub fn set_glob(&mut self, pattern: &str) -> Result<(), globset::Error> {
        let matcher = Glob::new(pattern)?.compile_matcher();
        self.glob = Some(matcher);
        Ok(())
    }

    pub fn set_min_size(&mut self, size: u64) {
        self.min_size = Some(size);
    }

    pub fn set_max_size(&mut self, size: u64) {
        self.max_size = Some(size);
    }

    pub fn set_gitignore(&mut self, enabled: bool) {
        self.gitignore = enabled;
    }

    /// Check if an entry passes the filter. Directories always pass extension/glob/size
    /// filters since we need to descend into them.
    ///
    /// Gitignore state is cached per thread, so this is safe to call from parallel contexts.
    pub fn matches(&self, entry: &Entry, abs_dir: &Path) -> bool {
        if self.gitignore && is_gitignored(entry, abs_dir) {
            return false;
        }

        if entry.is_dir {
            return true;
        }

        if let Some(ref exts) = self.extensions {
            match entry.extension() {
                Some(ext) => {
                    let ext_lower = ext.to_ascii_lowercase();
                    if !exts.contains(&ext_lower) {
                        return false;
                    }
                }
                None => return false,
            }
        }

        if let Some(ref glob) = self.glob
            && !glob.is_match(entry.name())
        {
            return false;
        }

        if let Some(min) = self.min_size
            && entry.size < min
        {
            return false;
        }

        if let Some(max) = self.max_size
            && entry.size > max
        {
            return false;
        }

        true
    }
}