ignored 0.0.6

A Rust implementation of the .gitignore file format for quickly checking whether a path is ignored by Git, without invoking the `git` CLI.
Documentation
use std::{
    collections::{HashMap, hash_map::Entry},
    ffi::OsStr,
    path::{Path, PathBuf},
    sync::{Arc, Mutex},
};

use crate::{
    constant,
    evaluator::{self, File, git_config, git_root, types::Result, utils},
};

/// An evaluator for `.gitignore` files in a given directory and its parent directories.
///
/// The evaluator maintains an internal cache of parsed `.gitignore` files to optimize performance when evaluating
/// multiple paths within the same directory structure.
///
/// Besides regular `.gitignore` files, the evaluator also consults the repository-local exclude file and the
/// user's global exclude file when no `.gitignore` file produced a decision.
///
/// The full specification of the `.gitignore` format, along with the behaviour and hierarchy of `.gitignore` files,
/// can be found in the [git documentation](https://git-scm.com/docs/gitignore#_description).
///
/// # Examples
///
/// ```rust
/// use ignored::evaluator::Evaluator;
///
/// # std::fs::create_dir("tests/fixtures/mock-project/.git");
/// let evaluator = Evaluator::default();
/// let ignored = evaluator.is_ignored("tests/fixtures/mock-project/file.tmp");
///
/// assert!(ignored);
/// ```
#[derive(Debug, Default)]
pub struct Evaluator {
    /// A map of previously parsed `.gitignore` files, keyed by the path of the file that was parsed.
    ///
    /// This is an optimisation which allows the evaluator to avoid re-parsing frequently accessed
    /// `.gitignore` files. The map sits behind a [`Mutex`] so it can be updated through a shared
    /// reference to the evaluator.
    files: Mutex<HashMap<PathBuf, Arc<File>>>,

    /// Handler used to look up the path of the user's global exclude file.
    config: git_config::ConfigHandler,

    /// Handler used to look up and record git roots, and to locate a git root's exclude file.
    root: git_root::RootHandler,
}

impl Evaluator {
    /// Decide whether `path` is ignored by git.
    ///
    /// Three sources are consulted in order, and the first one that yields a decision wins:
    ///
    /// 1. `.gitignore` files in the directory of the path and its parent directories (up to the
    ///    top-level of the working tree),
    /// 2. `$GIT_DIR/info/exclude` of the closest git root, if one was found,
    /// 3. the file specified by the configuration variable `core.excludesFile`.
    ///
    /// The precedence rules are the ones defined in the
    /// [git documentation](https://git-scm.com/docs/gitignore#_description). Returns `true` if the
    /// path is ignored, and `false` otherwise (including when no source mentions the path).
    ///
    /// # Examples
    ///
    /// ```rust
    /// use ignored::evaluator::Evaluator;
    ///
    /// # std::fs::create_dir("tests/fixtures/mock-project/.git");
    /// let evaluator = Evaluator::default();
    /// let ignored = evaluator.is_ignored("tests/fixtures/mock-project/file.tmp");
    ///
    /// assert!(ignored);
    /// ```
    #[must_use]
    pub fn is_ignored(&self, path: impl AsRef<Path>) -> bool {
        let target = path.as_ref();

        // Step one: the `.gitignore` hierarchy. This also reports the closest git root, which
        // the remaining steps need.
        let (git_root, gitignore_decision) = self.evaluate_gitignore_files(target);

        if let Some(is_ignored) = gitignore_decision {
            log::debug!(
                "{} is ignored by .gitignore: {is_ignored}",
                target.display()
            );

            return is_ignored;
        }

        // Step two (`$GIT_DIR/info/exclude`, only when a git root is known) and step three
        // (`core.excludesFile`). Both closures are lazy, so the global file is only consulted
        // when the local one gave no decision.
        git_root
            .as_ref()
            .and_then(|root| self.evaluate_local_git_exclude_file(root, target))
            .or_else(|| self.evaluate_global_git_exclude_file(git_root.as_ref(), target))
            .unwrap_or(false)
    }

    /// Evaluate the repository's `.gitignore` files to determine if a given file or path is
    /// ignored.
    ///
    /// This is the first of three methods of ignoring files in git.
    ///
    /// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description).
    ///
    /// During traversal it also records the closest relative git root (directory containing a
    /// `.git`), which is beneficial for the second evaluation method - which is an ignore file
    /// listed in the git root (`.git/info/exclude`).
    ///
    /// # Returns
    ///
    /// A tuple of:
    ///
    /// * the closest git root known once the traversal has finished (if any), and
    /// * `Some(true)` or `Some(false)`, which denotes whether the file is ignored or not, only if
    ///   the path was matched in at least one `.gitignore` file. If not, [`Option::None`] is
    ///   returned, denoting that no `.gitignore` file matched the path in either direction.
    ///
    /// Failures to read an individual `.gitignore` file are logged and that file is skipped.
    fn evaluate_gitignore_files(&self, path: impl AsRef<Path>) -> (Option<PathBuf>, Option<bool>) {
        // NOTE(review): presumably the closest git root already recorded for this path - confirm
        // against `RootHandler::get_closest`.
        let mut closest_git_root = self.root.get_closest(&path);

        let path_parts = path.as_ref().iter().collect::<Vec<&OsStr>>();
        // Start the walk at the known git root (skipping everything above it), or at the first
        // path component when no git root is known yet.
        let closest_git_root_offset = closest_git_root
            .as_ref()
            .map_or(1, |root| root.components().count());

        let mut is_in_git_root = closest_git_root.is_some();
        let mut is_ignored = None;

        // Walk from the highest relevant directory down towards the path itself; `base_path` is
        // the directory whose `.gitignore` is evaluated in this iteration.
        for i in closest_git_root_offset..path_parts.len() {
            let base_path: PathBuf = path_parts[0..i].iter().collect();

            if self.root.record(&base_path)
                && closest_git_root
                    .as_ref()
                    .is_none_or(|closest| closest != &base_path)
            {
                // We've encountered this git root for the first time, we need to update our list of
                // encountered git roots. We also might already be in a git root (i.e. `.git` in a
                // subdirectory of another git root), in which case we need to reset our current
                // ignored decision.
                if is_in_git_root {
                    // We've reached _another_ git root, even though we're already in a git root (i.e.
                    // a repo inside a repo). We should reset our current ignored decision.
                    is_ignored = None;

                    log::debug!(
                        "Encountered recursive git root at: {}",
                        base_path.as_path().display()
                    );
                } else {
                    is_in_git_root = true;

                    log::debug!("Encountered git root at: {}", base_path.as_path().display());
                }

                // Update the closest git root as we've now encountered one we previously didn't
                // know about.
                closest_git_root = Some(base_path.clone());
            } else if !is_in_git_root {
                // We've still not reached a git root (i.e. a `.git` folder). Conforming to git's
                // semantics this means any `.gitignore` files don't apply.
                continue;
            }

            let potential_gitignore = base_path.join(constant::GITIGNORE_FILE);

            // No base path override: patterns are relative to the `.gitignore` file itself.
            let gitignore_file = match self
                .get_or_parse_gitignore(Option::<&PathBuf>::None, potential_gitignore.as_path())
            {
                Ok(Some(gitignore_file)) => gitignore_file,
                // This directory has no `.gitignore` file, move on to the next level.
                Ok(None) => continue,
                Err(e) => {
                    // Best effort: an unreadable file is logged and skipped rather than aborting
                    // the whole evaluation.
                    log::error!(
                        "Failed to read .gitignore file at {}: {:?}",
                        potential_gitignore.display(),
                        e
                    );

                    continue;
                }
            };

            // NB: `[0..=i]` is inclusive, so it selects one component more than `base_path`
            // (`[0..i]`): the entry of `path` that sits directly inside `base_path`. Joining with
            // an empty string appends a trailing path separator, presumably so that the entry is
            // matched as a directory.
            let parent_path = path_parts[0..=i].iter().collect::<PathBuf>().join("");

            if gitignore_file
                .is_ignored(parent_path.as_path())
                .is_some_and(|ignored| ignored)
            {
                // Git doesn't list excluded directories for performance reasons, so any patterns on
                // contained files have no effect, no matter where they are defined.
                //
                // In other words, despite keep.me being explicitly not ignored in the example below, the
                // vendor directory is still ignored, which causes keep.me to be ignored as well:
                //
                // ```
                // vendor/
                // !vendor/keep.me
                // ```
                log::debug!(
                    "{} is ignored so {} is ignored by association.",
                    parent_path.as_path().display(),
                    path.as_ref().display()
                );

                return (closest_git_root, Some(true));
            }

            if let Some(result) = gitignore_file.is_ignored(path.as_ref()) {
                // Patterns in the higher level files are overridden by those in
                // lower level files down to the directory containing the file.
                //
                // We _have to_ check patterns in the higher levels _first_ because
                // they might ignore whole directories which will prevent evaluations
                // in the lower levels from having any effect.
                is_ignored = Some(result);
            }
        }

        (closest_git_root, is_ignored)
    }

    /// Evaluate the repository's `.git/info/exclude` file, located at the root of the working tree.
    ///
    /// This is the second of three methods of ignoring files in git.
    ///
    /// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description).
    ///
    /// `git_root` is the git root whose exclude file should be consulted; it is also passed on as
    /// the base path for the patterns in that file. `path` is the path being evaluated.
    ///
    /// This method returns `Some(true)` or `Some(false)`, which denotes whether the file is
    /// ignored or not, only if the path was matched in `.git/info/exclude`. [`Option::None`] is
    /// returned when the exclude file cannot be located, does not exist, cannot be read (the
    /// failure is logged), or does not list the path.
    fn evaluate_local_git_exclude_file(
        &self,
        git_root: &impl AsRef<Path>,
        path: impl AsRef<Path>,
    ) -> Option<bool> {
        let exclude_file = self.root.get_exclude_path(git_root)?;

        let gitignore_file = match self.get_or_parse_gitignore(Some(git_root), &exclude_file) {
            Ok(file) => file,
            Err(e) => {
                // Best effort: a broken exclude file must not prevent the remaining sources from
                // being evaluated, so log the failure and treat the file as absent.
                log::error!(
                    "Failed to read .gitignore file at {}: {:?}",
                    exclude_file.display(),
                    e
                );

                None
            }
        };

        let is_ignored = gitignore_file?.is_ignored(&path)?;

        // The evaluated path comes first and the exclude file second, matching the wording of
        // the message ("<path> is ignored by <file>").
        log::debug!(
            "{} is ignored by {}: {is_ignored}",
            path.as_ref().display(),
            exclude_file.as_path().display()
        );

        Some(is_ignored)
    }

    /// Evaluate the user's global `.gitignore` file (located by default at `$XDG_CONFIG_HOME/git/ignore`, or,
    /// if `$XDG_CONFIG_HOME` is either not set or empty, at `$HOME/.config/git/ignore`, and customised using
    /// `core.excludesfile` in global git configuration).
    ///
    /// This is the third of three methods of ignoring files in git.
    ///
    /// This follows the precedence rules defined in the [git documentation](https://git-scm.com/docs/gitignore#_description),
    /// and the git config rules defined in the [git documentation](https://git-scm.com/docs/git-config#FILES).
    ///
    /// `git_root`, when known, is forwarded as the base path for the patterns in the global file.
    ///
    /// The result is `Some(true)` or `Some(false)` only if the path was matched in the global git
    /// ignore file. [`Option::None`] is returned when the location of the file cannot be
    /// determined, when the file does not exist or cannot be read (the failure is logged), or
    /// when the path is not listed in it.
    fn evaluate_global_git_exclude_file(
        &self,
        git_root: Option<&impl AsRef<Path>>,
        path: impl AsRef<Path>,
    ) -> Option<bool> {
        // A failed lookup and "no global exclude file configured" are both "no decision".
        let exclude_file = self.config.get_global_git_exclude_file_path().ok()?;
        let exclude_path = exclude_file.as_ref()?;

        let parsed = self
            .get_or_parse_gitignore(git_root, exclude_path)
            .unwrap_or_else(|e| {
                log::error!("Failed to read global .gitignore file at {exclude_file:?}: {e:?}");

                None
            });

        let is_ignored = parsed?.is_ignored(&path)?;

        log::debug!(
            "{} is ignored by {:?}: {is_ignored}",
            path.as_ref().display(),
            exclude_file
        );

        Some(is_ignored)
    }

    /// Parse a `.gitignore` file at the given path, or return a cached version if it has already been parsed
    /// and hasn't changed since.
    ///
    /// Optionally, provide a base path to override the path to which all glob patterns defined inside the file
    /// should be relative to.
    ///
    /// When no base path is provided, the base path is assumed to be relative to the file being read. This is fine for
    /// regular `.gitignore` files, however, when dealing with both global exclude files, and git root exclude files, the
    /// base path provided will be the closest git root, not the file itself.
    fn get_or_parse_gitignore(
        &self,
        base_path: Option<&impl AsRef<Path>>,
        potential_gitignore: impl AsRef<Path>,
    ) -> Result<Option<Arc<File>>> {
        if !potential_gitignore.as_ref().exists() {
            return Ok(None);
        }

        let mut guard = self.files.lock().map_err(|_| {
            evaluator::Error::CachePoisoned(potential_gitignore.as_ref().to_path_buf())
        })?;

        let gitignore_file = match guard.entry(potential_gitignore.as_ref().to_path_buf()) {
            Entry::Occupied(mut e) => {
                let (checksum, file) = {
                    let existing_file = e.get_mut();

                    let (target_checksum, file_handle) = crate::utils::compute_checksum(
                        potential_gitignore.as_ref(),
                    )
                    .map_err(|e| evaluator::Error::FileError {
                        file: potential_gitignore.as_ref().to_path_buf(),
                        source: e,
                    })?;

                    if existing_file.checksum == target_checksum {
                        return Ok(Some(Arc::clone(existing_file)));
                    }

                    (target_checksum, file_handle)
                };

                // We've parsed this file before but the content has changed. We need to re-parse
                // it from scratch
                Arc::clone(&e.insert(Arc::new(utils::read_gitignore(
                    base_path.as_ref(),
                    potential_gitignore.as_ref(),
                    file,
                    &checksum,
                )?)))
            }
            Entry::Vacant(e) => {
                let (target_checksum, file_handle) =
                    crate::utils::compute_checksum(potential_gitignore.as_ref()).map_err(|e| {
                        evaluator::Error::FileError {
                            file: potential_gitignore.as_ref().to_path_buf(),
                            source: e,
                        }
                    })?;

                let gitignore_file = Arc::new(utils::read_gitignore(
                    base_path.as_ref(),
                    potential_gitignore.as_ref(),
                    file_handle,
                    &target_checksum,
                )?);

                // We've never encountered this file before, we need to parse it
                Arc::clone(e.insert(gitignore_file))
            }
        };

        drop(guard);

        Ok(Some(gitignore_file))
    }
}