gix 0.50.0

Interact with git repositories just like git would
Documentation
//! Lower-level access to filters which are applied to create working tree checkouts or to 'clean' working tree contents for storage in git.
use std::borrow::Cow;

pub use gix_filter as plumbing;
use gix_odb::{Find, FindExt};

use crate::{
    bstr::BStr,
    config::{
        cache::util::{ApplyLeniency, ApplyLeniencyDefaultValue},
        tree::Core,
    },
    Repository,
};

/// Error types for the fallible operations of [`Pipeline`][crate::filter::Pipeline], with one sub-module per operation.
pub mod pipeline {
    /// Errors that occur while extracting pipeline options from the repository configuration.
    pub mod options {
        use crate::{bstr::BString, config};

        /// The error returned by [Pipeline::options()][crate::filter::Pipeline::options()].
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            /// Transparently wraps a `config::encoding::Error`.
            // NOTE(review): presumably produced when `core.checkRoundtripEncoding` can't be interpreted - confirm.
            #[error(transparent)]
            CheckRoundTripEncodings(#[from] config::encoding::Error),
            /// Transparently wraps a `config::key::GenericErrorWithValue`.
            // NOTE(review): despite the name, `options()` also propagates failures for `core.autocrlf` and `core.eol`
            // with `?`, which presumably end up in this variant as well - confirm.
            #[error(transparent)]
            SafeCrlf(#[from] config::key::GenericErrorWithValue),
            /// The `required` value in a `filter.<name>` section could not be interpreted as boolean.
            #[error("Could not interpret 'filter.{name}.required' configuration")]
            Driver {
                /// The sub-section name of the offending `filter` section, i.e. the name of the driver.
                name: BString,
                /// The failure encountered when interpreting the configured value as boolean.
                source: gix_config::value::Error,
            },
        }
    }

    /// Errors that occur while converting worktree data into what is stored in git.
    pub mod convert_to_git {
        /// The error returned by [Pipeline::convert_to_git()][crate::filter::Pipeline::convert_to_git()].
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            /// An IO error, converted with `?` before the actual conversion starts.
            // NOTE(review): presumably produced by the attribute cache lookup at the path to convert - confirm.
            #[error("Failed to prime attributes to the path at which the data resides")]
            WorktreeCacheAtPath(#[from] std::io::Error),
            /// Transparently wraps the error of the underlying `gix_filter` *to-git* conversion.
            #[error(transparent)]
            Convert(#[from] gix_filter::pipeline::convert::to_git::Error),
        }
    }

    /// Errors that occur while converting data stored in git into its worktree representation.
    pub mod convert_to_worktree {
        /// The error returned by [Pipeline::convert_to_worktree()][crate::filter::Pipeline::convert_to_worktree()].
        #[derive(Debug, thiserror::Error)]
        #[allow(missing_docs)]
        pub enum Error {
            /// An IO error, converted with `?` before the actual conversion starts.
            // NOTE(review): presumably produced by the attribute cache lookup for the entry to convert - confirm.
            #[error("Failed to prime attributes to the path at which the data resides")]
            WorktreeCacheAtPath(#[from] std::io::Error),
            /// Transparently wraps the error of the underlying `gix_filter` *to-worktree* conversion.
            #[error(transparent)]
            Convert(#[from] gix_filter::pipeline::convert::to_worktree::Error),
        }
    }
}

/// A git pipeline for transforming data *to-git* and *to-worktree*, based
/// [on git configuration and attributes](https://git-scm.com/docs/gitattributes).
///
/// Create one with [`Pipeline::new()`] and take it apart again with [`Pipeline::into_parts()`].
#[derive(Clone)]
pub struct Pipeline<'repo> {
    /// The underlying filter pipeline to which the actual conversions are delegated.
    inner: gix_filter::Pipeline,
    /// Used to look up the attributes that apply to the path of the data to convert.
    cache: gix_worktree::Cache,
    /// The repository whose object database is queried for blobs during attribute lookup and conversion.
    repo: &'repo Repository,
}

/// Lifecycle
impl<'repo> Pipeline<'repo> {
    /// Read the configuration of `repo` and assemble the options needed to properly drive a standard git filter pipeline.
    ///
    /// The values of `core.checkRoundtripEncoding`, `core.safecrlf`, `core.autocrlf` and `core.eol` are consulted in
    /// that order, followed by the drivers declared in `filter.<name>` sections. The first value that fails to be
    /// interpreted determines the returned error.
    ///
    /// Note that `core.eol` is parsed without the leniency handling that is applied to `core.safecrlf` and `core.autocrlf`.
    pub fn options(repo: &'repo Repository) -> Result<gix_filter::pipeline::Options, pipeline::options::Error> {
        let git_config = &repo.config.resolved;
        let lenient = repo.config.lenient_config;

        let round_trip_value = git_config.string_by_key("core.checkRoundtripEncoding");
        let encodings_with_roundtrip_check = Core::CHECK_ROUND_TRIP_ENCODING.try_into_encodings(round_trip_value)?;

        let crlf_roundtrip_check = git_config
            .string_by_key("core.safecrlf")
            .map(|value| Core::SAFE_CRLF.try_into_safecrlf(value))
            .transpose()
            .map(Option::unwrap_or_default)
            .with_lenient_default_value(
                lenient,
                // When lenient, fall back to the safe choice of failing the check rather than (trying to) emit warnings.
                gix_filter::pipeline::CrlfRoundTripCheck::Fail,
            )?;

        let auto_crlf = git_config
            .string_by_key("core.autocrlf")
            .map(|value| Core::AUTO_CRLF.try_into_autocrlf(value))
            .transpose()
            .with_leniency(lenient)?
            .unwrap_or_default();

        let eol = git_config
            .string_by_key("core.eol")
            .map(|value| Core::EOL.try_into_eol(value))
            .transpose()?;

        let drivers = extract_drivers(repo)?;
        let eol_config = gix_filter::eol::Configuration { auto_crlf, eol };

        Ok(gix_filter::pipeline::Options {
            drivers,
            eol_config,
            encodings_with_roundtrip_check,
            crlf_roundtrip_check,
            object_hash: repo.object_hash(),
        })
    }

    /// Create a new instance by extracting all necessary information and configuration from a `repo`, along with
    /// `cache` for accessing attributes.
    ///
    /// Fails if the [options][Self::options()] can't be obtained from the configuration of `repo`.
    pub fn new(repo: &'repo Repository, cache: gix_worktree::Cache) -> Result<Self, pipeline::options::Error> {
        let collection = cache.attributes_collection();
        let options = Self::options(repo)?;
        let inner = gix_filter::Pipeline::new(collection, options);
        Ok(Pipeline { inner, cache, repo })
    }

    /// Let go of the repository and return the filter pipeline and the attribute cache this instance was made of.
    pub fn into_parts(self) -> (gix_filter::Pipeline, gix_worktree::Cache) {
        let Self { inner, cache, .. } = self;
        (inner, cache)
    }
}

/// Conversions
impl<'repo> Pipeline<'repo> {
    /// Turn the `src` stream, whose data resides at the repo-relative `rela_path`, into the representation that is
    /// suitable for storage in `git`. The attributes at `rela_path` are looked up to determine which filters apply.
    ///
    /// `index` is used to look up `rela_path` and to check if the object of that entry can be found as blob in the
    /// object database. This is needed in particularly rare cases where the CRLF filter in auto-mode tries to determine
    /// whether or not to apply itself, and it should match the state used when [instantiating this instance][Self::new()].
    ///
    /// Note that the return-type implements [`std::io::Read`].
    pub fn convert_to_git<R>(
        &mut self,
        src: R,
        rela_path: &std::path::Path,
        index: &gix_index::State,
    ) -> Result<gix_filter::pipeline::convert::ToGitOutcome<'_, R>, pipeline::convert_to_git::Error>
    where
        R: std::io::Read,
    {
        // Copy the shared reference so that closures can query the object database independently of `self`.
        let repo = self.repo;
        let attributes = self
            .cache
            .at_path(rela_path, Some(false), |id, buf| repo.objects.find_blob(id, buf))?;
        let outcome = self.inner.convert_to_git(
            src,
            rela_path,
            |_, attrs| {
                attributes.matching_attributes(attrs);
            },
            |rela_path, buf| -> Result<_, crate::object::find::Error> {
                match index.entry_by_path(rela_path) {
                    Some(index_entry) => {
                        let object = repo.objects.try_find(index_entry.id, buf)?;
                        // Only blobs count, any other kind of object is treated like a missing one.
                        Ok(object.filter(|obj| obj.kind == gix_object::Kind::Blob).map(|_| ()))
                    }
                    None => Ok(None),
                }
            },
        )?;
        Ok(outcome)
    }

    /// Turn the `src` buffer, which belongs to the entry at `rela_path` (in the index), from what's stored in `git`
    /// into its worktree representation. The attributes at `rela_path` are looked up to determine which filters apply.
    ///
    /// Note that the return-type implements [`std::io::Read`].
    ///
    /// Use `can_delay` to tell driver processes that they may delay the return of data. Doing this will require the caller to specifically
    /// handle delayed files by keeping state and using [`Self::into_parts()`] to get access to the driver state to follow the delayed-files
    /// protocol. For simplicity, most will want to disallow delayed processing.
    pub fn convert_to_worktree<'input>(
        &mut self,
        src: &'input [u8],
        rela_path: &BStr,
        can_delay: gix_filter::driver::apply::Delay,
    ) -> Result<gix_filter::pipeline::convert::ToWorktreeOutcome<'input, '_>, pipeline::convert_to_worktree::Error>
    {
        // Copy the shared reference so that the closure can query the object database independently of `self`.
        let repo = self.repo;
        let attributes = self
            .cache
            .at_entry(rela_path, Some(false), |id, buf| repo.objects.find_blob(id, buf))?;
        let outcome = self.inner.convert_to_worktree(
            src,
            rela_path,
            |_, attrs| {
                attributes.matching_attributes(attrs);
            },
            can_delay,
        )?;
        Ok(outcome)
    }

    /// Provide mutable access to the static context that is made available to the process filters.
    ///
    /// The context set here is relevant for the [`convert_to_git()`][Self::convert_to_git()] and
    /// [`convert_to_worktree()`][Self::convert_to_worktree()] methods.
    pub fn driver_context_mut(&mut self) -> &mut gix_filter::pipeline::Context {
        self.inner.driver_context_mut()
    }
}

/// Obtain a list of all configured driver, but ignore those in sections that we don't trust enough.
fn extract_drivers(repo: &Repository) -> Result<Vec<gix_filter::Driver>, pipeline::options::Error> {
    Ok(match repo.config.resolved.sections_by_name("filter") {
        None => Vec::new(),
        Some(sections) => sections
            .filter(|s| repo.filter_config_section()(s.meta()))
            .filter_map(|s| {
                s.header().subsection_name().map(|name| {
                    Ok(gix_filter::Driver {
                        name: name.to_owned(),
                        clean: s.value("clean").map(Cow::into_owned),
                        smudge: s.value("smudge").map(Cow::into_owned),
                        process: s.value("process").map(Cow::into_owned),
                        required: s
                            .value("required")
                            .map(|value| gix_config::Boolean::try_from(value.as_ref()))
                            .transpose()
                            .map_err(|err| pipeline::options::Error::Driver {
                                name: name.to_owned(),
                                source: err,
                            })?
                            .unwrap_or_default()
                            .into(),
                    })
                })
            })
            .collect::<Result<Vec<_>, pipeline::options::Error>>()?,
    })
}