git-checks 4.0.1

Checks to run against a topic in git to enforce coding standards.
Documentation
// Copyright Kitware, Inc.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::collections::hash_set::HashSet;
use std::fmt::{self, Debug};
use std::process::Command;
use std::sync::Mutex;
use std::time::Duration;

use crates::git_checks_core::impl_prelude::*;
use crates::ttl_cache::TtlCache;

/// Configuration value for `ValidName` policy for use of full names in identities.
///
/// The policy decides which diagnostic, if any, is recorded when an identity's name is not
/// considered a full name.
#[derive(Debug, Clone, Copy)]
pub enum ValidNameFullNamePolicy {
    /// A full name is required, error when missing.
    Required,
    /// A full name is preferred, warning when missing.
    Preferred,
    /// A full name is optional, no diagnostic when missing.
    Optional,
}

impl Default for ValidNameFullNamePolicy {
    /// Full names are required unless a different policy is chosen.
    fn default() -> ValidNameFullNamePolicy {
        ValidNameFullNamePolicy::Required
    }
}

impl ValidNameFullNamePolicy {
    /// Record a missing full name on `result` with the severity this policy calls for.
    ///
    /// `msg` receives the wording for the policy (`"required"` or `"preferred"`) and returns the
    /// message to record: an error for `Required`, a warning for `Preferred`. For `Optional`,
    /// nothing is recorded and `msg` is not called.
    fn apply<F>(self, result: &mut CheckResult, msg: F)
    where
        F: Fn(&str) -> String,
    {
        match self {
            ValidNameFullNamePolicy::Optional => (),
            ValidNameFullNamePolicy::Preferred => {
                let warning = msg("preferred");
                result.add_warning(warning);
            },
            ValidNameFullNamePolicy::Required => {
                let error = msg("required");
                result.add_error(error);
            },
        }
    }
}

/// Panic message used when locking the DNS cache fails because the lock is poisoned.
const LOCK_POISONED: &str = "DNS cache lock poisoned";
/// Size passed to `TtlCache::new` when creating the DNS cache.
const DEFAULT_TTL_CACHE_SIZE: usize = 100;
/// Duration given to the cache for domains whose lookup succeeded: 24 hours.
const DEFAULT_TTL_CACHE_HIT_DURATION: Duration = Duration::from_secs(24 * 60 * 60);
/// Duration given to the cache for domains whose lookup failed: 5 minutes.
const DEFAULT_TTL_CACHE_MISS_DURATION: Duration = Duration::from_secs(5 * 60);

/// A check which checks for valid identities.
///
/// This check uses the `host` external binary to check the validity of domain names used in email
/// addresses.
///
/// The check can be configured with a policy on how to enforce use of full names.
#[derive(Builder)]
#[builder(field(private))]
pub struct ValidName {
    /// The policy for names in commits.
    ///
    /// Configuration: Optional
    /// Default: `ValidNameFullNamePolicy::Required`
    #[builder(default)]
    full_name_policy: ValidNameFullNamePolicy,
    /// A cache of DNS query results.
    ///
    /// Maps a domain name to whether its lookup succeeded. It cannot be set through the builder.
    #[builder(setter(skip))]
    #[builder(default = "empty_dns_cache()")]
    dns_cache: Mutex<TtlCache<String, bool>>,
    /// Whitelisted domains.
    ///
    /// Email addresses using one of these domains are accepted without a DNS lookup.
    #[builder(private)]
    #[builder(setter(name = "_whitelisted_domains"))]
    #[builder(default = "HashSet::new()")]
    whitelisted_domains: HashSet<String>,
}

impl ValidNameBuilder {
    /// Add domains to the domain whitelist.
    pub fn whitelisted_domains<I, D>(&mut self, domains: I) -> &mut Self
    where
        I: IntoIterator<Item = D>,
        D: Into<String>,
    {
        self.whitelisted_domains = Some(domains.into_iter().map(Into::into).collect());
        self
    }
}

/// Create an empty DNS cache of the default size behind a fresh mutex.
fn empty_dns_cache() -> Mutex<TtlCache<String, bool>> {
    let cache = TtlCache::new(DEFAULT_TTL_CACHE_SIZE);
    Mutex::new(cache)
}

impl Debug for ValidName {
    /// Format the configuration of the check.
    ///
    /// Only the name policy and the whitelist are shown; the DNS cache is left out.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut repr = f.debug_struct("ValidName");
        repr.field("full_name_policy", &self.full_name_policy);
        repr.field("whitelisted_domains", &self.whitelisted_domains);
        repr.finish()
    }
}

impl Default for ValidName {
    fn default() -> Self {
        Self {
            full_name_policy: ValidNameFullNamePolicy::default(),
            dns_cache: empty_dns_cache(),
            whitelisted_domains: HashSet::new(),
        }
    }
}

impl Clone for ValidName {
    fn clone(&self) -> Self {
        Self {
            full_name_policy: self.full_name_policy,
            dns_cache: empty_dns_cache(),
            whitelisted_domains: self.whitelisted_domains.clone(),
        }
    }
}

impl ValidName {
    /// Create a new builder.
    pub fn builder() -> ValidNameBuilder {
        ValidNameBuilder::default()
    }

    /// Check that a name is valid.
    ///
    /// A name is treated as a full name when it contains at least one space.
    fn check_name(name: &str) -> bool {
        name.contains(' ')
    }

    /// Look up the MX record of a domain with the `host` tool.
    ///
    /// Returns `Some(true)` when `host` exits successfully and `Some(false)` when it fails.
    /// Returns `None` when no verdict could be reached: either `host` could not be run or its
    /// output reports that the connection timed out.
    fn check_host(domain: &str) -> Option<bool> {
        let host = Command::new("host")
            .args(&["-t", "MX"])
            .arg(format!("{}.", domain)) // Search for the absolute domain.
            .output();
        let host_output = match host {
            Ok(host_output) => host_output,
            Err(err) => {
                error!(
                    target: "git-checks/valid_name",
                    "failed to construct host command: {:?}",
                    err,
                );

                return None;
            },
        };

        if host_output.status.success() {
            return Some(true);
        }

        // The `host` tool always outputs to stdout
        let output = String::from_utf8_lossy(&host_output.stdout);

        warn!(
            target: "git-checks/valid_name",
            "failed to look up MX record for domain {}: {}",
            domain,
            output,
        );

        // A timeout says nothing about the domain itself, so give no verdict for it.
        if output.contains("connection timed out") {
            None
        } else {
            Some(false)
        }
    }

    /// Check that an email address is valid.
    ///
    /// The domain is everything after the first `@`; an address without an `@` is invalid.
    /// Whitelisted domains are accepted without any lookup. Other domains are answered from the
    /// DNS cache when possible and through `check_host` otherwise. A lookup that gives no verdict
    /// is reported as invalid but is not cached.
    fn check_email(&self, email: &str) -> bool {
        let domain = match email.splitn(2, '@').nth(1) {
            Some(domain) => domain,
            None => return false,
        };

        if self.whitelisted_domains.contains(domain) {
            return true;
        }

        // Never hand these to `host`: an empty domain would turn into a query for the DNS root
        // (`.`), and a domain starting with `-` could be read as a command line option.
        if domain.is_empty() || domain.starts_with('-') {
            return false;
        }

        let mut cache = self.dns_cache.lock().expect(LOCK_POISONED);
        if let Some(cached_res) = cache.get_mut(domain) {
            return *cached_res;
        }

        Self::check_host(domain).map_or(false, |res| {
            let duration = if res {
                DEFAULT_TTL_CACHE_HIT_DURATION
            } else {
                DEFAULT_TTL_CACHE_MISS_DURATION
            };

            cache.insert(domain.into(), res, duration);
            res
        })
    }

    /// Check an identity for its validity.
    ///
    /// `what` names the thing the identity belongs to (for example `commit <sha>`) and `who`
    /// names the role of the identity (for example `author`); both are only used in the
    /// messages. A name without a space is handled according to the full name policy. An email
    /// address that fails `check_email` is always an error.
    fn check_identity(&self, what: &str, who: &str, identity: &Identity) -> CheckResult {
        let mut result = CheckResult::new();

        if !Self::check_name(&identity.name) {
            self.full_name_policy.apply(&mut result, |policy| {
                format!(
                    "The {} name (`{}`) for {} has no space in it. A full name is {} for \
                     contribution. Please set the `user.name` Git configuration value.",
                    who, identity.name, what, policy,
                )
            });
        }

        if !self.check_email(&identity.email) {
            result.add_error(format!(
                "The {} email (`{}`) for {} has an unknown domain. Please set the `user.email` \
                 Git configuration value.",
                who, identity.email, what,
            ));
        }

        result
    }
}

impl Check for ValidName {
    fn name(&self) -> &str {
        "valid-name"
    }

    /// Check the identities recorded in a commit.
    ///
    /// When the author and the committer are the same identity, it is checked once and reported
    /// as the `given` identity. Otherwise both are checked and their results are combined, author
    /// first.
    fn check(&self, _: &CheckGitContext, commit: &Commit) -> Result<CheckResult, Box<dyn Error>> {
        let what = format!("commit {}", commit.sha1);

        if commit.author == commit.committer {
            return Ok(self.check_identity(&what, "given", &commit.author));
        }

        let author_result = self.check_identity(&what, "author", &commit.author);
        let committer_result = self.check_identity(&what, "committer", &commit.committer);

        Ok(author_result.combine(committer_result))
    }
}

impl BranchCheck for ValidName {
    fn name(&self) -> &str {
        "valid-name"
    }

    /// Check the identity of the owner of the topic.
    fn check(&self, ctx: &CheckGitContext, _: &CommitId) -> Result<CheckResult, Box<dyn Error>> {
        let owner = ctx.topic_owner();
        let result = self.check_identity("the topic", "owner", owner);

        Ok(result)
    }
}

#[cfg(feature = "config")]
pub(crate) mod config {
    use crates::git_checks_config::{CommitCheckConfig, IntoCheck};
    use crates::inventory;
    #[cfg(test)]
    use crates::serde::Deserialize;
    #[cfg(test)]
    use crates::serde_json;

    use ValidName;
    use ValidNameFullNamePolicy;

    /// The full name policy as it is spelled in configuration data.
    #[derive(Deserialize, Debug, Clone, Copy, PartialEq, Eq)]
    pub enum ValidNameFullNamePolicyIO {
        /// Full names are required; a missing one is an error.
        #[serde(rename = "required")]
        Required,
        /// Full names are preferred; a missing one is a warning.
        #[serde(rename = "preferred")]
        Preferred,
        /// Full names are optional; a missing one is not reported.
        #[serde(rename = "optional")]
        Optional,
    }

    impl From<ValidNameFullNamePolicyIO> for ValidNameFullNamePolicy {
        /// Map each configuration variant to the policy of the same name.
        fn from(io: ValidNameFullNamePolicyIO) -> Self {
            match io {
                ValidNameFullNamePolicyIO::Optional => ValidNameFullNamePolicy::Optional,
                ValidNameFullNamePolicyIO::Preferred => ValidNameFullNamePolicy::Preferred,
                ValidNameFullNamePolicyIO::Required => ValidNameFullNamePolicy::Required,
            }
        }
    }

    /// Configuration for the `ValidName` check.
    ///
    /// The `full_name_policy` key is a string which must be one of `"optional"`, `"preferred"`, or
    /// `"required"` (the default). The `whitelisted_domains` key is a list of strings naming
    /// domains which are assumed to be valid in email addresses; it defaults to empty. It should
    /// contain domains which are common to the project being watched to avoid false positives
    /// when DNS lookup failures occur.
    ///
    /// This check is registered as a commit check with the name `"valid_name"`.
    ///
    /// # Example
    ///
    /// ```json
    /// {
    ///     "full_name_policy": "required",
    ///     "whitelisted_domains": [
    ///         "mycompany.invalid"
    ///     ]
    /// }
    /// ```
    #[derive(Deserialize, Debug)]
    pub struct ValidNameConfig {
        #[serde(default)]
        full_name_policy: Option<ValidNameFullNamePolicyIO>,
        #[serde(default)]
        whitelisted_domains: Option<Vec<String>>,
    }

    impl IntoCheck for ValidNameConfig {
        type Check = ValidName;

        /// Build the check, passing on only the keys that were present in the configuration.
        fn into_check(self) -> Self::Check {
            let ValidNameConfig {
                full_name_policy,
                whitelisted_domains,
            } = self;
            let mut builder = ValidName::builder();

            if let Some(policy) = full_name_policy {
                builder.full_name_policy(policy.into());
            }

            if let Some(domains) = whitelisted_domains {
                builder.whitelisted_domains(domains);
            }

            builder
                .build()
                .expect("configuration mismatch for `ValidName`")
        }
    }

    register_checks! {
        ValidNameConfig {
            "valid_name" => CommitCheckConfig,
        },
    }

    #[test]
    fn test_valid_name_full_name_policy_deserialize() {
        // Every accepted spelling maps onto the variant of the same name.
        let expectations = [
            ("required", ValidNameFullNamePolicyIO::Required),
            ("optional", ValidNameFullNamePolicyIO::Optional),
            ("preferred", ValidNameFullNamePolicyIO::Preferred),
        ];
        for &(name, expected) in &expectations {
            let policy = ValidNameFullNamePolicyIO::deserialize(json!(name)).unwrap();
            assert_eq!(policy, expected);
        }

        // Anything else is rejected as a data error.
        let err = ValidNameFullNamePolicyIO::deserialize(json!("invalid")).unwrap_err();

        assert!(err.is_data());
        assert!(!err.is_io());
        assert!(!err.is_syntax());
        assert!(!err.is_eof());

        // The exact wording is only reported, not asserted.
        let expected_msg =
            "unknown variant `invalid`, expected one of `required`, `preferred`, `optional`";
        let msg = format!("{}", err);
        if msg != expected_msg {
            println!(
                "Error message doesn't match. Was a new style added? ({})",
                msg,
            );
        }
    }

    #[test]
    fn test_valid_name_config_empty() {
        let check: ValidNameConfig = serde_json::from_value(json!({})).unwrap();

        assert_eq!(check.full_name_policy, None);
        assert_eq!(check.whitelisted_domains, None);
    }

    #[test]
    fn test_valid_name_config_all_fields() {
        let exp_domain: String = "mycompany.invalid".into();
        let config = json!({
            "full_name_policy": "optional",
            "whitelisted_domains": [exp_domain.clone()],
        });
        let check: ValidNameConfig = serde_json::from_value(config).unwrap();

        assert_eq!(
            check.full_name_policy,
            Some(ValidNameFullNamePolicyIO::Optional),
        );
        itertools::assert_equal(&check.whitelisted_domains, &Some([exp_domain]));
    }
}

// Tests which run the check through the helpers of the crate's `test` module.
#[cfg(test)]
mod tests {
    use test::*;
    use ValidName;
    use ValidNameFullNamePolicy;

    // Commits handed to the test helpers as the commit to check.
    // NOTE(review): these presumably live in the repository used by the test helpers; confirm
    // there before changing them.
    const BAD_TOPIC: &str = "91d9fceb226bfc0faeb8a4e54b4f0b5a1ffd39e8";
    const BAD_AUTHOR_NAME: &str = "edac4e5b3a00eac60280a78ee84b5ef8d4cce97a";

    #[test]
    fn test_valid_name_builder_default() {
        // Every field has a default, so a builder without any settings must build.
        assert!(ValidName::builder().build().is_ok());
    }

    #[test]
    fn test_valid_name_required() {
        // The default policy is `Required`, so names without a space are errors.
        let check = ValidName::default();
        let result = run_check("test_valid_name_required", BAD_TOPIC, check);
        test_result_errors(result, &[
            "The author name (`Mononym`) for commit edac4e5b3a00eac60280a78ee84b5ef8d4cce97a has \
             no space in it. A full name is required for contribution. Please set the `user.name` \
             Git configuration value.",
            "The author email (`bademail`) for commit 9de4928f5ec425eef414ee7620d0692fda56ebb0 \
             has an unknown domain. Please set the `user.email` Git configuration value.",
            "The committer name (`Mononym`) for commit 1debf1735a6e28880ef08f13baeea4b71a08a846 \
             has no space in it. A full name is required for contribution. Please set the \
             `user.name` Git configuration value.",
            "The committer email (`bademail`) for commit da71ae048e5a387d6809558d59ad073d0e4fb089 \
             has an unknown domain. Please set the `user.email` Git configuration value.",
            "The author email (`bademail@baddomain.invalid`) for commit \
             9002239437a06e81a58fed07150b215a917028d6 has an unknown domain. Please set the \
             `user.email` Git configuration value.",
            "The committer email (`bademail@baddomain.invalid`) for commit \
             dcd8895d299031d607481b4936478f8de4cc28ae has an unknown domain. Please set the \
             `user.email` Git configuration value.",
            "The given name (`Mononym`) for commit 91d9fceb226bfc0faeb8a4e54b4f0b5a1ffd39e8 has \
             no space in it. A full name is required for contribution. Please set the `user.name` \
             Git configuration value.",
            "The given email (`bademail`) for commit 91d9fceb226bfc0faeb8a4e54b4f0b5a1ffd39e8 has \
             an unknown domain. Please set the `user.email` Git configuration value.",
        ]);
    }

    #[test]
    fn test_valid_name_whitelist() {
        // Same topic as `test_valid_name_required`, but with `baddomain.invalid` whitelisted:
        // the two `bademail@baddomain.invalid` errors are no longer expected.
        let check = ValidName::builder()
            .whitelisted_domains(["baddomain.invalid"].iter().cloned())
            .build()
            .unwrap();
        let result = run_check("test_valid_name_whitelist", BAD_TOPIC, check);
        test_result_errors(result, &[
            "The author name (`Mononym`) for commit edac4e5b3a00eac60280a78ee84b5ef8d4cce97a has \
             no space in it. A full name is required for contribution. Please set the `user.name` \
             Git configuration value.",
            "The author email (`bademail`) for commit 9de4928f5ec425eef414ee7620d0692fda56ebb0 \
             has an unknown domain. Please set the `user.email` Git configuration value.",
            "The committer name (`Mononym`) for commit 1debf1735a6e28880ef08f13baeea4b71a08a846 \
             has no space in it. A full name is required for contribution. Please set the \
             `user.name` Git configuration value.",
            "The committer email (`bademail`) for commit da71ae048e5a387d6809558d59ad073d0e4fb089 \
             has an unknown domain. Please set the `user.email` Git configuration value.",
            "The given name (`Mononym`) for commit 91d9fceb226bfc0faeb8a4e54b4f0b5a1ffd39e8 has \
             no space in it. A full name is required for contribution. Please set the `user.name` \
             Git configuration value.",
            "The given email (`bademail`) for commit 91d9fceb226bfc0faeb8a4e54b4f0b5a1ffd39e8 has \
             an unknown domain. Please set the `user.email` Git configuration value.",
        ]);
    }

    #[test]
    fn test_valid_name_preferred() {
        // With the `Preferred` policy a name without a space is a warning instead of an error.
        let check = ValidName::builder()
            .full_name_policy(ValidNameFullNamePolicy::Preferred)
            .build()
            .unwrap();
        let result = run_check("test_valid_name_preferred", BAD_AUTHOR_NAME, check);
        test_result_warnings(result, &[
            "The author name (`Mononym`) for commit edac4e5b3a00eac60280a78ee84b5ef8d4cce97a has \
             no space in it. A full name is preferred for contribution. Please set the \
             `user.name` Git configuration value.",
        ]);
    }

    #[test]
    fn test_valid_name_optional() {
        // With the `Optional` policy a name without a space is not reported at all.
        let check = ValidName::builder()
            .full_name_policy(ValidNameFullNamePolicy::Optional)
            .build()
            .unwrap();
        run_check_ok("test_valid_name_optional", BAD_AUTHOR_NAME, check);
    }
}