// tga 2.4.0

// Developer productivity analytics — git commit collection, classification, and reporting
// Documentation
//! External developer aliases file support.
//!
//! Loads developer identity records from a standalone YAML file using
//! the same schema as the Python predecessor (`gitflow-analytics`).
//! This lets large teams (hundreds of engineers) share a single canonical
//! aliases file across multiple project configs instead of bloating each
//! config with inline `developer_aliases` entries.
//!
//! # File format
//!
//! ```yaml
//! developers:
//!   - name: "John Doe"
//!     primary_email: "john.doe@company.com"
//!     aliases:
//!       - "jdoe@gmail.com"
//!       - "John D."
//!     github_username: "jdoe"      # optional
//!     confidence: 1.0              # optional, defaults to 1.0
//!     reasoning: ""                # optional, for LLM-generated aliases
//! ```
//!
//! Use [`AliasFile::load`] to parse such a file from disk, then
//! [`AliasFile::to_alias_map`] to convert it into the
//! `HashMap<canonical_name, Vec<aliases>>` form consumed by
//! [`crate::collect::identity::resolver::IdentityResolver::from_alias_map`].

use std::collections::HashMap;
use std::path::Path;

use serde::{Deserialize, Serialize};

use crate::core::config::expand_path;
use crate::core::errors::{Result, TgaError};

/// A single developer identity record in an external aliases file.
///
/// Schema-compatible with the Python predecessor's alias YAML format so
/// alias files generated by `gitflow-analytics` can be consumed unchanged.
///
/// Only `name` and `primary_email` are required keys; every other field
/// carries a serde default and may be omitted from the YAML.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeveloperAliasEntry {
    /// Canonical display name (e.g. `"John Doe"`).
    ///
    /// Required. Used as the map key by [`AliasFile::to_alias_map`].
    pub name: String,

    /// Primary / canonical email address.
    ///
    /// Required. [`AliasFile::to_alias_map`] places it first in the
    /// developer's alias list when it is non-empty.
    pub primary_email: String,

    /// All alternative emails, git names, and username strings that
    /// should resolve to this developer.
    ///
    /// Defaults to an empty list when the key is absent.
    #[serde(default)]
    pub aliases: Vec<String>,

    /// Optional GitHub username.
    ///
    /// Defaults to `None` when the key is absent.
    #[serde(default)]
    pub github_username: Option<String>,

    /// Confidence score (1.0 = manually verified, < 1.0 = LLM-suggested).
    ///
    /// Filled in by `default_confidence` (1.0) when the key is absent.
    #[serde(default = "default_confidence")]
    pub confidence: f64,

    /// Human-readable reasoning for LLM-generated aliases.
    ///
    /// Defaults to the empty string when the key is absent.
    #[serde(default)]
    pub reasoning: String,
}

/// Serde fallback used when an alias entry omits its `confidence` key.
///
/// Returns `1.0`, the score the entry documentation describes as
/// "manually verified".
fn default_confidence() -> f64 {
    const MANUALLY_VERIFIED: f64 = 1.0;
    MANUALLY_VERIFIED
}

/// Top-level structure of an external aliases file.
///
/// Mirrors the YAML document root, which holds a single `developers`
/// sequence. Parse one from disk with [`AliasFile::load`] and flatten it
/// with [`AliasFile::to_alias_map`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AliasFile {
    /// All developer entries declared in the file.
    ///
    /// No `#[serde(default)]` is applied here, so the `developers` key is
    /// expected to be present in the document.
    pub developers: Vec<DeveloperAliasEntry>,
}

impl AliasFile {
    /// Load an aliases file from disk.
    ///
    /// Supports leading `~` home-directory expansion in `path`: the path is
    /// passed through `expand_path` before the file is read.
    ///
    /// # Errors
    ///
    /// - [`TgaError::ConfigError`] if the file cannot be read. The message
    ///   carries the resolved path and the underlying I/O error.
    /// - The crate error converted from `serde_yaml::Error` by `?`
    ///   (presumably [`TgaError::SerdeYamlError`]) if the text is not valid
    ///   YAML or does not match the schema, e.g. when a required key such
    ///   as `name` or `primary_email` is missing.
    pub fn load(path: &Path) -> Result<Self> {
        let resolved = expand_path(path);
        tracing::debug!(path = %resolved.display(), "loading external aliases file");
        let text = std::fs::read_to_string(&resolved).map_err(|e| {
            TgaError::ConfigError(format!(
                "failed to read aliases file {}: {e}",
                resolved.display()
            ))
        })?;
        let parsed: AliasFile = serde_yaml::from_str(&text)?;
        Ok(parsed)
    }

    /// Convert into the `HashMap<canonical_name, Vec<aliases>>` form
    /// consumed by `IdentityResolver::from_alias_map`.
    ///
    /// For each developer the alias list is built, in this order, from
    /// `primary_email`, every string in `aliases`, and `github_username`
    /// (when present). Two filters are applied uniformly to all three
    /// sources:
    ///
    /// - empty strings are skipped, so a blank value can never become a
    ///   lookup key;
    /// - duplicates are removed case-insensitively while preserving order,
    ///   keeping the spelling of the first occurrence.
    ///
    /// If several entries share the same `name`, the entry that appears
    /// last in the file replaces the earlier ones.
    pub fn to_alias_map(&self) -> HashMap<String, Vec<String>> {
        let mut out: HashMap<String, Vec<String>> =
            HashMap::with_capacity(self.developers.len());
        for dev in &self.developers {
            // Borrow every candidate first; only the survivors get cloned.
            let candidates = std::iter::once(dev.primary_email.as_str())
                .chain(dev.aliases.iter().map(String::as_str))
                .chain(dev.github_username.as_deref());

            // Lower-cased keys already emitted for this developer.
            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
            let mut combined: Vec<String> = Vec::with_capacity(dev.aliases.len() + 2);
            for candidate in candidates {
                if !candidate.is_empty() && seen.insert(candidate.to_lowercase()) {
                    combined.push(candidate.to_owned());
                }
            }
            out.insert(dev.name.clone(), combined);
        }
        out
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Points `HOME` at a scratch directory for the lifetime of the guard.
    ///
    /// On drop the previous `HOME` value is restored (or the variable is
    /// removed if it was unset) and the scratch directory is deleted. Doing
    /// this in `Drop` means cleanup also runs when an assertion panics, so a
    /// failing test cannot leave `HOME` redirected for the rest of the test
    /// process.
    struct HomeOverride {
        previous: Option<std::ffi::OsString>,
        scratch: std::path::PathBuf,
    }

    impl HomeOverride {
        /// Remember the current `HOME`, then point it at `scratch`.
        fn install(scratch: std::path::PathBuf) -> Self {
            let previous = std::env::var_os("HOME");
            // SAFETY: env mutation is only sound while no other thread
            // touches the environment. Tests may run concurrently, so this
            // is an accepted risk for this small test, not a proven
            // invariant; the guard keeps the mutation window short and
            // always undoes it.
            unsafe {
                std::env::set_var("HOME", &scratch);
            }
            Self { previous, scratch }
        }
    }

    impl Drop for HomeOverride {
        fn drop(&mut self) {
            // SAFETY: same accepted risk as in `install`; this puts the
            // environment back the way it was found.
            unsafe {
                match self.previous.take() {
                    Some(v) => std::env::set_var("HOME", v),
                    None => std::env::remove_var("HOME"),
                }
            }
            // Best-effort cleanup; a leftover temp dir must not fail the test.
            let _ = std::fs::remove_dir_all(&self.scratch);
        }
    }

    /// Two-developer fixture: the first entry sets every key, the second
    /// relies on the serde defaults for the optional ones.
    fn sample_yaml() -> &'static str {
        r#"
developers:
  - name: "John Doe"
    primary_email: "john.doe@company.com"
    aliases:
      - "jdoe@gmail.com"
      - "john.doe@oldcompany.com"
      - "John D."
    github_username: "jdoe"
    confidence: 1.0
    reasoning: ""

  - name: "Alice Smith"
    primary_email: "alice@company.com"
    aliases:
      - "alice.smith@personal.com"
      - "asmith"
"#
    }

    #[test]
    fn alias_file_loads_from_yaml() {
        let parsed: AliasFile = serde_yaml::from_str(sample_yaml()).expect("parse");
        assert_eq!(parsed.developers.len(), 2);
        assert_eq!(parsed.developers[0].name, "John Doe");
        assert_eq!(parsed.developers[0].primary_email, "john.doe@company.com");
        assert_eq!(parsed.developers[0].aliases.len(), 3);
        assert_eq!(
            parsed.developers[0].github_username.as_deref(),
            Some("jdoe")
        );
        assert!((parsed.developers[0].confidence - 1.0).abs() < f64::EPSILON);
        // Alice has no github_username; should default to None.
        assert_eq!(parsed.developers[1].github_username, None);
        // Confidence defaults to 1.0 when absent.
        assert!((parsed.developers[1].confidence - 1.0).abs() < f64::EPSILON);
    }

    #[test]
    fn alias_file_to_map_includes_primary_email() {
        let parsed: AliasFile = serde_yaml::from_str(sample_yaml()).expect("parse");
        let map = parsed.to_alias_map();
        let john = map.get("John Doe").expect("John Doe present");
        assert!(
            john.iter().any(|s| s == "john.doe@company.com"),
            "primary_email should appear in alias list: {john:?}"
        );
        assert!(john.iter().any(|s| s == "jdoe@gmail.com"));
        assert!(john.iter().any(|s| s == "John D."));
        assert!(john.iter().any(|s| s == "jdoe"));
    }

    #[test]
    fn alias_file_to_map_dedupes_case_insensitive() {
        let yaml = r#"
developers:
  - name: "John Doe"
    primary_email: "john@example.com"
    aliases:
      - "john@example.com"
      - "JOHN@example.com"
      - "jdoe"
"#;
        let parsed: AliasFile = serde_yaml::from_str(yaml).expect("parse");
        let map = parsed.to_alias_map();
        let john = map.get("John Doe").expect("John Doe present");
        // primary_email + "jdoe" only; duplicate emails removed.
        assert_eq!(john.len(), 2, "expected 2 unique entries, got {john:?}");
    }

    #[test]
    fn alias_file_minimal_entry() {
        // Only required fields: name and primary_email.
        let yaml = r#"
developers:
  - name: "Solo Dev"
    primary_email: "solo@example.com"
"#;
        let parsed: AliasFile = serde_yaml::from_str(yaml).expect("parse");
        assert_eq!(parsed.developers.len(), 1);
        assert!(parsed.developers[0].aliases.is_empty());
        assert_eq!(parsed.developers[0].github_username, None);
        assert_eq!(parsed.developers[0].reasoning, "");
    }

    #[test]
    fn alias_file_path_expansion() {
        // Create a unique temp directory under std::env::temp_dir() and
        // point HOME at it so `~/aliases.yaml` resolves into the temp dir.
        let unique = format!(
            "tga-alias-test-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|d| d.as_nanos())
                .unwrap_or(0)
        );
        let tmp = std::env::temp_dir().join(unique);
        std::fs::create_dir_all(&tmp).expect("create tmp");

        // From here on the guard owns cleanup: HOME is restored and the temp
        // dir removed when `_home` goes out of scope, even on panic.
        let _home = HomeOverride::install(tmp.clone());

        let file_path = tmp.join("aliases.yaml");
        std::fs::write(&file_path, sample_yaml()).expect("write");

        let tilde_path = Path::new("~/aliases.yaml");
        let parsed = AliasFile::load(tilde_path).expect("load via tilde");
        assert_eq!(parsed.developers.len(), 2);
    }
}

#[cfg(test)]
mod merge_tests {
    //! Tests that drive [`crate::core::config::Config::resolved_alias_map`]
    //! with a real aliases file on disk, checking how inline
    //! `developer_aliases` and the external file are combined.

    use std::collections::HashMap;

    use crate::core::config::Config;

    /// Create and return a fresh scratch directory under the system temp
    /// dir. The name combines `label`, the process id, and the current
    /// time in nanoseconds (0 if the clock reads before the Unix epoch).
    fn unique_dir(label: &str) -> std::path::PathBuf {
        let pid = std::process::id();
        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_nanos());
        let dir_name = format!("tga-alias-merge-{label}-{}-{}", pid, nanos);

        let dir = std::env::temp_dir().join(dir_name);
        std::fs::create_dir_all(&dir).expect("create tmp");
        dir
    }

    /// When the same developer name exists inline and in the external
    /// file, the external entry replaces the inline one.
    #[test]
    fn alias_file_merge_overrides_inline() {
        let scratch = unique_dir("override");
        let aliases_path = scratch.join("aliases.yaml");
        std::fs::write(
            &aliases_path,
            r#"
developers:
  - name: "John Doe"
    primary_email: "john.new@company.com"
    aliases:
      - "john.alias@company.com"
"#,
        )
        .expect("write aliases");

        let cfg = Config {
            developer_aliases: HashMap::from([(
                "John Doe".to_string(),
                vec!["john.OLD@company.com".to_string()],
            )]),
            aliases_file: Some(aliases_path.to_string_lossy().into_owned()),
            ..Default::default()
        };

        let resolved = cfg.resolved_alias_map(None).expect("resolve");
        let john = resolved.get("John Doe").expect("john present");
        let has = |needle: &str| john.iter().any(|s| s == needle);

        // External wins: inline old email should NOT be present.
        assert!(
            !has("john.OLD@company.com"),
            "external entry should override inline, got {john:?}"
        );
        assert!(has("john.new@company.com"));
        assert!(has("john.alias@company.com"));

        std::fs::remove_dir_all(&scratch).ok();
    }

    /// Names that appear on only one side survive the merge untouched.
    #[test]
    fn alias_file_merge_additive() {
        let scratch = unique_dir("additive");
        let aliases_path = scratch.join("aliases.yaml");
        std::fs::write(
            &aliases_path,
            r#"
developers:
  - name: "Bob"
    primary_email: "bob@example.com"
"#,
        )
        .expect("write aliases");

        let cfg = Config {
            developer_aliases: HashMap::from([(
                "Alice".to_string(),
                vec!["alice@example.com".to_string()],
            )]),
            aliases_file: Some(aliases_path.to_string_lossy().into_owned()),
            ..Default::default()
        };

        let resolved = cfg.resolved_alias_map(None).expect("resolve");
        assert!(resolved.contains_key("Alice"), "inline-only entry preserved");
        assert!(resolved.contains_key("Bob"), "external-only entry added");
        assert_eq!(resolved.len(), 2);

        std::fs::remove_dir_all(&scratch).ok();
    }

    /// A configured aliases file that does not exist is an error whose
    /// message names the `aliases_file` setting.
    #[test]
    fn alias_file_missing_file_errors() {
        let cfg = Config {
            aliases_file: Some("/nonexistent/path/to/aliases.yaml".to_string()),
            ..Default::default()
        };

        let msg = cfg.resolved_alias_map(None).unwrap_err().to_string();
        assert!(
            msg.contains("aliases_file"),
            "error should mention aliases_file: {msg}"
        );
    }

    /// A relative `aliases_file` is looked up against the directory handed
    /// to `resolved_alias_map`.
    #[test]
    fn alias_file_relative_to_config_dir() {
        let tmp = unique_dir("reldir");
        std::fs::write(
            tmp.join("aliases.yaml"),
            r#"
developers:
  - name: "Rel Person"
    primary_email: "rel@example.com"
"#,
        )
        .expect("write");

        let cfg = Config {
            aliases_file: Some("./aliases.yaml".to_string()),
            ..Default::default()
        };

        let resolved = cfg.resolved_alias_map(Some(&tmp)).expect("resolve");
        assert!(resolved.contains_key("Rel Person"));

        std::fs::remove_dir_all(&tmp).ok();
    }
}