Skip to main content

tokmd_analysis_fingerprint/
lib.rs

1//! # tokmd-analysis-fingerprint
2//!
3//! Corporate fingerprint enrichment adapter for analysis receipts.
4
5use std::collections::BTreeMap;
6
7use tokmd_analysis_types::{CorporateFingerprint, DomainStat};
8
/// Email domains that are grouped into a single shared bucket instead of
/// being reported individually.
///
/// Entries are written in lowercase; `is_public_domain` compares against
/// them with exact string equality.
const PUBLIC_DOMAINS: [&str; 7] = [
    "gmail.com",
    "yahoo.com",
    "outlook.com",
    "hotmail.com",
    "icloud.com",
    "proton.me",
    "protonmail.com",
];
18
19/// Build a corporate fingerprint from commit author email domains.
20pub fn build_corporate_fingerprint(commits: &[tokmd_git::GitCommit]) -> CorporateFingerprint {
21    let mut counts: BTreeMap<String, u32> = BTreeMap::new();
22    let mut total = 0u32;
23
24    for commit in commits {
25        if let Some(domain) = extract_domain(&commit.author) {
26            let domain = normalize_domain(&domain);
27            if domain.is_empty() || is_ignored_domain(&domain) {
28                continue;
29            }
30            let bucket = if is_public_domain(&domain) {
31                "public-email".to_string()
32            } else {
33                domain
34            };
35            *counts.entry(bucket).or_insert(0) += 1;
36            total += 1;
37        }
38    }
39
40    let mut domains: Vec<DomainStat> = counts
41        .into_iter()
42        .map(|(domain, commits)| DomainStat {
43            domain,
44            commits,
45            pct: if total == 0 {
46                0.0
47            } else {
48                (commits as f32) / (total as f32)
49            },
50        })
51        .collect();
52    domains.sort_by(|a, b| {
53        b.commits
54            .cmp(&a.commits)
55            .then_with(|| a.domain.cmp(&b.domain))
56    });
57
58    CorporateFingerprint { domains }
59}
60
/// Return the text following `@` when `email` contains exactly one `@`.
///
/// Strings with no `@` or with more than one `@` yield `None`. The returned
/// part is copied as-is: it is neither trimmed nor validated and may be empty
/// (for example for `"user@"`).
fn extract_domain(email: &str) -> Option<String> {
    match email.split_once('@') {
        // A second `@` in the remainder means the input had more than one.
        Some((_, domain)) if !domain.contains('@') => Some(domain.to_string()),
        _ => None,
    }
}
68
/// Canonicalise a domain for comparison: strip leading and trailing
/// whitespace, then convert the remainder to lowercase.
fn normalize_domain(domain: &str) -> String {
    let trimmed = domain.trim();
    trimmed.to_lowercase()
}
72
/// Report whether `domain` should be left out of the fingerprint entirely.
///
/// A domain is ignored when it is exactly `localhost` or `example.com`, or
/// when it contains `noreply.github.com` anywhere in the string. The latter
/// is a substring match, so it also covers `users.noreply.github.com`; a
/// separate check for that longer form could never change the result and is
/// therefore omitted.
///
/// Comparisons are case-sensitive; the caller in this file passes an already
/// lowercased domain.
fn is_ignored_domain(domain: &str) -> bool {
    matches!(domain, "localhost" | "example.com") || domain.contains("noreply.github.com")
}
79
80fn is_public_domain(domain: &str) -> bool {
81    PUBLIC_DOMAINS.contains(&domain)
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a commit whose only distinguishing field is the author string;
    /// every other field is set to an empty or zero value.
    fn commit_by(author: &str) -> tokmd_git::GitCommit {
        tokmd_git::GitCommit {
            timestamp: 0,
            author: author.to_string(),
            hash: None,
            subject: String::new(),
            files: vec![],
        }
    }

    /// One public-provider author and two authors on the same company domain
    /// must produce both a `public-email` bucket and an `acme.com` bucket.
    #[test]
    fn buckets_public_domains() {
        let authors = ["alice@gmail.com", "bob@acme.com", "carol@acme.com"];
        let commits: Vec<tokmd_git::GitCommit> =
            authors.iter().map(|author| commit_by(author)).collect();

        let report = build_corporate_fingerprint(&commits);
        let has_bucket = |name: &str| report.domains.iter().any(|d| d.domain == name);

        assert!(has_bucket("public-email"));
        assert!(has_bucket("acme.com"));
    }
}
119}