tokmd_analysis_fingerprint/
lib.rs1use std::collections::BTreeMap;
6
7use tokmd_analysis_types::{CorporateFingerprint, DomainStat};
8
/// Email domains that are treated as public mailbox providers.
///
/// [`is_public_domain`] tests membership in this table by exact string
/// comparison, so entries are expected to be written in lowercase, matching
/// the output of [`normalize_domain`].
const PUBLIC_DOMAINS: [&str; 7] = [
    "gmail.com",
    "yahoo.com",
    "outlook.com",
    "hotmail.com",
    "icloud.com",
    // Two distinct domains are listed for the "proton" names.
    "proton.me",
    "protonmail.com",
];
18
19pub fn build_corporate_fingerprint(commits: &[tokmd_git::GitCommit]) -> CorporateFingerprint {
21 let mut counts: BTreeMap<String, u32> = BTreeMap::new();
22 let mut total = 0u32;
23
24 for commit in commits {
25 if let Some(domain) = extract_domain(&commit.author) {
26 let domain = normalize_domain(&domain);
27 if domain.is_empty() || is_ignored_domain(&domain) {
28 continue;
29 }
30 let bucket = if is_public_domain(&domain) {
31 "public-email".to_string()
32 } else {
33 domain
34 };
35 *counts.entry(bucket).or_insert(0) += 1;
36 total += 1;
37 }
38 }
39
40 let mut domains: Vec<DomainStat> = counts
41 .into_iter()
42 .map(|(domain, commits)| DomainStat {
43 domain,
44 commits,
45 pct: if total == 0 {
46 0.0
47 } else {
48 (commits as f32) / (total as f32)
49 },
50 })
51 .collect();
52 domains.sort_by(|a, b| {
53 b.commits
54 .cmp(&a.commits)
55 .then_with(|| a.domain.cmp(&b.domain))
56 });
57
58 CorporateFingerprint { domains }
59}
60
/// Returns the text following the `@` in `email` as an owned string.
///
/// The result is `None` unless `email` contains exactly one `@`. No further
/// validation is done: the returned domain may be empty (e.g. for `"alice@"`)
/// and is neither trimmed nor lowercased.
fn extract_domain(email: &str) -> Option<String> {
    email
        .split_once('@')
        // A second `@` anywhere after the first disqualifies the address.
        .filter(|(_, host)| !host.contains('@'))
        .map(|(_, host)| host.to_string())
}
68
/// Produces the canonical form of a domain used for bucketing: surrounding
/// whitespace removed and all characters lowercased.
fn normalize_domain(domain: &str) -> String {
    let trimmed = domain.trim();
    trimmed.to_lowercase()
}
72
/// Reports whether commits from `domain` should be left out of the tally.
///
/// A domain is ignored when it is exactly `"localhost"` or `"example.com"`, or
/// when it contains `"noreply.github.com"` anywhere in it. The substring test
/// also covers `"users.noreply.github.com"`, so that value needs no check of
/// its own.
///
/// Comparison is case-sensitive.
fn is_ignored_domain(domain: &str) -> bool {
    matches!(domain, "localhost" | "example.com") || domain.contains("noreply.github.com")
}
79
80fn is_public_domain(domain: &str) -> bool {
81 PUBLIC_DOMAINS.contains(&domain)
82}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a commit fixture in which only the author string varies.
    fn commit_by(author: &str) -> tokmd_git::GitCommit {
        tokmd_git::GitCommit {
            timestamp: 0,
            author: author.to_string(),
            hash: None,
            subject: String::new(),
            files: vec![],
        }
    }

    /// One public-mailbox author and two authors sharing a company domain
    /// must yield a `"public-email"` bucket and an `"acme.com"` bucket.
    #[test]
    fn buckets_public_domains() {
        let commits: Vec<tokmd_git::GitCommit> =
            ["alice@gmail.com", "bob@acme.com", "carol@acme.com"]
                .iter()
                .map(|author| commit_by(author))
                .collect();

        let report = build_corporate_fingerprint(&commits);
        let has_bucket = |name: &str| report.domains.iter().any(|stat| stat.domain == name);

        assert!(has_bucket("public-email"));
        assert!(has_bucket("acme.com"));
    }
}