Skip to main content

cgx_engine/
git.rs

1use std::collections::HashMap;
2use std::path::Path;
3
4use git2::{Repository, Sort};
5
/// Owners of a single file as `(author name, author e-mail, share of blamed lines)`.
///
/// The share is a fraction of the file's blamed lines, so it lies in `(0.0, 1.0]`.
type OwnerList = Vec<(String, String, f64)>;

/// Co-changing file pairs as `(first path, second path, normalised score)`.
///
/// The score is the pair's co-change count divided by the highest pair count
/// observed, so it lies in `(0.0, 1.0]`.
type CoChangeList = Vec<(String, String, f64)>;

/// Result of analysing a repository's git history.
///
/// `Debug`, `Clone` and `Default` are derived so callers can log the result,
/// copy it, or fall back to an empty analysis without spelling out each field.
#[derive(Debug, Clone, Default)]
pub struct GitAnalysis {
    /// Path -> churn score: the file's recent commit count divided by the
    /// highest such count among all files.
    pub file_churn: HashMap<String, f64>,
    /// Path -> the file's top owners, largest share first.
    pub file_owners: HashMap<String, OwnerList>,
    /// Pairs of files that were modified in the same commits.
    pub co_changes: CoChangeList,
}
14
15pub fn analyze_repo(repo_path: &Path, file_paths: &[String]) -> anyhow::Result<GitAnalysis> {
16    let repo = Repository::open(repo_path)?;
17
18    let (file_churn, co_changes) = compute_churn_and_co_changes(&repo)?;
19    let file_owners = compute_blame(&repo, file_paths)?;
20
21    Ok(GitAnalysis {
22        file_churn,
23        file_owners,
24        co_changes,
25    })
26}
27
28fn compute_churn_and_co_changes(
29    repo: &Repository,
30) -> anyhow::Result<(HashMap<String, f64>, CoChangeList)> {
31    // 90 days for churn scoring; 365 days for co-change pairs (stable repos commit infrequently)
32    let churn_cutoff = chrono::Utc::now().timestamp() - 90 * 86400;
33    let co_change_cutoff = chrono::Utc::now().timestamp() - 365 * 86400;
34
35    let mut commit_counts: HashMap<String, u32> = HashMap::new();
36    let mut pair_counts: HashMap<(String, String), u32> = HashMap::new();
37    let mut max_churn: u32 = 0;
38    let mut max_co: u32 = 0;
39
40    let mut revwalk = repo.revwalk()?;
41    revwalk.push_head()?;
42    revwalk.set_sorting(Sort::TIME)?;
43
44    for oid_result in revwalk {
45        let oid = match oid_result {
46            Ok(o) => o,
47            Err(_) => continue,
48        };
49        let commit = match repo.find_commit(oid) {
50            Ok(c) => c,
51            Err(_) => continue,
52        };
53
54        let commit_ts = commit.time().seconds();
55        if commit_ts < co_change_cutoff {
56            break;
57        }
58
59        let commit_tree = match commit.tree() {
60            Ok(t) => t,
61            Err(_) => continue,
62        };
63
64        let mut parent_tree = None;
65        if let Ok(parent) = commit.parent(0) {
66            if let Ok(tree) = parent.tree() {
67                parent_tree = Some(tree);
68            }
69        }
70
71        let diff = repo.diff_tree_to_tree(parent_tree.as_ref(), Some(&commit_tree), None)?;
72
73        let mut changed_files: Vec<String> = Vec::new();
74
75        diff.foreach(
76            &mut |delta, _| {
77                if let Some(path) = delta.new_file().path() {
78                    if let Some(s) = path.to_str() {
79                        changed_files.push(s.to_string());
80                    }
81                }
82                true
83            },
84            None,
85            None,
86            None,
87        )?;
88
89        changed_files.sort();
90        changed_files.dedup();
91
92        // Only count churn for commits within the 90-day window
93        if commit_ts >= churn_cutoff {
94            for file in &changed_files {
95                let count = commit_counts.entry(file.clone()).or_insert(0);
96                *count += 1;
97                if *count > max_churn {
98                    max_churn = *count;
99                }
100            }
101        }
102
103        for i in 0..changed_files.len() {
104            for j in (i + 1)..changed_files.len() {
105                let pair = (changed_files[i].clone(), changed_files[j].clone());
106                let count = pair_counts.entry(pair).or_insert(0);
107                *count += 1;
108                if *count > max_co {
109                    max_co = *count;
110                }
111            }
112        }
113    }
114
115    let mut churn_map = HashMap::new();
116    if max_churn > 0 {
117        for (file, count) in commit_counts {
118            churn_map.insert(file, count as f64 / max_churn as f64);
119        }
120    }
121
122    let mut co_results: CoChangeList = Vec::new();
123    let min_co_count = 2u32;
124    if max_co > 0 {
125        for ((a, b), count) in pair_counts {
126            if count >= min_co_count {
127                co_results.push((a, b, count as f64 / max_co as f64));
128            }
129        }
130    }
131
132    Ok((churn_map, co_results))
133}
134
135fn compute_blame(
136    repo: &Repository,
137    file_paths: &[String],
138) -> anyhow::Result<HashMap<String, OwnerList>> {
139    let mut owners: HashMap<String, OwnerList> = HashMap::new();
140
141    for file_path in file_paths {
142        let blame = match repo.blame_file(std::path::Path::new(file_path), None) {
143            Ok(b) => b,
144            Err(e) => {
145                tracing::debug!("blame failed for {}: {}", file_path, e);
146                continue;
147            }
148        };
149
150        let mut author_lines: HashMap<String, (String, u32)> = HashMap::new();
151        let mut total_lines: u32 = 0;
152
153        for hunk in blame.iter() {
154            let sig = hunk.final_signature();
155            let name = sig.name().unwrap_or("unknown").to_string();
156            let email = sig.email().unwrap_or("unknown").to_string();
157            let lines = hunk.lines_in_hunk() as u32;
158
159            let key = email.clone();
160            let entry = author_lines.entry(key).or_insert((name, 0));
161            entry.1 += lines;
162            total_lines += lines;
163        }
164
165        if total_lines > 0 {
166            let mut file_owners: Vec<(String, String, f64)> = author_lines
167                .into_iter()
168                .map(|(email, (name, lines))| (name, email, lines as f64 / total_lines as f64))
169                .collect();
170            file_owners.sort_by(|a, b| b.2.partial_cmp(&a.2).unwrap_or(std::cmp::Ordering::Equal));
171            file_owners.truncate(3);
172            owners.insert(file_path.clone(), file_owners);
173        }
174    }
175
176    Ok(owners)
177}