Skip to main content

kaizen/metrics/
git.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Git-derived churn, authorship, co-change.
3
4use crate::metrics::types::{FileHistory, RepoEdge};
5use anyhow::{Context, Result};
6use std::collections::{HashMap, HashSet};
7use std::path::Path;
8use std::process::Command;
9
10pub fn load_history(
11    workspace: &Path,
12    now_ms: u64,
13) -> Result<(HashMap<String, FileHistory>, Vec<RepoEdge>)> {
14    if !workspace.join(".git").exists() {
15        return Ok((HashMap::new(), vec![]));
16    }
17    let out = Command::new("git")
18        .arg("-C")
19        .arg(workspace)
20        .args([
21            "log",
22            "--since=90 days ago",
23            "--format=__K__%H|%ct|%ae",
24            "--name-only",
25        ])
26        .output()
27        .context("git log")?;
28    if !out.status.success() {
29        anyhow::bail!("git log failed: {}", String::from_utf8_lossy(&out.stderr));
30    }
31    Ok(parse_history(&String::from_utf8_lossy(&out.stdout), now_ms))
32}
33
34fn parse_history(raw: &str, now_ms: u64) -> (HashMap<String, FileHistory>, Vec<RepoEdge>) {
35    let mut histories: HashMap<String, FileHistory> = HashMap::new();
36    let mut authors: HashMap<String, HashSet<String>> = HashMap::new();
37    let mut co_changed: HashMap<(String, String), u32> = HashMap::new();
38    let mut current_ts = 0u64;
39    let mut current_author = String::new();
40    let mut current_paths: Vec<String> = vec![];
41
42    for line in raw.lines().chain(std::iter::once("__END__")) {
43        if let Some(meta) = line.strip_prefix("__K__") {
44            flush_commit(&current_paths, &mut co_changed);
45            current_paths.clear();
46            let mut parts = meta.split('|');
47            let _hash = parts.next();
48            current_ts = parts
49                .next()
50                .and_then(|s| s.parse::<u64>().ok())
51                .unwrap_or(0)
52                * 1000;
53            current_author = parts.next().unwrap_or("").to_string();
54            continue;
55        }
56        let path = line.trim();
57        if path.is_empty() || path == "__END__" {
58            continue;
59        }
60        current_paths.push(path.to_string());
61        let entry = histories.entry(path.to_string()).or_default();
62        entry.churn_90d += 1;
63        if current_ts >= now_ms.saturating_sub(30 * 86_400_000) {
64            entry.churn_30d += 1;
65        }
66        if entry.last_changed_ms.is_none() {
67            entry.last_changed_ms = Some(current_ts);
68        }
69        authors
70            .entry(path.to_string())
71            .or_default()
72            .insert(current_author.clone());
73    }
74
75    for (path, set) in authors {
76        histories.entry(path).or_default().authors_90d = set.len() as u32;
77    }
78
79    let edges = co_changed
80        .into_iter()
81        .map(|((from_path, to_path), weight)| RepoEdge {
82            from_path,
83            to_path,
84            kind: "CO_CHANGED_WITH".into(),
85            weight,
86        })
87        .collect();
88    (histories, edges)
89}
90
/// Adds one to the co-change count of every unordered pair of entries in
/// `paths`.
///
/// Each pair is keyed with the lexicographically smaller path first, so the
/// same two files always hit the same map entry regardless of the order in
/// which they appear in `paths`.
fn flush_commit(paths: &[String], out: &mut HashMap<(String, String), u32>) {
    for (idx, first) in paths.iter().enumerate() {
        for second in &paths[idx + 1..] {
            let key = if first <= second {
                (first.clone(), second.clone())
            } else {
                (second.clone(), first.clone())
            };
            *out.entry(key).or_insert(0) += 1;
        }
    }
}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Two commits by different authors: the first touches `src/a.rs` and
    /// `src/b.rs`, the second touches only `src/a.rs`.
    #[test]
    fn parse_log_history() {
        let log = concat!(
            "__K__a|100|a@x\n",
            "src/a.rs\n",
            "src/b.rs\n",
            "__K__b|200|b@x\n",
            "src/a.rs\n",
        );
        let (files, co_changes) = parse_history(log, 250_000);
        let a_rs = &files["src/a.rs"];
        assert_eq!(a_rs.churn_90d, 2);
        assert_eq!(a_rs.authors_90d, 2);
        assert_eq!(co_changes[0].weight, 1);
    }
}