1use crate::metrics::types::{FileHistory, RepoEdge};
5use anyhow::{Context, Result};
6use std::collections::{HashMap, HashSet};
7use std::path::Path;
8use std::process::Command;
9
10pub fn load_history(
11 workspace: &Path,
12 now_ms: u64,
13) -> Result<(HashMap<String, FileHistory>, Vec<RepoEdge>)> {
14 if !workspace.join(".git").exists() {
15 return Ok((HashMap::new(), vec![]));
16 }
17 let out = Command::new("git")
18 .arg("-C")
19 .arg(workspace)
20 .args([
21 "log",
22 "--since=90 days ago",
23 "--format=__K__%H|%ct|%ae",
24 "--name-only",
25 ])
26 .output()
27 .context("git log")?;
28 if !out.status.success() {
29 anyhow::bail!("git log failed: {}", String::from_utf8_lossy(&out.stderr));
30 }
31 Ok(parse_history(&String::from_utf8_lossy(&out.stdout), now_ms))
32}
33
34fn parse_history(raw: &str, now_ms: u64) -> (HashMap<String, FileHistory>, Vec<RepoEdge>) {
35 let mut histories: HashMap<String, FileHistory> = HashMap::new();
36 let mut authors: HashMap<String, HashSet<String>> = HashMap::new();
37 let mut co_changed: HashMap<(String, String), u32> = HashMap::new();
38 let mut current_ts = 0u64;
39 let mut current_author = String::new();
40 let mut current_paths: Vec<String> = vec![];
41
42 for line in raw.lines().chain(std::iter::once("__END__")) {
43 if let Some(meta) = line.strip_prefix("__K__") {
44 flush_commit(¤t_paths, &mut co_changed);
45 current_paths.clear();
46 let mut parts = meta.split('|');
47 let _hash = parts.next();
48 current_ts = parts
49 .next()
50 .and_then(|s| s.parse::<u64>().ok())
51 .unwrap_or(0)
52 * 1000;
53 current_author = parts.next().unwrap_or("").to_string();
54 continue;
55 }
56 let path = line.trim();
57 if path.is_empty() || path == "__END__" {
58 continue;
59 }
60 current_paths.push(path.to_string());
61 let entry = histories.entry(path.to_string()).or_default();
62 entry.churn_90d += 1;
63 if current_ts >= now_ms.saturating_sub(30 * 86_400_000) {
64 entry.churn_30d += 1;
65 }
66 if entry.last_changed_ms.is_none() {
67 entry.last_changed_ms = Some(current_ts);
68 }
69 authors
70 .entry(path.to_string())
71 .or_default()
72 .insert(current_author.clone());
73 }
74
75 for (path, set) in authors {
76 histories.entry(path).or_default().authors_90d = set.len() as u32;
77 }
78
79 let edges = co_changed
80 .into_iter()
81 .map(|((from_path, to_path), weight)| RepoEdge {
82 from_path,
83 to_path,
84 kind: "CO_CHANGED_WITH".into(),
85 weight,
86 })
87 .collect();
88 (histories, edges)
89}
90
91fn flush_commit(paths: &[String], out: &mut HashMap<(String, String), u32>) {
92 for i in 0..paths.len() {
93 for j in (i + 1)..paths.len() {
94 let mut a = paths[i].clone();
95 let mut b = paths[j].clone();
96 if a > b {
97 std::mem::swap(&mut a, &mut b);
98 }
99 *out.entry((a, b)).or_default() += 1;
100 }
101 }
102}
103
#[cfg(test)]
mod tests {
    use super::*;

    /// Two commits by different authors both touch `src/a.rs`; the first one
    /// also touches `src/b.rs`, which yields a co-change edge.
    #[test]
    fn parse_log_history() {
        let log = "__K__a|100|a@x\nsrc/a.rs\nsrc/b.rs\n__K__b|200|b@x\nsrc/a.rs\n";
        let (files, co_changes) = parse_history(log, 250_000);

        let a_rs = &files["src/a.rs"];
        assert_eq!(a_rs.churn_90d, 2);
        assert_eq!(a_rs.authors_90d, 2);

        let first_edge = &co_changes[0];
        assert_eq!(first_edge.weight, 1);
    }
}