Skip to main content

tokmd_analysis_git/
git.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6    BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7    CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8    TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
15const SECONDS_PER_DAY: i64 = 86_400;
16const REFRESH_WINDOW_DAYS: i64 = 30;
17const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20    repo_root: &Path,
21    export: &ExportData,
22    commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24    let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25    for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26        let key = normalize_path(&row.path, repo_root);
27        row_map.insert(key, (row, row.module.clone()));
28    }
29
30    let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31    let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32    let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33    let mut max_ts = 0i64;
34
35    for commit in commits {
36        max_ts = max_ts.max(commit.timestamp);
37        for file in &commit.files {
38            let key = normalize_git_path(file);
39            if let Some((row, module)) = row_map.get(&key) {
40                if let Some(val) = commit_counts.get_mut(&key) {
41                    *val += 1;
42                } else {
43                    commit_counts.insert(key.clone(), 1);
44                }
45                if let Some(val) = authors_by_module.get_mut(module) {
46                    val.insert(commit.author.clone());
47                } else {
48                    let mut set = BTreeSet::new();
49                    set.insert(commit.author.clone());
50                    authors_by_module.insert(module.clone(), set);
51                }
52                if !last_change.contains_key(&key) {
53                    last_change.insert(key.clone(), commit.timestamp);
54                }
55                let _ = row;
56            }
57        }
58    }
59
60    let mut hotspots: Vec<HotspotRow> = commit_counts
61        .iter()
62        .filter_map(|(path, commits)| {
63            let (row, _) = row_map.get(path)?;
64            Some(HotspotRow {
65                path: path.clone(),
66                commits: *commits,
67                lines: row.lines,
68                score: row.lines * commits,
69            })
70        })
71        .collect();
72    hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
73
74    let mut bus_factor: Vec<BusFactorRow> = authors_by_module
75        .into_iter()
76        .map(|(module, authors)| BusFactorRow {
77            module,
78            authors: authors.len(),
79        })
80        .collect();
81    bus_factor.sort_by(|a, b| {
82        a.authors
83            .cmp(&b.authors)
84            .then_with(|| a.module.cmp(&b.module))
85    });
86
87    let freshness = build_freshness_report(&last_change, &row_map, max_ts);
88    let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
89
90    let coupling = build_coupling(commits, &row_map);
91    let intent = build_intent_report(commits, &row_map);
92
93    Ok(GitReport {
94        commits_scanned: commits.len(),
95        files_seen: commit_counts.len(),
96        hotspots,
97        bus_factor,
98        freshness,
99        coupling,
100        age_distribution: Some(age_distribution),
101        intent: Some(intent),
102    })
103}
104
105fn build_freshness_report(
106    last_change: &BTreeMap<String, i64>,
107    row_map: &BTreeMap<String, (&FileRow, String)>,
108    reference_ts: i64,
109) -> FreshnessReport {
110    let threshold_days = 365usize;
111    let mut stale_files = 0usize;
112    let mut total_files = 0usize;
113    let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
114
115    for (path, ts) in last_change {
116        let (_, module) = match row_map.get(path) {
117            Some(v) => v,
118            None => continue,
119        };
120        let days = if reference_ts > *ts {
121            ((reference_ts - *ts) / 86_400) as usize
122        } else {
123            0
124        };
125        total_files += 1;
126        if days > threshold_days {
127            stale_files += 1;
128        }
129        by_module.entry(module.clone()).or_default().push(days);
130    }
131
132    let stale_pct = if total_files == 0 {
133        0.0
134    } else {
135        round_f64(stale_files as f64 / total_files as f64, 4)
136    };
137
138    let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
139    for (module, mut days) in by_module {
140        days.sort();
141        let avg = if days.is_empty() {
142            0.0
143        } else {
144            round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
145        };
146        let p90 = if days.is_empty() {
147            0.0
148        } else {
149            round_f64(percentile(&days, 0.90), 2)
150        };
151        let stale = days.iter().filter(|d| **d > threshold_days).count();
152        let pct = if days.is_empty() {
153            0.0
154        } else {
155            round_f64(stale as f64 / days.len() as f64, 4)
156        };
157        module_rows.push(ModuleFreshnessRow {
158            module,
159            avg_days: avg,
160            p90_days: p90,
161            stale_pct: pct,
162        });
163    }
164    module_rows.sort_by(|a, b| a.module.cmp(&b.module));
165
166    FreshnessReport {
167        threshold_days,
168        stale_files,
169        total_files,
170        stale_pct,
171        by_module: module_rows,
172    }
173}
174
175fn build_coupling(
176    commits: &[tokmd_git::GitCommit],
177    row_map: &BTreeMap<String, (&FileRow, String)>,
178) -> Vec<CouplingRow> {
179    let mut pairs: BTreeMap<(String, String), usize> = BTreeMap::new();
180    let mut touches: BTreeMap<String, usize> = BTreeMap::new();
181    let mut commits_considered: usize = 0;
182
183    for commit in commits {
184        let mut modules: BTreeSet<String> = BTreeSet::new();
185        for file in &commit.files {
186            let key = normalize_git_path(file);
187            if let Some((_row, module)) = row_map.get(&key) {
188                modules.insert(module.clone());
189            }
190        }
191        // Only count commits where at least one file maps to a module
192        if modules.is_empty() {
193            continue;
194        }
195        commits_considered += 1;
196        for m in &modules {
197            if let Some(val) = touches.get_mut(m) {
198                *val += 1;
199            } else {
200                touches.insert(m.clone(), 1);
201            }
202        }
203        let modules: Vec<String> = modules.into_iter().collect();
204        for i in 0..modules.len() {
205            for j in (i + 1)..modules.len() {
206                let left = modules[i].clone();
207                let right = modules[j].clone();
208                let key = if left <= right {
209                    (left, right)
210                } else {
211                    (right, left)
212                };
213                *pairs.entry(key).or_insert(0) += 1;
214            }
215        }
216    }
217
218    let n = commits_considered;
219
220    let mut rows: Vec<CouplingRow> = pairs
221        .into_iter()
222        .map(|((left, right), count)| {
223            let n_a = touches.get(&left).copied().unwrap_or(0);
224            let n_b = touches.get(&right).copied().unwrap_or(0);
225            let denom = (n_a + n_b).saturating_sub(count);
226            let jaccard = if denom > 0 {
227                Some(round_f64(count as f64 / denom as f64, 4))
228            } else {
229                None
230            };
231            let lift = if n > 0 && n_a > 0 && n_b > 0 {
232                Some(round_f64(
233                    (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
234                    4,
235                ))
236            } else {
237                None
238            };
239            CouplingRow {
240                left,
241                right,
242                count,
243                jaccard,
244                lift,
245                n_left: Some(n_a),
246                n_right: Some(n_b),
247            }
248        })
249        .collect();
250    rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
251    rows
252}
253
254fn build_intent_report(
255    commits: &[tokmd_git::GitCommit],
256    row_map: &BTreeMap<String, (&FileRow, String)>,
257) -> CommitIntentReport {
258    let mut overall = CommitIntentCounts::default();
259    let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
260
261    for commit in commits {
262        let kind = tokmd_git::classify_intent(&commit.subject);
263        overall.increment(kind);
264
265        // Attribute intent to all modules touched by this commit
266        let mut modules: BTreeSet<String> = BTreeSet::new();
267        for file in &commit.files {
268            let key = normalize_git_path(file);
269            if let Some((_row, module)) = row_map.get(&key) {
270                modules.insert(module.clone());
271            }
272        }
273        for module in modules {
274            by_module_counts.entry(module).or_default().increment(kind);
275        }
276    }
277
278    let unknown_pct = if overall.total > 0 {
279        round_f64(overall.other as f64 / overall.total as f64, 4)
280    } else {
281        0.0
282    };
283
284    let corrective_ratio = if overall.total > 0 {
285        Some(round_f64(
286            (overall.fix + overall.revert) as f64 / overall.total as f64,
287            4,
288        ))
289    } else {
290        None
291    };
292
293    let mut by_module: Vec<ModuleIntentRow> = by_module_counts
294        .into_iter()
295        .map(|(module, counts)| ModuleIntentRow { module, counts })
296        .collect();
297    by_module.sort_by(|a, b| a.module.cmp(&b.module));
298
299    CommitIntentReport {
300        overall,
301        by_module,
302        unknown_pct,
303        corrective_ratio,
304    }
305}
306
307fn build_code_age_distribution(
308    last_change: &BTreeMap<String, i64>,
309    reference_ts: i64,
310    commits: &[tokmd_git::GitCommit],
311) -> CodeAgeDistributionReport {
312    let mut ages_days: Vec<usize> = last_change
313        .values()
314        .map(|ts| {
315            if reference_ts > *ts {
316                ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
317            } else {
318                0
319            }
320        })
321        .collect();
322    ages_days.sort_unstable();
323
324    let buckets = vec![
325        ("0-30d", 0usize, Some(30usize)),
326        ("31-90d", 31usize, Some(90usize)),
327        ("91-180d", 91usize, Some(180usize)),
328        ("181-365d", 181usize, Some(365usize)),
329        ("366d+", 366usize, None),
330    ];
331
332    let mut counts = vec![0usize; buckets.len()];
333    for age in &ages_days {
334        for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
335            let in_range = if let Some(max_days) = max_days {
336                *age >= *min_days && *age <= *max_days
337            } else {
338                *age >= *min_days
339            };
340            if in_range {
341                counts[idx] += 1;
342                break;
343            }
344        }
345    }
346
347    let total_files = ages_days.len();
348    let bucket_rows: Vec<CodeAgeBucket> = buckets
349        .into_iter()
350        .zip(counts)
351        .map(|((label, min_days, max_days), files)| CodeAgeBucket {
352            label: label.to_string(),
353            min_days,
354            max_days,
355            files,
356            pct: if total_files == 0 {
357                0.0
358            } else {
359                round_f64(files as f64 / total_files as f64, 4)
360            },
361        })
362        .collect();
363
364    let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
365    let (recent_refreshes, prior_refreshes, refresh_trend) =
366        compute_refresh_trend(commits, reference_ts, &tracked_paths);
367
368    CodeAgeDistributionReport {
369        buckets: bucket_rows,
370        recent_refreshes,
371        prior_refreshes,
372        refresh_trend,
373    }
374}
375
376fn compute_refresh_trend(
377    commits: &[tokmd_git::GitCommit],
378    reference_ts: i64,
379    tracked_paths: &BTreeSet<String>,
380) -> (usize, usize, TrendClass) {
381    if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
382        return (0, 0, TrendClass::Flat);
383    }
384
385    let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
386    let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
387
388    let mut recent_files: BTreeSet<String> = BTreeSet::new();
389    let mut prior_files: BTreeSet<String> = BTreeSet::new();
390
391    for commit in commits {
392        if commit.timestamp >= recent_start {
393            for file in &commit.files {
394                let normalized = normalize_git_path(file);
395                if tracked_paths.contains(&normalized) {
396                    recent_files.insert(normalized);
397                }
398            }
399        } else if commit.timestamp >= prior_start {
400            for file in &commit.files {
401                let normalized = normalize_git_path(file);
402                if tracked_paths.contains(&normalized) {
403                    prior_files.insert(normalized);
404                }
405            }
406        }
407    }
408
409    let recent = recent_files.len();
410    let prior = prior_files.len();
411    let trend = if prior == 0 {
412        if recent > 0 {
413            TrendClass::Rising
414        } else {
415            TrendClass::Flat
416        }
417    } else {
418        let delta_pct = (recent as f64 - prior as f64) / prior as f64;
419        if delta_pct > REFRESH_TREND_EPSILON {
420            TrendClass::Rising
421        } else if delta_pct < -REFRESH_TREND_EPSILON {
422            TrendClass::Falling
423        } else {
424            TrendClass::Flat
425        }
426    };
427
428    (recent, prior, trend)
429}
430
/// Converts a path as reported by git into the key form used for lookups.
///
/// Every backslash becomes a forward slash, then a single leading `./` (if present) is
/// removed. Nothing else is altered.
fn normalize_git_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => unified,
    }
}