Skip to main content

tokmd_analysis_git/
git.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6    BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7    CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8    TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
/// Number of seconds in one day; the divisor used to turn a timestamp
/// difference into a whole number of days.
const SECONDS_PER_DAY: i64 = 86_400;
/// Width, in days, of each of the two back-to-back windows ("recent" and
/// "prior") that the refresh-trend computation compares.
const REFRESH_WINDOW_DAYS: i64 = 30;
/// Relative change between the prior and recent windows that must be exceeded
/// (in either direction) before the trend is classified as rising or falling.
const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20    repo_root: &Path,
21    export: &ExportData,
22    commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24    let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25    for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26        let key = normalize_path(&row.path, repo_root);
27        row_map.insert(key, (row, row.module.clone()));
28    }
29
30    let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31    let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32    let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33    let mut max_ts = 0i64;
34
35    for commit in commits {
36        max_ts = max_ts.max(commit.timestamp);
37        for file in &commit.files {
38            let key = normalize_git_path(file);
39            if let Some((row, module)) = row_map.get(&key) {
40                if let Some(val) = commit_counts.get_mut(&key) {
41                    *val += 1;
42                } else {
43                    commit_counts.insert(key.clone(), 1);
44                }
45                if let Some(val) = authors_by_module.get_mut(module) {
46                    val.insert(commit.author.clone());
47                } else {
48                    let mut set = BTreeSet::new();
49                    set.insert(commit.author.clone());
50                    authors_by_module.insert(module.clone(), set);
51                }
52                if !last_change.contains_key(&key) {
53                    last_change.insert(key.clone(), commit.timestamp);
54                }
55                let _ = row;
56            }
57        }
58    }
59
60    let mut hotspots: Vec<HotspotRow> = commit_counts
61        .iter()
62        .filter_map(|(path, commits)| {
63            let (row, _) = row_map.get(path)?;
64            Some(HotspotRow {
65                path: path.clone(),
66                commits: *commits,
67                lines: row.lines,
68                score: row.lines * commits,
69            })
70        })
71        .collect();
72    hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
73
74    let mut bus_factor: Vec<BusFactorRow> = authors_by_module
75        .into_iter()
76        .map(|(module, authors)| BusFactorRow {
77            module,
78            authors: authors.len(),
79        })
80        .collect();
81    bus_factor.sort_by(|a, b| {
82        a.authors
83            .cmp(&b.authors)
84            .then_with(|| a.module.cmp(&b.module))
85    });
86
87    let freshness = build_freshness_report(&last_change, &row_map, max_ts);
88    let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
89
90    let coupling = build_coupling(commits, &row_map);
91    let intent = build_intent_report(commits, &row_map);
92
93    Ok(GitReport {
94        commits_scanned: commits.len(),
95        files_seen: commit_counts.len(),
96        hotspots,
97        bus_factor,
98        freshness,
99        coupling,
100        age_distribution: Some(age_distribution),
101        intent: Some(intent),
102    })
103}
104
105fn build_freshness_report(
106    last_change: &BTreeMap<String, i64>,
107    row_map: &BTreeMap<String, (&FileRow, String)>,
108    reference_ts: i64,
109) -> FreshnessReport {
110    let threshold_days = 365usize;
111    let mut stale_files = 0usize;
112    let mut total_files = 0usize;
113    let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
114
115    for (path, ts) in last_change {
116        let (_, module) = match row_map.get(path) {
117            Some(v) => v,
118            None => continue,
119        };
120        let days = if reference_ts > *ts {
121            ((reference_ts - *ts) / 86_400) as usize
122        } else {
123            0
124        };
125        total_files += 1;
126        if days > threshold_days {
127            stale_files += 1;
128        }
129        if let Some(list) = by_module.get_mut(module) {
130            list.push(days);
131        } else {
132            by_module.insert(module.clone(), vec![days]);
133        }
134    }
135
136    let stale_pct = if total_files == 0 {
137        0.0
138    } else {
139        round_f64(stale_files as f64 / total_files as f64, 4)
140    };
141
142    let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
143    for (module, mut days) in by_module {
144        days.sort();
145        let avg = if days.is_empty() {
146            0.0
147        } else {
148            round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
149        };
150        let p90 = if days.is_empty() {
151            0.0
152        } else {
153            round_f64(percentile(&days, 0.90), 2)
154        };
155        let stale = days.iter().filter(|d| **d > threshold_days).count();
156        let pct = if days.is_empty() {
157            0.0
158        } else {
159            round_f64(stale as f64 / days.len() as f64, 4)
160        };
161        module_rows.push(ModuleFreshnessRow {
162            module,
163            avg_days: avg,
164            p90_days: p90,
165            stale_pct: pct,
166        });
167    }
168    module_rows.sort_by(|a, b| a.module.cmp(&b.module));
169
170    FreshnessReport {
171        threshold_days,
172        stale_files,
173        total_files,
174        stale_pct,
175        by_module: module_rows,
176    }
177}
178
179fn build_coupling(
180    commits: &[tokmd_git::GitCommit],
181    row_map: &BTreeMap<String, (&FileRow, String)>,
182) -> Vec<CouplingRow> {
183    let mut pairs: BTreeMap<(&str, &str), usize> = BTreeMap::new();
184    let mut touches: BTreeMap<&str, usize> = BTreeMap::new();
185    let mut commits_considered: usize = 0;
186
187    for commit in commits {
188        let mut modules: BTreeSet<&str> = BTreeSet::new();
189        for file in &commit.files {
190            let key = normalize_git_path(file);
191            if let Some((_row, module)) = row_map.get(&key) {
192                modules.insert(module.as_str());
193            }
194        }
195        // Only count commits where at least one file maps to a module
196        if modules.is_empty() {
197            continue;
198        }
199        commits_considered += 1;
200        for m in &modules {
201            if let Some(val) = touches.get_mut(m) {
202                *val += 1;
203            } else {
204                touches.insert(*m, 1);
205            }
206        }
207        let modules: Vec<&str> = modules.into_iter().collect();
208        for i in 0..modules.len() {
209            let left = modules[i];
210            for right in modules.iter().skip(i + 1) {
211                let key = (left, *right);
212                *pairs.entry(key).or_insert(0) += 1;
213            }
214        }
215    }
216
217    let n = commits_considered;
218
219    let mut rows: Vec<CouplingRow> = pairs
220        .into_iter()
221        .map(|((left, right), count)| {
222            let n_a = touches.get(left).copied().unwrap_or(0);
223            let n_b = touches.get(right).copied().unwrap_or(0);
224            let denom = (n_a + n_b).saturating_sub(count);
225            let jaccard = if denom > 0 {
226                Some(round_f64(count as f64 / denom as f64, 4))
227            } else {
228                None
229            };
230            let lift = if n > 0 && n_a > 0 && n_b > 0 {
231                Some(round_f64(
232                    (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
233                    4,
234                ))
235            } else {
236                None
237            };
238            CouplingRow {
239                left: left.to_string(),
240                right: right.to_string(),
241                count,
242                jaccard,
243                lift,
244                n_left: Some(n_a),
245                n_right: Some(n_b),
246            }
247        })
248        .collect();
249    rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
250    rows
251}
252
253fn build_intent_report(
254    commits: &[tokmd_git::GitCommit],
255    row_map: &BTreeMap<String, (&FileRow, String)>,
256) -> CommitIntentReport {
257    let mut overall = CommitIntentCounts::default();
258    let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
259
260    for commit in commits {
261        let kind = tokmd_git::classify_intent(&commit.subject);
262        overall.increment(kind);
263
264        // Attribute intent to all modules touched by this commit
265        let mut modules: BTreeSet<&str> = BTreeSet::new();
266        for file in &commit.files {
267            let key = normalize_git_path(file);
268            if let Some((_row, module)) = row_map.get(&key) {
269                modules.insert(module.as_str());
270            }
271        }
272        for module in modules {
273            by_module_counts
274                .entry(module.to_string())
275                .or_default()
276                .increment(kind);
277        }
278    }
279
280    let unknown_pct = if overall.total > 0 {
281        round_f64(overall.other as f64 / overall.total as f64, 4)
282    } else {
283        0.0
284    };
285
286    let corrective_ratio = if overall.total > 0 {
287        Some(round_f64(
288            (overall.fix + overall.revert) as f64 / overall.total as f64,
289            4,
290        ))
291    } else {
292        None
293    };
294
295    let mut by_module: Vec<ModuleIntentRow> = by_module_counts
296        .into_iter()
297        .map(|(module, counts)| ModuleIntentRow { module, counts })
298        .collect();
299    by_module.sort_by(|a, b| a.module.cmp(&b.module));
300
301    CommitIntentReport {
302        overall,
303        by_module,
304        unknown_pct,
305        corrective_ratio,
306    }
307}
308
309fn build_code_age_distribution(
310    last_change: &BTreeMap<String, i64>,
311    reference_ts: i64,
312    commits: &[tokmd_git::GitCommit],
313) -> CodeAgeDistributionReport {
314    let mut ages_days: Vec<usize> = last_change
315        .values()
316        .map(|ts| {
317            if reference_ts > *ts {
318                ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
319            } else {
320                0
321            }
322        })
323        .collect();
324    ages_days.sort_unstable();
325
326    let buckets = vec![
327        ("0-30d", 0usize, Some(30usize)),
328        ("31-90d", 31usize, Some(90usize)),
329        ("91-180d", 91usize, Some(180usize)),
330        ("181-365d", 181usize, Some(365usize)),
331        ("366d+", 366usize, None),
332    ];
333
334    let mut counts = vec![0usize; buckets.len()];
335    for age in &ages_days {
336        for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
337            let in_range = if let Some(max_days) = max_days {
338                *age >= *min_days && *age <= *max_days
339            } else {
340                *age >= *min_days
341            };
342            if in_range {
343                counts[idx] += 1;
344                break;
345            }
346        }
347    }
348
349    let total_files = ages_days.len();
350    let bucket_rows: Vec<CodeAgeBucket> = buckets
351        .into_iter()
352        .zip(counts)
353        .map(|((label, min_days, max_days), files)| CodeAgeBucket {
354            label: label.to_string(),
355            min_days,
356            max_days,
357            files,
358            pct: if total_files == 0 {
359                0.0
360            } else {
361                round_f64(files as f64 / total_files as f64, 4)
362            },
363        })
364        .collect();
365
366    let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
367    let (recent_refreshes, prior_refreshes, refresh_trend) =
368        compute_refresh_trend(commits, reference_ts, &tracked_paths);
369
370    CodeAgeDistributionReport {
371        buckets: bucket_rows,
372        recent_refreshes,
373        prior_refreshes,
374        refresh_trend,
375    }
376}
377
378fn compute_refresh_trend(
379    commits: &[tokmd_git::GitCommit],
380    reference_ts: i64,
381    tracked_paths: &BTreeSet<String>,
382) -> (usize, usize, TrendClass) {
383    if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
384        return (0, 0, TrendClass::Flat);
385    }
386
387    let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
388    let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
389
390    let mut recent_files: BTreeSet<String> = BTreeSet::new();
391    let mut prior_files: BTreeSet<String> = BTreeSet::new();
392
393    for commit in commits {
394        if commit.timestamp >= recent_start {
395            for file in &commit.files {
396                let normalized = normalize_git_path(file);
397                if tracked_paths.contains(&normalized) {
398                    recent_files.insert(normalized);
399                }
400            }
401        } else if commit.timestamp >= prior_start {
402            for file in &commit.files {
403                let normalized = normalize_git_path(file);
404                if tracked_paths.contains(&normalized) {
405                    prior_files.insert(normalized);
406                }
407            }
408        }
409    }
410
411    let recent = recent_files.len();
412    let prior = prior_files.len();
413    let trend = if prior == 0 {
414        if recent > 0 {
415            TrendClass::Rising
416        } else {
417            TrendClass::Flat
418        }
419    } else {
420        let delta_pct = (recent as f64 - prior as f64) / prior as f64;
421        if delta_pct > REFRESH_TREND_EPSILON {
422            TrendClass::Rising
423        } else if delta_pct < -REFRESH_TREND_EPSILON {
424            TrendClass::Falling
425        } else {
426            TrendClass::Flat
427        }
428    };
429
430    (recent, prior, trend)
431}
432
/// Normalizes a path string so it can be compared with the keys built from the
/// export rows: every backslash becomes a forward slash, then a single leading
/// `./` is removed if present.
fn normalize_git_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    if let Some(rest) = unified.strip_prefix("./") {
        return rest.to_string();
    }
    unified
}