Skip to main content

tokmd_analysis_git/
git.rs

1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6    BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7    CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8    TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
/// Number of seconds in one day; used to turn timestamp differences (in
/// seconds) into whole days and day counts into seconds.
const SECONDS_PER_DAY: i64 = 86_400;
/// Length, in days, of each of the two consecutive windows ("recent" and
/// "prior") compared when classifying the refresh trend.
const REFRESH_WINDOW_DAYS: i64 = 30;
/// Relative change between the prior and recent windows that must be exceeded
/// (in either direction) before the trend is reported as rising or falling.
const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20    repo_root: &Path,
21    export: &ExportData,
22    commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24    let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25    for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26        let key = normalize_path(&row.path, repo_root);
27        row_map.insert(key, (row, row.module.clone()));
28    }
29
30    let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31    let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32    let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33    let mut max_ts = 0i64;
34
35    for commit in commits {
36        max_ts = max_ts.max(commit.timestamp);
37        for file in &commit.files {
38            let key = normalize_git_path(file);
39            if let Some((row, module)) = row_map.get(&key) {
40                *commit_counts.entry(key.clone()).or_insert(0) += 1;
41                authors_by_module
42                    .entry(module.clone())
43                    .or_default()
44                    .insert(commit.author.clone());
45                last_change.entry(key.clone()).or_insert(commit.timestamp);
46                let _ = row;
47            }
48        }
49    }
50
51    let mut hotspots: Vec<HotspotRow> = commit_counts
52        .iter()
53        .filter_map(|(path, commits)| {
54            let (row, _) = row_map.get(path)?;
55            Some(HotspotRow {
56                path: path.clone(),
57                commits: *commits,
58                lines: row.lines,
59                score: row.lines * commits,
60            })
61        })
62        .collect();
63    hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
64
65    let mut bus_factor: Vec<BusFactorRow> = authors_by_module
66        .into_iter()
67        .map(|(module, authors)| BusFactorRow {
68            module,
69            authors: authors.len(),
70        })
71        .collect();
72    bus_factor.sort_by(|a, b| {
73        a.authors
74            .cmp(&b.authors)
75            .then_with(|| a.module.cmp(&b.module))
76    });
77
78    let freshness = build_freshness_report(&last_change, &row_map, max_ts);
79    let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
80
81    let coupling = build_coupling(commits, &row_map);
82    let intent = build_intent_report(commits, &row_map);
83
84    Ok(GitReport {
85        commits_scanned: commits.len(),
86        files_seen: commit_counts.len(),
87        hotspots,
88        bus_factor,
89        freshness,
90        coupling,
91        age_distribution: Some(age_distribution),
92        intent: Some(intent),
93    })
94}
95
96fn build_freshness_report(
97    last_change: &BTreeMap<String, i64>,
98    row_map: &BTreeMap<String, (&FileRow, String)>,
99    reference_ts: i64,
100) -> FreshnessReport {
101    let threshold_days = 365usize;
102    let mut stale_files = 0usize;
103    let mut total_files = 0usize;
104    let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
105
106    for (path, ts) in last_change {
107        let (_, module) = match row_map.get(path) {
108            Some(v) => v,
109            None => continue,
110        };
111        let days = if reference_ts > *ts {
112            ((reference_ts - *ts) / 86_400) as usize
113        } else {
114            0
115        };
116        total_files += 1;
117        if days > threshold_days {
118            stale_files += 1;
119        }
120        by_module.entry(module.clone()).or_default().push(days);
121    }
122
123    let stale_pct = if total_files == 0 {
124        0.0
125    } else {
126        round_f64(stale_files as f64 / total_files as f64, 4)
127    };
128
129    let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
130    for (module, mut days) in by_module {
131        days.sort();
132        let avg = if days.is_empty() {
133            0.0
134        } else {
135            round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
136        };
137        let p90 = if days.is_empty() {
138            0.0
139        } else {
140            round_f64(percentile(&days, 0.90), 2)
141        };
142        let stale = days.iter().filter(|d| **d > threshold_days).count();
143        let pct = if days.is_empty() {
144            0.0
145        } else {
146            round_f64(stale as f64 / days.len() as f64, 4)
147        };
148        module_rows.push(ModuleFreshnessRow {
149            module,
150            avg_days: avg,
151            p90_days: p90,
152            stale_pct: pct,
153        });
154    }
155    module_rows.sort_by(|a, b| a.module.cmp(&b.module));
156
157    FreshnessReport {
158        threshold_days,
159        stale_files,
160        total_files,
161        stale_pct,
162        by_module: module_rows,
163    }
164}
165
166fn build_coupling(
167    commits: &[tokmd_git::GitCommit],
168    row_map: &BTreeMap<String, (&FileRow, String)>,
169) -> Vec<CouplingRow> {
170    let mut pairs: BTreeMap<(String, String), usize> = BTreeMap::new();
171    let mut touches: BTreeMap<String, usize> = BTreeMap::new();
172    let mut commits_considered: usize = 0;
173
174    for commit in commits {
175        let mut modules: BTreeSet<String> = BTreeSet::new();
176        for file in &commit.files {
177            let key = normalize_git_path(file);
178            if let Some((_row, module)) = row_map.get(&key) {
179                modules.insert(module.clone());
180            }
181        }
182        // Only count commits where at least one file maps to a module
183        if modules.is_empty() {
184            continue;
185        }
186        commits_considered += 1;
187        for m in &modules {
188            *touches.entry(m.clone()).or_insert(0) += 1;
189        }
190        let modules: Vec<String> = modules.into_iter().collect();
191        for i in 0..modules.len() {
192            for j in (i + 1)..modules.len() {
193                let left = modules[i].clone();
194                let right = modules[j].clone();
195                let key = if left <= right {
196                    (left, right)
197                } else {
198                    (right, left)
199                };
200                *pairs.entry(key).or_insert(0) += 1;
201            }
202        }
203    }
204
205    let n = commits_considered;
206
207    let mut rows: Vec<CouplingRow> = pairs
208        .into_iter()
209        .map(|((left, right), count)| {
210            let n_a = touches.get(&left).copied().unwrap_or(0);
211            let n_b = touches.get(&right).copied().unwrap_or(0);
212            let denom = (n_a + n_b).saturating_sub(count);
213            let jaccard = if denom > 0 {
214                Some(round_f64(count as f64 / denom as f64, 4))
215            } else {
216                None
217            };
218            let lift = if n > 0 && n_a > 0 && n_b > 0 {
219                Some(round_f64(
220                    (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
221                    4,
222                ))
223            } else {
224                None
225            };
226            CouplingRow {
227                left,
228                right,
229                count,
230                jaccard,
231                lift,
232                n_left: Some(n_a),
233                n_right: Some(n_b),
234            }
235        })
236        .collect();
237    rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
238    rows
239}
240
241fn build_intent_report(
242    commits: &[tokmd_git::GitCommit],
243    row_map: &BTreeMap<String, (&FileRow, String)>,
244) -> CommitIntentReport {
245    let mut overall = CommitIntentCounts::default();
246    let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
247
248    for commit in commits {
249        let kind = tokmd_git::classify_intent(&commit.subject);
250        overall.increment(kind);
251
252        // Attribute intent to all modules touched by this commit
253        let mut modules: BTreeSet<String> = BTreeSet::new();
254        for file in &commit.files {
255            let key = normalize_git_path(file);
256            if let Some((_row, module)) = row_map.get(&key) {
257                modules.insert(module.clone());
258            }
259        }
260        for module in modules {
261            by_module_counts.entry(module).or_default().increment(kind);
262        }
263    }
264
265    let unknown_pct = if overall.total > 0 {
266        round_f64(overall.other as f64 / overall.total as f64, 4)
267    } else {
268        0.0
269    };
270
271    let corrective_ratio = if overall.total > 0 {
272        Some(round_f64(
273            (overall.fix + overall.revert) as f64 / overall.total as f64,
274            4,
275        ))
276    } else {
277        None
278    };
279
280    let mut by_module: Vec<ModuleIntentRow> = by_module_counts
281        .into_iter()
282        .map(|(module, counts)| ModuleIntentRow { module, counts })
283        .collect();
284    by_module.sort_by(|a, b| a.module.cmp(&b.module));
285
286    CommitIntentReport {
287        overall,
288        by_module,
289        unknown_pct,
290        corrective_ratio,
291    }
292}
293
294fn build_code_age_distribution(
295    last_change: &BTreeMap<String, i64>,
296    reference_ts: i64,
297    commits: &[tokmd_git::GitCommit],
298) -> CodeAgeDistributionReport {
299    let mut ages_days: Vec<usize> = last_change
300        .values()
301        .map(|ts| {
302            if reference_ts > *ts {
303                ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
304            } else {
305                0
306            }
307        })
308        .collect();
309    ages_days.sort_unstable();
310
311    let buckets = vec![
312        ("0-30d", 0usize, Some(30usize)),
313        ("31-90d", 31usize, Some(90usize)),
314        ("91-180d", 91usize, Some(180usize)),
315        ("181-365d", 181usize, Some(365usize)),
316        ("366d+", 366usize, None),
317    ];
318
319    let mut counts = vec![0usize; buckets.len()];
320    for age in &ages_days {
321        for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
322            let in_range = if let Some(max_days) = max_days {
323                *age >= *min_days && *age <= *max_days
324            } else {
325                *age >= *min_days
326            };
327            if in_range {
328                counts[idx] += 1;
329                break;
330            }
331        }
332    }
333
334    let total_files = ages_days.len();
335    let bucket_rows: Vec<CodeAgeBucket> = buckets
336        .into_iter()
337        .zip(counts)
338        .map(|((label, min_days, max_days), files)| CodeAgeBucket {
339            label: label.to_string(),
340            min_days,
341            max_days,
342            files,
343            pct: if total_files == 0 {
344                0.0
345            } else {
346                round_f64(files as f64 / total_files as f64, 4)
347            },
348        })
349        .collect();
350
351    let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
352    let (recent_refreshes, prior_refreshes, refresh_trend) =
353        compute_refresh_trend(commits, reference_ts, &tracked_paths);
354
355    CodeAgeDistributionReport {
356        buckets: bucket_rows,
357        recent_refreshes,
358        prior_refreshes,
359        refresh_trend,
360    }
361}
362
363fn compute_refresh_trend(
364    commits: &[tokmd_git::GitCommit],
365    reference_ts: i64,
366    tracked_paths: &BTreeSet<String>,
367) -> (usize, usize, TrendClass) {
368    if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
369        return (0, 0, TrendClass::Flat);
370    }
371
372    let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
373    let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
374
375    let mut recent_files: BTreeSet<String> = BTreeSet::new();
376    let mut prior_files: BTreeSet<String> = BTreeSet::new();
377
378    for commit in commits {
379        if commit.timestamp >= recent_start {
380            for file in &commit.files {
381                let normalized = normalize_git_path(file);
382                if tracked_paths.contains(&normalized) {
383                    recent_files.insert(normalized);
384                }
385            }
386        } else if commit.timestamp >= prior_start {
387            for file in &commit.files {
388                let normalized = normalize_git_path(file);
389                if tracked_paths.contains(&normalized) {
390                    prior_files.insert(normalized);
391                }
392            }
393        }
394    }
395
396    let recent = recent_files.len();
397    let prior = prior_files.len();
398    let trend = if prior == 0 {
399        if recent > 0 {
400            TrendClass::Rising
401        } else {
402            TrendClass::Flat
403        }
404    } else {
405        let delta_pct = (recent as f64 - prior as f64) / prior as f64;
406        if delta_pct > REFRESH_TREND_EPSILON {
407            TrendClass::Rising
408        } else if delta_pct < -REFRESH_TREND_EPSILON {
409            TrendClass::Falling
410        } else {
411            TrendClass::Flat
412        }
413    };
414
415    (recent, prior, trend)
416}
417
/// Normalizes a path reported by git for lookup: every backslash becomes a
/// forward slash, then a single leading `./` (if present) is dropped.
fn normalize_git_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_string(),
        None => unified,
    }
}