1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6 BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7 CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8 TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
/// Number of seconds in one day; used to turn timestamp differences into whole days.
const SECONDS_PER_DAY: i64 = 86_400;
/// Length, in days, of each of the two consecutive windows compared by `compute_refresh_trend`.
const REFRESH_WINDOW_DAYS: i64 = 30;
/// Relative change in refreshed-file count beyond which the trend is classed as rising or falling.
const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20 repo_root: &Path,
21 export: &ExportData,
22 commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24 let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25 for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26 let key = normalize_path(&row.path, repo_root);
27 row_map.insert(key, (row, row.module.clone()));
28 }
29
30 let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31 let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32 let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33 let mut max_ts = 0i64;
34
35 for commit in commits {
36 max_ts = max_ts.max(commit.timestamp);
37 for file in &commit.files {
38 let key = normalize_git_path(file);
39 if let Some((row, module)) = row_map.get(&key) {
40 if let Some(val) = commit_counts.get_mut(&key) {
41 *val += 1;
42 } else {
43 commit_counts.insert(key.clone(), 1);
44 }
45 if let Some(val) = authors_by_module.get_mut(module) {
46 val.insert(commit.author.clone());
47 } else {
48 let mut set = BTreeSet::new();
49 set.insert(commit.author.clone());
50 authors_by_module.insert(module.clone(), set);
51 }
52 if !last_change.contains_key(&key) {
53 last_change.insert(key.clone(), commit.timestamp);
54 }
55 let _ = row;
56 }
57 }
58 }
59
60 let mut hotspots: Vec<HotspotRow> = commit_counts
61 .iter()
62 .filter_map(|(path, commits)| {
63 let (row, _) = row_map.get(path)?;
64 Some(HotspotRow {
65 path: path.clone(),
66 commits: *commits,
67 lines: row.lines,
68 score: row.lines * commits,
69 })
70 })
71 .collect();
72 hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
73
74 let mut bus_factor: Vec<BusFactorRow> = authors_by_module
75 .into_iter()
76 .map(|(module, authors)| BusFactorRow {
77 module,
78 authors: authors.len(),
79 })
80 .collect();
81 bus_factor.sort_by(|a, b| {
82 a.authors
83 .cmp(&b.authors)
84 .then_with(|| a.module.cmp(&b.module))
85 });
86
87 let freshness = build_freshness_report(&last_change, &row_map, max_ts);
88 let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
89
90 let coupling = build_coupling(commits, &row_map);
91 let intent = build_intent_report(commits, &row_map);
92
93 Ok(GitReport {
94 commits_scanned: commits.len(),
95 files_seen: commit_counts.len(),
96 hotspots,
97 bus_factor,
98 freshness,
99 coupling,
100 age_distribution: Some(age_distribution),
101 intent: Some(intent),
102 })
103}
104
105fn build_freshness_report(
106 last_change: &BTreeMap<String, i64>,
107 row_map: &BTreeMap<String, (&FileRow, String)>,
108 reference_ts: i64,
109) -> FreshnessReport {
110 let threshold_days = 365usize;
111 let mut stale_files = 0usize;
112 let mut total_files = 0usize;
113 let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
114
115 for (path, ts) in last_change {
116 let (_, module) = match row_map.get(path) {
117 Some(v) => v,
118 None => continue,
119 };
120 let days = if reference_ts > *ts {
121 ((reference_ts - *ts) / 86_400) as usize
122 } else {
123 0
124 };
125 total_files += 1;
126 if days > threshold_days {
127 stale_files += 1;
128 }
129 by_module.entry(module.clone()).or_default().push(days);
130 }
131
132 let stale_pct = if total_files == 0 {
133 0.0
134 } else {
135 round_f64(stale_files as f64 / total_files as f64, 4)
136 };
137
138 let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
139 for (module, mut days) in by_module {
140 days.sort();
141 let avg = if days.is_empty() {
142 0.0
143 } else {
144 round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
145 };
146 let p90 = if days.is_empty() {
147 0.0
148 } else {
149 round_f64(percentile(&days, 0.90), 2)
150 };
151 let stale = days.iter().filter(|d| **d > threshold_days).count();
152 let pct = if days.is_empty() {
153 0.0
154 } else {
155 round_f64(stale as f64 / days.len() as f64, 4)
156 };
157 module_rows.push(ModuleFreshnessRow {
158 module,
159 avg_days: avg,
160 p90_days: p90,
161 stale_pct: pct,
162 });
163 }
164 module_rows.sort_by(|a, b| a.module.cmp(&b.module));
165
166 FreshnessReport {
167 threshold_days,
168 stale_files,
169 total_files,
170 stale_pct,
171 by_module: module_rows,
172 }
173}
174
175fn build_coupling(
176 commits: &[tokmd_git::GitCommit],
177 row_map: &BTreeMap<String, (&FileRow, String)>,
178) -> Vec<CouplingRow> {
179 let mut pairs: BTreeMap<(String, String), usize> = BTreeMap::new();
180 let mut touches: BTreeMap<String, usize> = BTreeMap::new();
181 let mut commits_considered: usize = 0;
182
183 for commit in commits {
184 let mut modules: BTreeSet<String> = BTreeSet::new();
185 for file in &commit.files {
186 let key = normalize_git_path(file);
187 if let Some((_row, module)) = row_map.get(&key) {
188 modules.insert(module.clone());
189 }
190 }
191 if modules.is_empty() {
193 continue;
194 }
195 commits_considered += 1;
196 for m in &modules {
197 if let Some(val) = touches.get_mut(m) {
198 *val += 1;
199 } else {
200 touches.insert(m.clone(), 1);
201 }
202 }
203 let modules: Vec<String> = modules.into_iter().collect();
204 for i in 0..modules.len() {
205 for j in (i + 1)..modules.len() {
206 let left = modules[i].clone();
207 let right = modules[j].clone();
208 let key = if left <= right {
209 (left, right)
210 } else {
211 (right, left)
212 };
213 *pairs.entry(key).or_insert(0) += 1;
214 }
215 }
216 }
217
218 let n = commits_considered;
219
220 let mut rows: Vec<CouplingRow> = pairs
221 .into_iter()
222 .map(|((left, right), count)| {
223 let n_a = touches.get(&left).copied().unwrap_or(0);
224 let n_b = touches.get(&right).copied().unwrap_or(0);
225 let denom = (n_a + n_b).saturating_sub(count);
226 let jaccard = if denom > 0 {
227 Some(round_f64(count as f64 / denom as f64, 4))
228 } else {
229 None
230 };
231 let lift = if n > 0 && n_a > 0 && n_b > 0 {
232 Some(round_f64(
233 (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
234 4,
235 ))
236 } else {
237 None
238 };
239 CouplingRow {
240 left,
241 right,
242 count,
243 jaccard,
244 lift,
245 n_left: Some(n_a),
246 n_right: Some(n_b),
247 }
248 })
249 .collect();
250 rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
251 rows
252}
253
254fn build_intent_report(
255 commits: &[tokmd_git::GitCommit],
256 row_map: &BTreeMap<String, (&FileRow, String)>,
257) -> CommitIntentReport {
258 let mut overall = CommitIntentCounts::default();
259 let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
260
261 for commit in commits {
262 let kind = tokmd_git::classify_intent(&commit.subject);
263 overall.increment(kind);
264
265 let mut modules: BTreeSet<String> = BTreeSet::new();
267 for file in &commit.files {
268 let key = normalize_git_path(file);
269 if let Some((_row, module)) = row_map.get(&key) {
270 modules.insert(module.clone());
271 }
272 }
273 for module in modules {
274 by_module_counts.entry(module).or_default().increment(kind);
275 }
276 }
277
278 let unknown_pct = if overall.total > 0 {
279 round_f64(overall.other as f64 / overall.total as f64, 4)
280 } else {
281 0.0
282 };
283
284 let corrective_ratio = if overall.total > 0 {
285 Some(round_f64(
286 (overall.fix + overall.revert) as f64 / overall.total as f64,
287 4,
288 ))
289 } else {
290 None
291 };
292
293 let mut by_module: Vec<ModuleIntentRow> = by_module_counts
294 .into_iter()
295 .map(|(module, counts)| ModuleIntentRow { module, counts })
296 .collect();
297 by_module.sort_by(|a, b| a.module.cmp(&b.module));
298
299 CommitIntentReport {
300 overall,
301 by_module,
302 unknown_pct,
303 corrective_ratio,
304 }
305}
306
307fn build_code_age_distribution(
308 last_change: &BTreeMap<String, i64>,
309 reference_ts: i64,
310 commits: &[tokmd_git::GitCommit],
311) -> CodeAgeDistributionReport {
312 let mut ages_days: Vec<usize> = last_change
313 .values()
314 .map(|ts| {
315 if reference_ts > *ts {
316 ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
317 } else {
318 0
319 }
320 })
321 .collect();
322 ages_days.sort_unstable();
323
324 let buckets = vec![
325 ("0-30d", 0usize, Some(30usize)),
326 ("31-90d", 31usize, Some(90usize)),
327 ("91-180d", 91usize, Some(180usize)),
328 ("181-365d", 181usize, Some(365usize)),
329 ("366d+", 366usize, None),
330 ];
331
332 let mut counts = vec![0usize; buckets.len()];
333 for age in &ages_days {
334 for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
335 let in_range = if let Some(max_days) = max_days {
336 *age >= *min_days && *age <= *max_days
337 } else {
338 *age >= *min_days
339 };
340 if in_range {
341 counts[idx] += 1;
342 break;
343 }
344 }
345 }
346
347 let total_files = ages_days.len();
348 let bucket_rows: Vec<CodeAgeBucket> = buckets
349 .into_iter()
350 .zip(counts)
351 .map(|((label, min_days, max_days), files)| CodeAgeBucket {
352 label: label.to_string(),
353 min_days,
354 max_days,
355 files,
356 pct: if total_files == 0 {
357 0.0
358 } else {
359 round_f64(files as f64 / total_files as f64, 4)
360 },
361 })
362 .collect();
363
364 let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
365 let (recent_refreshes, prior_refreshes, refresh_trend) =
366 compute_refresh_trend(commits, reference_ts, &tracked_paths);
367
368 CodeAgeDistributionReport {
369 buckets: bucket_rows,
370 recent_refreshes,
371 prior_refreshes,
372 refresh_trend,
373 }
374}
375
376fn compute_refresh_trend(
377 commits: &[tokmd_git::GitCommit],
378 reference_ts: i64,
379 tracked_paths: &BTreeSet<String>,
380) -> (usize, usize, TrendClass) {
381 if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
382 return (0, 0, TrendClass::Flat);
383 }
384
385 let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
386 let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
387
388 let mut recent_files: BTreeSet<String> = BTreeSet::new();
389 let mut prior_files: BTreeSet<String> = BTreeSet::new();
390
391 for commit in commits {
392 if commit.timestamp >= recent_start {
393 for file in &commit.files {
394 let normalized = normalize_git_path(file);
395 if tracked_paths.contains(&normalized) {
396 recent_files.insert(normalized);
397 }
398 }
399 } else if commit.timestamp >= prior_start {
400 for file in &commit.files {
401 let normalized = normalize_git_path(file);
402 if tracked_paths.contains(&normalized) {
403 prior_files.insert(normalized);
404 }
405 }
406 }
407 }
408
409 let recent = recent_files.len();
410 let prior = prior_files.len();
411 let trend = if prior == 0 {
412 if recent > 0 {
413 TrendClass::Rising
414 } else {
415 TrendClass::Flat
416 }
417 } else {
418 let delta_pct = (recent as f64 - prior as f64) / prior as f64;
419 if delta_pct > REFRESH_TREND_EPSILON {
420 TrendClass::Rising
421 } else if delta_pct < -REFRESH_TREND_EPSILON {
422 TrendClass::Falling
423 } else {
424 TrendClass::Flat
425 }
426 };
427
428 (recent, prior, trend)
429}
430
/// Converts a path reported by git into the key form used for lookups.
///
/// Every backslash is replaced with a forward slash, then a single leading `./` (if present)
/// is removed. Nothing else about the path is changed.
fn normalize_git_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    if let Some(rest) = unified.strip_prefix("./") {
        return rest.to_owned();
    }
    unified
}