1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6 BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7 CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8 TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
/// Number of seconds in one day; divides timestamp deltas to get whole days.
const SECONDS_PER_DAY: i64 = 24 * 60 * 60;
/// Width, in days, of each of the two adjacent windows compared by the refresh trend.
const REFRESH_WINDOW_DAYS: i64 = 30;
/// Relative change between the two windows that must be exceeded before the
/// trend is classified as rising or falling instead of flat.
const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20 repo_root: &Path,
21 export: &ExportData,
22 commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24 let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25 for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26 let key = normalize_path(&row.path, repo_root);
27 row_map.insert(key, (row, row.module.clone()));
28 }
29
30 let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31 let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32 let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33 let mut max_ts = 0i64;
34
35 for commit in commits {
36 max_ts = max_ts.max(commit.timestamp);
37 for file in &commit.files {
38 let key = normalize_git_path(file);
39 if let Some((row, module)) = row_map.get(&key) {
40 *commit_counts.entry(key.clone()).or_insert(0) += 1;
41 authors_by_module
42 .entry(module.clone())
43 .or_default()
44 .insert(commit.author.clone());
45 last_change.entry(key.clone()).or_insert(commit.timestamp);
46 let _ = row;
47 }
48 }
49 }
50
51 let mut hotspots: Vec<HotspotRow> = commit_counts
52 .iter()
53 .filter_map(|(path, commits)| {
54 let (row, _) = row_map.get(path)?;
55 Some(HotspotRow {
56 path: path.clone(),
57 commits: *commits,
58 lines: row.lines,
59 score: row.lines * commits,
60 })
61 })
62 .collect();
63 hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
64
65 let mut bus_factor: Vec<BusFactorRow> = authors_by_module
66 .into_iter()
67 .map(|(module, authors)| BusFactorRow {
68 module,
69 authors: authors.len(),
70 })
71 .collect();
72 bus_factor.sort_by(|a, b| {
73 a.authors
74 .cmp(&b.authors)
75 .then_with(|| a.module.cmp(&b.module))
76 });
77
78 let freshness = build_freshness_report(&last_change, &row_map, max_ts);
79 let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
80
81 let coupling = build_coupling(commits, &row_map);
82 let intent = build_intent_report(commits, &row_map);
83
84 Ok(GitReport {
85 commits_scanned: commits.len(),
86 files_seen: commit_counts.len(),
87 hotspots,
88 bus_factor,
89 freshness,
90 coupling,
91 age_distribution: Some(age_distribution),
92 intent: Some(intent),
93 })
94}
95
96fn build_freshness_report(
97 last_change: &BTreeMap<String, i64>,
98 row_map: &BTreeMap<String, (&FileRow, String)>,
99 reference_ts: i64,
100) -> FreshnessReport {
101 let threshold_days = 365usize;
102 let mut stale_files = 0usize;
103 let mut total_files = 0usize;
104 let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
105
106 for (path, ts) in last_change {
107 let (_, module) = match row_map.get(path) {
108 Some(v) => v,
109 None => continue,
110 };
111 let days = if reference_ts > *ts {
112 ((reference_ts - *ts) / 86_400) as usize
113 } else {
114 0
115 };
116 total_files += 1;
117 if days > threshold_days {
118 stale_files += 1;
119 }
120 by_module.entry(module.clone()).or_default().push(days);
121 }
122
123 let stale_pct = if total_files == 0 {
124 0.0
125 } else {
126 round_f64(stale_files as f64 / total_files as f64, 4)
127 };
128
129 let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
130 for (module, mut days) in by_module {
131 days.sort();
132 let avg = if days.is_empty() {
133 0.0
134 } else {
135 round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
136 };
137 let p90 = if days.is_empty() {
138 0.0
139 } else {
140 round_f64(percentile(&days, 0.90), 2)
141 };
142 let stale = days.iter().filter(|d| **d > threshold_days).count();
143 let pct = if days.is_empty() {
144 0.0
145 } else {
146 round_f64(stale as f64 / days.len() as f64, 4)
147 };
148 module_rows.push(ModuleFreshnessRow {
149 module,
150 avg_days: avg,
151 p90_days: p90,
152 stale_pct: pct,
153 });
154 }
155 module_rows.sort_by(|a, b| a.module.cmp(&b.module));
156
157 FreshnessReport {
158 threshold_days,
159 stale_files,
160 total_files,
161 stale_pct,
162 by_module: module_rows,
163 }
164}
165
166fn build_coupling(
167 commits: &[tokmd_git::GitCommit],
168 row_map: &BTreeMap<String, (&FileRow, String)>,
169) -> Vec<CouplingRow> {
170 let mut pairs: BTreeMap<(String, String), usize> = BTreeMap::new();
171 let mut touches: BTreeMap<String, usize> = BTreeMap::new();
172 let mut commits_considered: usize = 0;
173
174 for commit in commits {
175 let mut modules: BTreeSet<String> = BTreeSet::new();
176 for file in &commit.files {
177 let key = normalize_git_path(file);
178 if let Some((_row, module)) = row_map.get(&key) {
179 modules.insert(module.clone());
180 }
181 }
182 if modules.is_empty() {
184 continue;
185 }
186 commits_considered += 1;
187 for m in &modules {
188 *touches.entry(m.clone()).or_insert(0) += 1;
189 }
190 let modules: Vec<String> = modules.into_iter().collect();
191 for i in 0..modules.len() {
192 for j in (i + 1)..modules.len() {
193 let left = modules[i].clone();
194 let right = modules[j].clone();
195 let key = if left <= right {
196 (left, right)
197 } else {
198 (right, left)
199 };
200 *pairs.entry(key).or_insert(0) += 1;
201 }
202 }
203 }
204
205 let n = commits_considered;
206
207 let mut rows: Vec<CouplingRow> = pairs
208 .into_iter()
209 .map(|((left, right), count)| {
210 let n_a = touches.get(&left).copied().unwrap_or(0);
211 let n_b = touches.get(&right).copied().unwrap_or(0);
212 let denom = (n_a + n_b).saturating_sub(count);
213 let jaccard = if denom > 0 {
214 Some(round_f64(count as f64 / denom as f64, 4))
215 } else {
216 None
217 };
218 let lift = if n > 0 && n_a > 0 && n_b > 0 {
219 Some(round_f64(
220 (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
221 4,
222 ))
223 } else {
224 None
225 };
226 CouplingRow {
227 left,
228 right,
229 count,
230 jaccard,
231 lift,
232 n_left: Some(n_a),
233 n_right: Some(n_b),
234 }
235 })
236 .collect();
237 rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
238 rows
239}
240
241fn build_intent_report(
242 commits: &[tokmd_git::GitCommit],
243 row_map: &BTreeMap<String, (&FileRow, String)>,
244) -> CommitIntentReport {
245 let mut overall = CommitIntentCounts::default();
246 let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
247
248 for commit in commits {
249 let kind = tokmd_git::classify_intent(&commit.subject);
250 overall.increment(kind);
251
252 let mut modules: BTreeSet<String> = BTreeSet::new();
254 for file in &commit.files {
255 let key = normalize_git_path(file);
256 if let Some((_row, module)) = row_map.get(&key) {
257 modules.insert(module.clone());
258 }
259 }
260 for module in modules {
261 by_module_counts.entry(module).or_default().increment(kind);
262 }
263 }
264
265 let unknown_pct = if overall.total > 0 {
266 round_f64(overall.other as f64 / overall.total as f64, 4)
267 } else {
268 0.0
269 };
270
271 let corrective_ratio = if overall.total > 0 {
272 Some(round_f64(
273 (overall.fix + overall.revert) as f64 / overall.total as f64,
274 4,
275 ))
276 } else {
277 None
278 };
279
280 let mut by_module: Vec<ModuleIntentRow> = by_module_counts
281 .into_iter()
282 .map(|(module, counts)| ModuleIntentRow { module, counts })
283 .collect();
284 by_module.sort_by(|a, b| a.module.cmp(&b.module));
285
286 CommitIntentReport {
287 overall,
288 by_module,
289 unknown_pct,
290 corrective_ratio,
291 }
292}
293
294fn build_code_age_distribution(
295 last_change: &BTreeMap<String, i64>,
296 reference_ts: i64,
297 commits: &[tokmd_git::GitCommit],
298) -> CodeAgeDistributionReport {
299 let mut ages_days: Vec<usize> = last_change
300 .values()
301 .map(|ts| {
302 if reference_ts > *ts {
303 ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
304 } else {
305 0
306 }
307 })
308 .collect();
309 ages_days.sort_unstable();
310
311 let buckets = vec![
312 ("0-30d", 0usize, Some(30usize)),
313 ("31-90d", 31usize, Some(90usize)),
314 ("91-180d", 91usize, Some(180usize)),
315 ("181-365d", 181usize, Some(365usize)),
316 ("366d+", 366usize, None),
317 ];
318
319 let mut counts = vec![0usize; buckets.len()];
320 for age in &ages_days {
321 for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
322 let in_range = if let Some(max_days) = max_days {
323 *age >= *min_days && *age <= *max_days
324 } else {
325 *age >= *min_days
326 };
327 if in_range {
328 counts[idx] += 1;
329 break;
330 }
331 }
332 }
333
334 let total_files = ages_days.len();
335 let bucket_rows: Vec<CodeAgeBucket> = buckets
336 .into_iter()
337 .zip(counts)
338 .map(|((label, min_days, max_days), files)| CodeAgeBucket {
339 label: label.to_string(),
340 min_days,
341 max_days,
342 files,
343 pct: if total_files == 0 {
344 0.0
345 } else {
346 round_f64(files as f64 / total_files as f64, 4)
347 },
348 })
349 .collect();
350
351 let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
352 let (recent_refreshes, prior_refreshes, refresh_trend) =
353 compute_refresh_trend(commits, reference_ts, &tracked_paths);
354
355 CodeAgeDistributionReport {
356 buckets: bucket_rows,
357 recent_refreshes,
358 prior_refreshes,
359 refresh_trend,
360 }
361}
362
363fn compute_refresh_trend(
364 commits: &[tokmd_git::GitCommit],
365 reference_ts: i64,
366 tracked_paths: &BTreeSet<String>,
367) -> (usize, usize, TrendClass) {
368 if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
369 return (0, 0, TrendClass::Flat);
370 }
371
372 let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
373 let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
374
375 let mut recent_files: BTreeSet<String> = BTreeSet::new();
376 let mut prior_files: BTreeSet<String> = BTreeSet::new();
377
378 for commit in commits {
379 if commit.timestamp >= recent_start {
380 for file in &commit.files {
381 let normalized = normalize_git_path(file);
382 if tracked_paths.contains(&normalized) {
383 recent_files.insert(normalized);
384 }
385 }
386 } else if commit.timestamp >= prior_start {
387 for file in &commit.files {
388 let normalized = normalize_git_path(file);
389 if tracked_paths.contains(&normalized) {
390 prior_files.insert(normalized);
391 }
392 }
393 }
394 }
395
396 let recent = recent_files.len();
397 let prior = prior_files.len();
398 let trend = if prior == 0 {
399 if recent > 0 {
400 TrendClass::Rising
401 } else {
402 TrendClass::Flat
403 }
404 } else {
405 let delta_pct = (recent as f64 - prior as f64) / prior as f64;
406 if delta_pct > REFRESH_TREND_EPSILON {
407 TrendClass::Rising
408 } else if delta_pct < -REFRESH_TREND_EPSILON {
409 TrendClass::Falling
410 } else {
411 TrendClass::Flat
412 }
413 };
414
415 (recent, prior, trend)
416}
417
/// Normalizes a path as reported by git: backslashes become forward slashes
/// and a single leading `./` is removed.
fn normalize_git_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_string(),
        None => unified,
    }
}