1use std::collections::{BTreeMap, BTreeSet};
2use std::path::Path;
3
4use anyhow::Result;
5use tokmd_analysis_types::{
6 BusFactorRow, CodeAgeBucket, CodeAgeDistributionReport, CommitIntentCounts, CommitIntentReport,
7 CouplingRow, FreshnessReport, GitReport, HotspotRow, ModuleFreshnessRow, ModuleIntentRow,
8 TrendClass,
9};
10use tokmd_types::{ExportData, FileKind, FileRow};
11
12use tokmd_analysis_util::normalize_path;
13use tokmd_math::{percentile, round_f64};
14
/// Number of seconds in one day; used to convert timestamp differences to whole days.
const SECONDS_PER_DAY: i64 = 86_400;
/// Length, in days, of each of the two adjacent windows compared by the refresh trend.
const REFRESH_WINDOW_DAYS: i64 = 30;
/// Relative change (as a fraction of the prior window) that must be exceeded
/// before the refresh trend is classified as rising or falling.
const REFRESH_TREND_EPSILON: f64 = 0.10;
18
19pub fn build_git_report(
20 repo_root: &Path,
21 export: &ExportData,
22 commits: &[tokmd_git::GitCommit],
23) -> Result<GitReport> {
24 let mut row_map: BTreeMap<String, (&FileRow, String)> = BTreeMap::new();
25 for row in export.rows.iter().filter(|r| r.kind == FileKind::Parent) {
26 let key = normalize_path(&row.path, repo_root);
27 row_map.insert(key, (row, row.module.clone()));
28 }
29
30 let mut commit_counts: BTreeMap<String, usize> = BTreeMap::new();
31 let mut authors_by_module: BTreeMap<String, BTreeSet<String>> = BTreeMap::new();
32 let mut last_change: BTreeMap<String, i64> = BTreeMap::new();
33 let mut max_ts = 0i64;
34
35 for commit in commits {
36 max_ts = max_ts.max(commit.timestamp);
37 for file in &commit.files {
38 let key = normalize_git_path(file);
39 if let Some((row, module)) = row_map.get(&key) {
40 if let Some(val) = commit_counts.get_mut(&key) {
41 *val += 1;
42 } else {
43 commit_counts.insert(key.clone(), 1);
44 }
45 if let Some(val) = authors_by_module.get_mut(module) {
46 val.insert(commit.author.clone());
47 } else {
48 let mut set = BTreeSet::new();
49 set.insert(commit.author.clone());
50 authors_by_module.insert(module.clone(), set);
51 }
52 if !last_change.contains_key(&key) {
53 last_change.insert(key.clone(), commit.timestamp);
54 }
55 let _ = row;
56 }
57 }
58 }
59
60 let mut hotspots: Vec<HotspotRow> = commit_counts
61 .iter()
62 .filter_map(|(path, commits)| {
63 let (row, _) = row_map.get(path)?;
64 Some(HotspotRow {
65 path: path.clone(),
66 commits: *commits,
67 lines: row.lines,
68 score: row.lines * commits,
69 })
70 })
71 .collect();
72 hotspots.sort_by(|a, b| b.score.cmp(&a.score).then_with(|| a.path.cmp(&b.path)));
73
74 let mut bus_factor: Vec<BusFactorRow> = authors_by_module
75 .into_iter()
76 .map(|(module, authors)| BusFactorRow {
77 module,
78 authors: authors.len(),
79 })
80 .collect();
81 bus_factor.sort_by(|a, b| {
82 a.authors
83 .cmp(&b.authors)
84 .then_with(|| a.module.cmp(&b.module))
85 });
86
87 let freshness = build_freshness_report(&last_change, &row_map, max_ts);
88 let age_distribution = build_code_age_distribution(&last_change, max_ts, commits);
89
90 let coupling = build_coupling(commits, &row_map);
91 let intent = build_intent_report(commits, &row_map);
92
93 Ok(GitReport {
94 commits_scanned: commits.len(),
95 files_seen: commit_counts.len(),
96 hotspots,
97 bus_factor,
98 freshness,
99 coupling,
100 age_distribution: Some(age_distribution),
101 intent: Some(intent),
102 })
103}
104
105fn build_freshness_report(
106 last_change: &BTreeMap<String, i64>,
107 row_map: &BTreeMap<String, (&FileRow, String)>,
108 reference_ts: i64,
109) -> FreshnessReport {
110 let threshold_days = 365usize;
111 let mut stale_files = 0usize;
112 let mut total_files = 0usize;
113 let mut by_module: BTreeMap<String, Vec<usize>> = BTreeMap::new();
114
115 for (path, ts) in last_change {
116 let (_, module) = match row_map.get(path) {
117 Some(v) => v,
118 None => continue,
119 };
120 let days = if reference_ts > *ts {
121 ((reference_ts - *ts) / 86_400) as usize
122 } else {
123 0
124 };
125 total_files += 1;
126 if days > threshold_days {
127 stale_files += 1;
128 }
129 if let Some(list) = by_module.get_mut(module) {
130 list.push(days);
131 } else {
132 by_module.insert(module.clone(), vec![days]);
133 }
134 }
135
136 let stale_pct = if total_files == 0 {
137 0.0
138 } else {
139 round_f64(stale_files as f64 / total_files as f64, 4)
140 };
141
142 let mut module_rows: Vec<ModuleFreshnessRow> = Vec::new();
143 for (module, mut days) in by_module {
144 days.sort();
145 let avg = if days.is_empty() {
146 0.0
147 } else {
148 round_f64(days.iter().sum::<usize>() as f64 / days.len() as f64, 2)
149 };
150 let p90 = if days.is_empty() {
151 0.0
152 } else {
153 round_f64(percentile(&days, 0.90), 2)
154 };
155 let stale = days.iter().filter(|d| **d > threshold_days).count();
156 let pct = if days.is_empty() {
157 0.0
158 } else {
159 round_f64(stale as f64 / days.len() as f64, 4)
160 };
161 module_rows.push(ModuleFreshnessRow {
162 module,
163 avg_days: avg,
164 p90_days: p90,
165 stale_pct: pct,
166 });
167 }
168 module_rows.sort_by(|a, b| a.module.cmp(&b.module));
169
170 FreshnessReport {
171 threshold_days,
172 stale_files,
173 total_files,
174 stale_pct,
175 by_module: module_rows,
176 }
177}
178
179fn build_coupling(
180 commits: &[tokmd_git::GitCommit],
181 row_map: &BTreeMap<String, (&FileRow, String)>,
182) -> Vec<CouplingRow> {
183 let mut pairs: BTreeMap<(&str, &str), usize> = BTreeMap::new();
184 let mut touches: BTreeMap<&str, usize> = BTreeMap::new();
185 let mut commits_considered: usize = 0;
186
187 for commit in commits {
188 let mut modules: BTreeSet<&str> = BTreeSet::new();
189 for file in &commit.files {
190 let key = normalize_git_path(file);
191 if let Some((_row, module)) = row_map.get(&key) {
192 modules.insert(module.as_str());
193 }
194 }
195 if modules.is_empty() {
197 continue;
198 }
199 commits_considered += 1;
200 for m in &modules {
201 if let Some(val) = touches.get_mut(m) {
202 *val += 1;
203 } else {
204 touches.insert(*m, 1);
205 }
206 }
207 let modules: Vec<&str> = modules.into_iter().collect();
208 for i in 0..modules.len() {
209 let left = modules[i];
210 for right in modules.iter().skip(i + 1) {
211 let key = (left, *right);
212 *pairs.entry(key).or_insert(0) += 1;
213 }
214 }
215 }
216
217 let n = commits_considered;
218
219 let mut rows: Vec<CouplingRow> = pairs
220 .into_iter()
221 .map(|((left, right), count)| {
222 let n_a = touches.get(left).copied().unwrap_or(0);
223 let n_b = touches.get(right).copied().unwrap_or(0);
224 let denom = (n_a + n_b).saturating_sub(count);
225 let jaccard = if denom > 0 {
226 Some(round_f64(count as f64 / denom as f64, 4))
227 } else {
228 None
229 };
230 let lift = if n > 0 && n_a > 0 && n_b > 0 {
231 Some(round_f64(
232 (count as f64 * n as f64) / (n_a as f64 * n_b as f64),
233 4,
234 ))
235 } else {
236 None
237 };
238 CouplingRow {
239 left: left.to_string(),
240 right: right.to_string(),
241 count,
242 jaccard,
243 lift,
244 n_left: Some(n_a),
245 n_right: Some(n_b),
246 }
247 })
248 .collect();
249 rows.sort_by(|a, b| b.count.cmp(&a.count).then_with(|| a.left.cmp(&b.left)));
250 rows
251}
252
253fn build_intent_report(
254 commits: &[tokmd_git::GitCommit],
255 row_map: &BTreeMap<String, (&FileRow, String)>,
256) -> CommitIntentReport {
257 let mut overall = CommitIntentCounts::default();
258 let mut by_module_counts: BTreeMap<String, CommitIntentCounts> = BTreeMap::new();
259
260 for commit in commits {
261 let kind = tokmd_git::classify_intent(&commit.subject);
262 overall.increment(kind);
263
264 let mut modules: BTreeSet<&str> = BTreeSet::new();
266 for file in &commit.files {
267 let key = normalize_git_path(file);
268 if let Some((_row, module)) = row_map.get(&key) {
269 modules.insert(module.as_str());
270 }
271 }
272 for module in modules {
273 by_module_counts
274 .entry(module.to_string())
275 .or_default()
276 .increment(kind);
277 }
278 }
279
280 let unknown_pct = if overall.total > 0 {
281 round_f64(overall.other as f64 / overall.total as f64, 4)
282 } else {
283 0.0
284 };
285
286 let corrective_ratio = if overall.total > 0 {
287 Some(round_f64(
288 (overall.fix + overall.revert) as f64 / overall.total as f64,
289 4,
290 ))
291 } else {
292 None
293 };
294
295 let mut by_module: Vec<ModuleIntentRow> = by_module_counts
296 .into_iter()
297 .map(|(module, counts)| ModuleIntentRow { module, counts })
298 .collect();
299 by_module.sort_by(|a, b| a.module.cmp(&b.module));
300
301 CommitIntentReport {
302 overall,
303 by_module,
304 unknown_pct,
305 corrective_ratio,
306 }
307}
308
309fn build_code_age_distribution(
310 last_change: &BTreeMap<String, i64>,
311 reference_ts: i64,
312 commits: &[tokmd_git::GitCommit],
313) -> CodeAgeDistributionReport {
314 let mut ages_days: Vec<usize> = last_change
315 .values()
316 .map(|ts| {
317 if reference_ts > *ts {
318 ((reference_ts - *ts) / SECONDS_PER_DAY) as usize
319 } else {
320 0
321 }
322 })
323 .collect();
324 ages_days.sort_unstable();
325
326 let buckets = vec![
327 ("0-30d", 0usize, Some(30usize)),
328 ("31-90d", 31usize, Some(90usize)),
329 ("91-180d", 91usize, Some(180usize)),
330 ("181-365d", 181usize, Some(365usize)),
331 ("366d+", 366usize, None),
332 ];
333
334 let mut counts = vec![0usize; buckets.len()];
335 for age in &ages_days {
336 for (idx, (_label, min_days, max_days)) in buckets.iter().enumerate() {
337 let in_range = if let Some(max_days) = max_days {
338 *age >= *min_days && *age <= *max_days
339 } else {
340 *age >= *min_days
341 };
342 if in_range {
343 counts[idx] += 1;
344 break;
345 }
346 }
347 }
348
349 let total_files = ages_days.len();
350 let bucket_rows: Vec<CodeAgeBucket> = buckets
351 .into_iter()
352 .zip(counts)
353 .map(|((label, min_days, max_days), files)| CodeAgeBucket {
354 label: label.to_string(),
355 min_days,
356 max_days,
357 files,
358 pct: if total_files == 0 {
359 0.0
360 } else {
361 round_f64(files as f64 / total_files as f64, 4)
362 },
363 })
364 .collect();
365
366 let tracked_paths: BTreeSet<String> = last_change.keys().cloned().collect();
367 let (recent_refreshes, prior_refreshes, refresh_trend) =
368 compute_refresh_trend(commits, reference_ts, &tracked_paths);
369
370 CodeAgeDistributionReport {
371 buckets: bucket_rows,
372 recent_refreshes,
373 prior_refreshes,
374 refresh_trend,
375 }
376}
377
378fn compute_refresh_trend(
379 commits: &[tokmd_git::GitCommit],
380 reference_ts: i64,
381 tracked_paths: &BTreeSet<String>,
382) -> (usize, usize, TrendClass) {
383 if commits.is_empty() || tracked_paths.is_empty() || reference_ts <= 0 {
384 return (0, 0, TrendClass::Flat);
385 }
386
387 let recent_start = reference_ts - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
388 let prior_start = recent_start - REFRESH_WINDOW_DAYS * SECONDS_PER_DAY;
389
390 let mut recent_files: BTreeSet<String> = BTreeSet::new();
391 let mut prior_files: BTreeSet<String> = BTreeSet::new();
392
393 for commit in commits {
394 if commit.timestamp >= recent_start {
395 for file in &commit.files {
396 let normalized = normalize_git_path(file);
397 if tracked_paths.contains(&normalized) {
398 recent_files.insert(normalized);
399 }
400 }
401 } else if commit.timestamp >= prior_start {
402 for file in &commit.files {
403 let normalized = normalize_git_path(file);
404 if tracked_paths.contains(&normalized) {
405 prior_files.insert(normalized);
406 }
407 }
408 }
409 }
410
411 let recent = recent_files.len();
412 let prior = prior_files.len();
413 let trend = if prior == 0 {
414 if recent > 0 {
415 TrendClass::Rising
416 } else {
417 TrendClass::Flat
418 }
419 } else {
420 let delta_pct = (recent as f64 - prior as f64) / prior as f64;
421 if delta_pct > REFRESH_TREND_EPSILON {
422 TrendClass::Rising
423 } else if delta_pct < -REFRESH_TREND_EPSILON {
424 TrendClass::Falling
425 } else {
426 TrendClass::Flat
427 }
428 };
429
430 (recent, prior, trend)
431}
432
/// Normalizes a path string into forward-slash form without a leading `./`.
///
/// Every backslash is replaced by `/`, then at most one leading `./` is
/// removed. Separator unification runs first, so a leading `.\` is stripped
/// as well. No other normalization (such as collapsing `..` or repeated
/// slashes) is performed.
fn normalize_git_path(path: &str) -> String {
    let mut out = path.replace('\\', "/");
    if out.starts_with("./") {
        // Drop the two-byte ASCII prefix in place instead of allocating a
        // second string.
        out.replace_range(..2, "");
    }
    out
}