// fallow_core/churn.rs

//! Git churn analysis for hotspot detection.
//!
//! Shells out to `git log` to collect per-file change history, then computes
//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use serde::Serialize;
11
/// Seconds per day (24 h × 60 min × 60 s); converts a commit's age to days.
const SECS_PER_DAY: f64 = 24.0 * 60.0 * 60.0;

/// Half-life, in days, of the exponential recency weighting: a commit's
/// weight halves for every 90 days of age (50% at 90 days, 25% at 180 days).
const HALF_LIFE_DAYS: f64 = 90.0;
18
/// A validated `--since` value, ready to hand to git and to show to the user.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Argument for `git log --after`, e.g. `"6 months ago"` or `"2025-06-01"`.
    pub git_after: String,
    /// Text for user-facing output, e.g. `"6 months"`.
    pub display: String,
}
27
28/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
30#[serde(rename_all = "snake_case")]
31pub enum ChurnTrend {
32    /// Recent half has >1.5× the commits of the older half.
33    Accelerating,
34    /// Churn is roughly stable between halves.
35    Stable,
36    /// Recent half has <0.67× the commits of the older half.
37    Cooling,
38}
39
40impl std::fmt::Display for ChurnTrend {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            Self::Accelerating => write!(f, "accelerating"),
44            Self::Stable => write!(f, "stable"),
45            Self::Cooling => write!(f, "cooling"),
46        }
47    }
48}
49
50/// Per-file churn data collected from git history.
51#[derive(Debug, Clone)]
52pub struct FileChurn {
53    /// Absolute file path.
54    pub path: PathBuf,
55    /// Total number of commits touching this file in the analysis window.
56    pub commits: u32,
57    /// Recency-weighted commit count (exponential decay, half-life 90 days).
58    pub weighted_commits: f64,
59    /// Total lines added across all commits.
60    pub lines_added: u32,
61    /// Total lines deleted across all commits.
62    pub lines_deleted: u32,
63    /// Churn trend: accelerating, stable, or cooling.
64    pub trend: ChurnTrend,
65}
66
67/// Result of churn analysis.
68pub struct ChurnResult {
69    /// Per-file churn data, keyed by absolute path.
70    pub files: FxHashMap<PathBuf, FileChurn>,
71    /// Whether the repository is a shallow clone.
72    pub shallow_clone: bool,
73}
74
75/// Parse a `--since` value into a git-compatible duration.
76///
77/// Accepts:
78/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
79/// - ISO dates: `2025-06-01`
80///
81/// # Errors
82///
83/// Returns an error if the input is not a recognized duration format or ISO date,
84/// the numeric part is invalid, or the duration is zero.
85pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
86    // Try ISO date first (YYYY-MM-DD)
87    if is_iso_date(input) {
88        return Ok(SinceDuration {
89            git_after: input.to_string(),
90            display: input.to_string(),
91        });
92    }
93
94    // Parse duration: number + unit
95    let (num_str, unit) = split_number_unit(input)?;
96    let num: u64 = num_str
97        .parse()
98        .map_err(|_| format!("invalid number in --since: {input}"))?;
99
100    if num == 0 {
101        return Err("--since duration must be greater than 0".to_string());
102    }
103
104    match unit {
105        "d" | "day" | "days" => {
106            let s = if num == 1 { "" } else { "s" };
107            Ok(SinceDuration {
108                git_after: format!("{num} day{s} ago"),
109                display: format!("{num} day{s}"),
110            })
111        }
112        "w" | "week" | "weeks" => {
113            let s = if num == 1 { "" } else { "s" };
114            Ok(SinceDuration {
115                git_after: format!("{num} week{s} ago"),
116                display: format!("{num} week{s}"),
117            })
118        }
119        "m" | "month" | "months" => {
120            let s = if num == 1 { "" } else { "s" };
121            Ok(SinceDuration {
122                git_after: format!("{num} month{s} ago"),
123                display: format!("{num} month{s}"),
124            })
125        }
126        "y" | "year" | "years" => {
127            let s = if num == 1 { "" } else { "s" };
128            Ok(SinceDuration {
129                git_after: format!("{num} year{s} ago"),
130                display: format!("{num} year{s}"),
131            })
132        }
133        _ => Err(format!(
134            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
135        )),
136    }
137}
138
139/// Analyze git churn for files in the given root directory.
140///
141/// Returns `None` if git is not available or the directory is not a git repository.
142pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
143    let shallow = is_shallow_clone(root);
144
145    let output = Command::new("git")
146        .args([
147            "log",
148            "--numstat",
149            "--no-merges",
150            "--no-renames",
151            "--format=format:%at",
152            &format!("--after={}", since.git_after),
153        ])
154        .current_dir(root)
155        .output();
156
157    let output = match output {
158        Ok(o) => o,
159        Err(e) => {
160            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
161            return None;
162        }
163    };
164
165    if !output.status.success() {
166        let stderr = String::from_utf8_lossy(&output.stderr);
167        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
168        return None;
169    }
170
171    let stdout = String::from_utf8_lossy(&output.stdout);
172    let files = parse_git_log(&stdout, root);
173
174    Some(ChurnResult {
175        files,
176        shallow_clone: shallow,
177    })
178}
179
/// Report whether the repository containing `root` is a shallow clone.
///
/// Asks `git rev-parse --is-shallow-repository` and treats a (case-insensitive)
/// `true` on stdout as shallow. Any failure to run git yields `false`.
#[must_use]
pub fn is_shallow_clone(root: &Path) -> bool {
    let Ok(probe) = Command::new("git")
        .args(["rev-parse", "--is-shallow-repository"])
        .current_dir(root)
        .output()
    else {
        return false;
    };

    let answer = String::from_utf8_lossy(&probe.stdout);
    answer.trim().eq_ignore_ascii_case("true")
}
194
/// Report whether `root` lies inside a git repository.
///
/// Runs `git rev-parse --git-dir` with its output discarded and returns `true`
/// only when the command ran and exited successfully.
#[must_use]
pub fn is_git_repo(root: &Path) -> bool {
    use std::process::Stdio;

    let mut probe = Command::new("git");
    probe
        .args(["rev-parse", "--git-dir"])
        .current_dir(root)
        .stdout(Stdio::null())
        .stderr(Stdio::null());

    matches!(probe.status(), Ok(status) if status.success())
}
207
208// ── Internal ──────────────────────────────────────────────────────
209
/// Running totals for one file while the git log output is being parsed.
struct FileAccum {
    /// Epoch-second timestamp of every commit that touched the file; the
    /// length is the commit count and the values feed the trend computation.
    commit_timestamps: Vec<u64>,
    /// Sum of the recency weights of those commits.
    weighted_commits: f64,
    /// Lines added so far.
    lines_added: u32,
    /// Lines deleted so far.
    lines_deleted: u32,
}
219
220/// Parse `git log --numstat --format=format:%at` output.
221#[expect(
222    clippy::cast_possible_truncation,
223    reason = "commit count per file is bounded by git history depth"
224)]
225fn parse_git_log(stdout: &str, root: &Path) -> FxHashMap<PathBuf, FileChurn> {
226    let now_secs = std::time::SystemTime::now()
227        .duration_since(std::time::UNIX_EPOCH)
228        .unwrap_or_default()
229        .as_secs();
230
231    let mut accum: FxHashMap<PathBuf, FileAccum> = FxHashMap::default();
232    let mut current_timestamp: Option<u64> = None;
233
234    for line in stdout.lines() {
235        let line = line.trim();
236        if line.is_empty() {
237            continue;
238        }
239
240        // Try to parse as epoch timestamp (from %at format)
241        if let Ok(ts) = line.parse::<u64>() {
242            current_timestamp = Some(ts);
243            continue;
244        }
245
246        // Try to parse as numstat line: "10\t5\tpath/to/file"
247        if let Some((added, deleted, path)) = parse_numstat_line(line) {
248            let abs_path = root.join(path);
249            let ts = current_timestamp.unwrap_or(now_secs);
250            let age_days = (now_secs.saturating_sub(ts)) as f64 / SECS_PER_DAY;
251            let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
252
253            let entry = accum.entry(abs_path).or_insert_with(|| FileAccum {
254                commit_timestamps: Vec::new(),
255                weighted_commits: 0.0,
256                lines_added: 0,
257                lines_deleted: 0,
258            });
259            entry.commit_timestamps.push(ts);
260            entry.weighted_commits += weight;
261            entry.lines_added += added;
262            entry.lines_deleted += deleted;
263        }
264    }
265
266    // Convert accumulators to FileChurn with trend computation
267    accum
268        .into_iter()
269        .map(|(path, acc)| {
270            let commits = acc.commit_timestamps.len() as u32;
271            let trend = compute_trend(&acc.commit_timestamps);
272            let churn = FileChurn {
273                path: path.clone(),
274                commits,
275                weighted_commits: (acc.weighted_commits * 100.0).round() / 100.0,
276                lines_added: acc.lines_added,
277                lines_deleted: acc.lines_deleted,
278                trend,
279            };
280            (path, churn)
281        })
282        .collect()
283}
284
/// Split one `--numstat` line into `(added, deleted, path)`.
///
/// The expected shape is `"10\t5\tpath/to/file.ts"`. Returns `None` when the
/// line has fewer than three tab-separated fields or when either count is not
/// a `u32`, which is how binary files (`"-\t-\tpath"`) get skipped.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    // Only the first two tabs separate fields; later tabs belong to the path.
    let (added_field, rest) = line.split_once('\t')?;
    let (deleted_field, path) = rest.split_once('\t')?;

    // Binary entries carry "-" here and fail to parse.
    let added = added_field.parse::<u32>().ok()?;
    let deleted = deleted_field.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
299
300/// Compute churn trend by splitting commits into two temporal halves.
301///
302/// Finds the midpoint between the oldest and newest commit timestamps,
303/// then compares commit counts in each half:
304/// - Recent > 1.5× older → Accelerating
305/// - Recent < 0.67× older → Cooling
306/// - Otherwise → Stable
307fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
308    if timestamps.len() < 2 {
309        return ChurnTrend::Stable;
310    }
311
312    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
313    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
314
315    if max_ts == min_ts {
316        return ChurnTrend::Stable;
317    }
318
319    let midpoint = min_ts + (max_ts - min_ts) / 2;
320    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
321    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
322
323    if older < 1.0 {
324        return ChurnTrend::Stable;
325    }
326
327    let ratio = recent / older;
328    if ratio > 1.5 {
329        ChurnTrend::Accelerating
330    } else if ratio < 0.67 {
331        ChurnTrend::Cooling
332    } else {
333        ChurnTrend::Stable
334    }
335}
336
/// Check whether `input` has the exact shape `YYYY-MM-DD`.
///
/// Only the layout is verified (ten bytes, ASCII digits, dashes at positions
/// 4 and 7); month and day ranges are not validated.
fn is_iso_date(input: &str) -> bool {
    matches!(
        input.as_bytes(),
        [y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1]
            if [y0, y1, y2, y3, m0, m1, d0, d1]
                .iter()
                .all(|byte| byte.is_ascii_digit())
    )
}
345
/// Split a duration such as `"90days"` into its digits (`"90"`) and unit
/// (`"days"`).
///
/// The split happens at the first character that is not an ASCII digit.
///
/// # Errors
///
/// Fails when the input consists only of digits (or is empty), or when it
/// does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(digits_end) => Ok(input.split_at(digits_end)),
    }
}
357
#[cfg(test)]
mod tests {
    use super::*;

    /// Root used by the `parse_git_log` tests; it is never touched on disk.
    fn project_root() -> &'static Path {
        Path::new("/project")
    }

    /// Parse `input` and check both the git argument and the display text.
    fn assert_since(input: &str, git_after: &str, display: &str) {
        let parsed = parse_since(input).unwrap();
        assert_eq!(parsed.git_after, git_after);
        assert_eq!(parsed.display, display);
    }

    // ── parse_since ──────────────────────────────────────────────

    #[test]
    fn parse_since_months_short() {
        assert_since("6m", "6 months ago", "6 months");
    }

    #[test]
    fn parse_since_months_long() {
        assert_since("6months", "6 months ago", "6 months");
    }

    #[test]
    fn parse_since_days() {
        assert_since("90d", "90 days ago", "90 days");
    }

    #[test]
    fn parse_since_year_singular() {
        assert_since("1y", "1 year ago", "1 year");
    }

    #[test]
    fn parse_since_years_plural() {
        assert_since("2years", "2 years ago", "2 years");
    }

    #[test]
    fn parse_since_weeks() {
        assert_since("2w", "2 weeks ago", "2 weeks");
    }

    #[test]
    fn parse_since_iso_date() {
        assert_since("2025-06-01", "2025-06-01", "2025-06-01");
    }

    #[test]
    fn parse_since_month_singular() {
        let parsed = parse_since("1month").unwrap();
        assert_eq!(parsed.display, "1 month");
    }

    #[test]
    fn parse_since_day_singular() {
        let parsed = parse_since("1day").unwrap();
        assert_eq!(parsed.display, "1 day");
    }

    #[test]
    fn parse_since_zero_rejected() {
        assert!(parse_since("0m").is_err());
    }

    #[test]
    fn parse_since_no_unit_rejected() {
        assert!(parse_since("90").is_err());
    }

    #[test]
    fn parse_since_unknown_unit_rejected() {
        assert!(parse_since("6x").is_err());
    }

    #[test]
    fn parse_since_no_number_rejected() {
        assert!(parse_since("months").is_err());
    }

    // ── parse_numstat_line ───────────────────────────────────────

    #[test]
    fn numstat_normal() {
        let (added, deleted, path) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
        assert_eq!(added, 10);
        assert_eq!(deleted, 5);
        assert_eq!(path, "src/file.ts");
    }

    #[test]
    fn numstat_binary_skipped() {
        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
    }

    #[test]
    fn numstat_zero_lines() {
        let (added, deleted, path) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
        assert_eq!(added, 0);
        assert_eq!(deleted, 0);
        assert_eq!(path, "src/empty.ts");
    }

    // ── compute_trend ────────────────────────────────────────────

    #[test]
    fn trend_empty_is_stable() {
        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_single_commit_is_stable() {
        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_accelerating() {
        // Two commits in the older half, five in the recent half.
        let history = [100, 200, 800, 850, 900, 950, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Accelerating);
    }

    #[test]
    fn trend_cooling() {
        // Five commits in the older half, two in the recent half.
        let history = [100, 150, 200, 250, 300, 900, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Cooling);
    }

    #[test]
    fn trend_stable_even_distribution() {
        // Three commits per half → ratio 1.0 → stable.
        let history = [100, 200, 300, 700, 800, 900];
        assert_eq!(compute_trend(&history), ChurnTrend::Stable);
    }

    #[test]
    fn trend_same_timestamp_is_stable() {
        let history = [500, 500, 500];
        assert_eq!(compute_trend(&history), ChurnTrend::Stable);
    }

    // ── is_iso_date ──────────────────────────────────────────────

    #[test]
    fn iso_date_valid() {
        for date in ["2025-06-01", "2025-12-31"] {
            assert!(is_iso_date(date));
        }
    }

    #[test]
    fn iso_date_with_time_rejected() {
        // Only the exact ten-character YYYY-MM-DD form is accepted.
        assert!(!is_iso_date("2025-06-01T00:00:00"));
    }

    #[test]
    fn iso_date_invalid() {
        for not_a_date in ["6months", "2025", "not-a-date", "abcd-ef-gh"] {
            assert!(!is_iso_date(not_a_date));
        }
    }

    // ── Display ──────────────────────────────────────────────────

    #[test]
    fn trend_display() {
        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
    }

    // ── parse_git_log ───────────────────────────────────────────

    #[test]
    fn parse_git_log_single_commit() {
        let log = "1700000000\n10\t5\tsrc/index.ts\n";
        let parsed = parse_git_log(log, project_root());
        assert_eq!(parsed.len(), 1);
        let churn = &parsed[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 1);
        assert_eq!(churn.lines_added, 10);
        assert_eq!(churn.lines_deleted, 5);
    }

    #[test]
    fn parse_git_log_multiple_commits_same_file() {
        let log = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
        let parsed = parse_git_log(log, project_root());
        assert_eq!(parsed.len(), 1);
        let churn = &parsed[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 2);
        assert_eq!(churn.lines_added, 13);
        assert_eq!(churn.lines_deleted, 7);
    }

    #[test]
    fn parse_git_log_multiple_files() {
        let log = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
        let parsed = parse_git_log(log, project_root());
        assert_eq!(parsed.len(), 2);
        assert!(parsed.contains_key(&PathBuf::from("/project/src/a.ts")));
        assert!(parsed.contains_key(&PathBuf::from("/project/src/b.ts")));
    }

    #[test]
    fn parse_git_log_empty_output() {
        let parsed = parse_git_log("", project_root());
        assert!(parsed.is_empty());
    }

    #[test]
    fn parse_git_log_skips_binary_files() {
        let log = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
        let parsed = parse_git_log(log, project_root());
        assert_eq!(parsed.len(), 1);
        assert!(!parsed.contains_key(&PathBuf::from("/project/image.png")));
    }

    #[test]
    fn parse_git_log_weighted_commits_are_positive() {
        // A timestamp at "now" keeps the weight from rounding down to zero.
        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let log = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
        let parsed = parse_git_log(&log, project_root());
        let churn = &parsed[&PathBuf::from("/project/src/a.ts")];
        assert!(
            churn.weighted_commits > 0.0,
            "weighted_commits should be positive for recent commits"
        );
    }

    // ── compute_trend edge cases ─────────────────────────────────

    #[test]
    fn trend_boundary_1_5x_ratio() {
        // midpoint = 100 + (1000 - 100) / 2 = 550
        // older:  100, 200        (2 timestamps <= 550)
        // recent: 600, 800, 1000  (3 timestamps > 550)
        // ratio = 3 / 2 = 1.5, which is not > 1.5, so the trend is stable.
        let history = [100, 200, 600, 800, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Stable);
    }

    #[test]
    fn trend_just_above_1_5x() {
        // midpoint = 100 + (1000 - 100) / 2 = 550
        // older:  100             (1 timestamp <= 550)
        // recent: 600, 800, 1000  (3 timestamps > 550)
        // ratio = 3 / 1 = 3.0 → accelerating
        let history = [100, 600, 800, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Accelerating);
    }
}
625        // ratio = 3/1 = 3.0 → accelerating
626        let timestamps = vec![100, 600, 800, 1000];
627        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
628    }
629}