Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use serde::Serialize;
11
/// Recency weight half-life in days.
///
/// Each commit is weighted by `0.5^(age_days / HALF_LIFE_DAYS)`, so a commit
/// from 90 days ago counts half as much as today's commit, and one from
/// 180 days ago counts 25%.
const HALF_LIFE_DAYS: f64 = 90.0;
15
/// Parsed duration for the `--since` flag.
///
/// Carries the same time window in two forms: one for git, one for humans.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`).
    pub display: String,
}
24
/// Churn trend indicator for a single file.
///
/// Derived by splitting the span between the file's oldest and newest commit
/// (within the analysis window) at its midpoint and comparing the number of
/// commits in the recent half against the older half.
///
/// Serialized in `snake_case` (`accelerating`, `stable`, `cooling`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// Recent half has >1.5× the commits of the older half.
    Accelerating,
    /// Churn is roughly stable between halves (also used when there is too
    /// little history to tell).
    Stable,
    /// Recent half has <0.67× the commits of the older half.
    Cooling,
}
36
37impl std::fmt::Display for ChurnTrend {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            Self::Accelerating => write!(f, "accelerating"),
41            Self::Stable => write!(f, "stable"),
42            Self::Cooling => write!(f, "cooling"),
43        }
44    }
45}
46
/// Per-file churn data collected from git history.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// File path: the analysis root joined with the path reported by git
    /// (absolute whenever the root is absolute).
    pub path: PathBuf,
    /// Total number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
    pub weighted_commits: f64,
    /// Total lines added across all commits.
    pub lines_added: u32,
    /// Total lines deleted across all commits.
    pub lines_deleted: u32,
    /// Churn trend: accelerating, stable, or cooling.
    pub trend: ChurnTrend,
}
63
64/// Result of churn analysis.
65pub struct ChurnResult {
66    /// Per-file churn data, keyed by absolute path.
67    pub files: FxHashMap<PathBuf, FileChurn>,
68    /// Whether the repository is a shallow clone.
69    pub shallow_clone: bool,
70}
71
72/// Parse a `--since` value into a git-compatible duration.
73///
74/// Accepts:
75/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
76/// - ISO dates: `2025-06-01`
77///
78/// # Errors
79///
80/// Returns an error if the input is not a recognized duration format or ISO date,
81/// the numeric part is invalid, or the duration is zero.
82pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
83    // Try ISO date first (YYYY-MM-DD)
84    if is_iso_date(input) {
85        return Ok(SinceDuration {
86            git_after: input.to_string(),
87            display: input.to_string(),
88        });
89    }
90
91    // Parse duration: number + unit
92    let (num_str, unit) = split_number_unit(input)?;
93    let num: u64 = num_str
94        .parse()
95        .map_err(|_| format!("invalid number in --since: {input}"))?;
96
97    if num == 0 {
98        return Err("--since duration must be greater than 0".to_string());
99    }
100
101    match unit {
102        "d" | "day" | "days" => {
103            let s = if num == 1 { "" } else { "s" };
104            Ok(SinceDuration {
105                git_after: format!("{num} day{s} ago"),
106                display: format!("{num} day{s}"),
107            })
108        }
109        "w" | "week" | "weeks" => {
110            let s = if num == 1 { "" } else { "s" };
111            Ok(SinceDuration {
112                git_after: format!("{num} week{s} ago"),
113                display: format!("{num} week{s}"),
114            })
115        }
116        "m" | "month" | "months" => {
117            let s = if num == 1 { "" } else { "s" };
118            Ok(SinceDuration {
119                git_after: format!("{num} month{s} ago"),
120                display: format!("{num} month{s}"),
121            })
122        }
123        "y" | "year" | "years" => {
124            let s = if num == 1 { "" } else { "s" };
125            Ok(SinceDuration {
126                git_after: format!("{num} year{s} ago"),
127                display: format!("{num} year{s}"),
128            })
129        }
130        _ => Err(format!(
131            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
132        )),
133    }
134}
135
136/// Analyze git churn for files in the given root directory.
137///
138/// Returns `None` if git is not available or the directory is not a git repository.
139pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
140    let shallow = is_shallow_clone(root);
141
142    let output = Command::new("git")
143        .args([
144            "log",
145            "--numstat",
146            "--no-merges",
147            "--no-renames",
148            "--format=format:%at",
149            &format!("--after={}", since.git_after),
150        ])
151        .current_dir(root)
152        .output();
153
154    let output = match output {
155        Ok(o) => o,
156        Err(e) => {
157            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
158            return None;
159        }
160    };
161
162    if !output.status.success() {
163        let stderr = String::from_utf8_lossy(&output.stderr);
164        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
165        return None;
166    }
167
168    let stdout = String::from_utf8_lossy(&output.stdout);
169    let files = parse_git_log(&stdout, root);
170
171    Some(ChurnResult {
172        files,
173        shallow_clone: shallow,
174    })
175}
176
/// Report whether `root` belongs to a shallow git clone.
///
/// Asks `git rev-parse --is-shallow-repository` and returns `true` only when
/// git prints `true` (case-insensitively). Any failure to run git, or any
/// other output, yields `false`.
#[must_use]
pub fn is_shallow_clone(root: &Path) -> bool {
    let Ok(probe) = Command::new("git")
        .args(["rev-parse", "--is-shallow-repository"])
        .current_dir(root)
        .output()
    else {
        return false;
    };

    let answer = String::from_utf8_lossy(&probe.stdout);
    answer.trim().eq_ignore_ascii_case("true")
}
191
/// Report whether `root` lies inside a git repository.
///
/// Runs `git rev-parse --git-dir` with stdout and stderr discarded and treats
/// a successful exit as "yes". If git cannot be started the answer is `false`.
#[must_use]
pub fn is_git_repo(root: &Path) -> bool {
    use std::process::Stdio;

    let probe = Command::new("git")
        .args(["rev-parse", "--git-dir"])
        .current_dir(root)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    matches!(probe, Ok(exit) if exit.success())
}
204
205// ── Internal ──────────────────────────────────────────────────────
206
/// Intermediate per-file accumulator during git log parsing.
struct FileAccum {
    /// Commit timestamps (epoch seconds) for trend computation.
    commit_timestamps: Vec<u64>,
    /// Recency-weighted commit sum.
    weighted_commits: f64,
    /// Running total of added lines from this file's numstat entries.
    lines_added: u32,
    /// Running total of deleted lines from this file's numstat entries.
    lines_deleted: u32,
}
216
217/// Parse `git log --numstat --format=format:%at` output.
218#[expect(
219    clippy::cast_possible_truncation,
220    reason = "commit count per file is bounded by git history depth"
221)]
222fn parse_git_log(stdout: &str, root: &Path) -> FxHashMap<PathBuf, FileChurn> {
223    let now_secs = std::time::SystemTime::now()
224        .duration_since(std::time::UNIX_EPOCH)
225        .unwrap_or_default()
226        .as_secs();
227
228    let mut accum: FxHashMap<PathBuf, FileAccum> = FxHashMap::default();
229    let mut current_timestamp: Option<u64> = None;
230
231    for line in stdout.lines() {
232        let line = line.trim();
233        if line.is_empty() {
234            continue;
235        }
236
237        // Try to parse as epoch timestamp (from %at format)
238        if let Ok(ts) = line.parse::<u64>() {
239            current_timestamp = Some(ts);
240            continue;
241        }
242
243        // Try to parse as numstat line: "10\t5\tpath/to/file"
244        if let Some((added, deleted, path)) = parse_numstat_line(line) {
245            let abs_path = root.join(path);
246            let ts = current_timestamp.unwrap_or(now_secs);
247            let age_days = (now_secs.saturating_sub(ts)) as f64 / 86400.0;
248            let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
249
250            let entry = accum.entry(abs_path).or_insert_with(|| FileAccum {
251                commit_timestamps: Vec::new(),
252                weighted_commits: 0.0,
253                lines_added: 0,
254                lines_deleted: 0,
255            });
256            entry.commit_timestamps.push(ts);
257            entry.weighted_commits += weight;
258            entry.lines_added += added;
259            entry.lines_deleted += deleted;
260        }
261    }
262
263    // Convert accumulators to FileChurn with trend computation
264    accum
265        .into_iter()
266        .map(|(path, acc)| {
267            let commits = acc.commit_timestamps.len() as u32;
268            let trend = compute_trend(&acc.commit_timestamps);
269            let churn = FileChurn {
270                path: path.clone(),
271                commits,
272                weighted_commits: (acc.weighted_commits * 100.0).round() / 100.0,
273                lines_added: acc.lines_added,
274                lines_deleted: acc.lines_deleted,
275                trend,
276            };
277            (path, churn)
278        })
279        .collect()
280}
281
/// Split one numstat line of the form `"10\t5\tpath/to/file.ts"` into
/// `(added, deleted, path)`.
///
/// Returns `None` when the line has fewer than three tab-separated fields or
/// when either count is not a `u32`. Binary files are reported by git as
/// `"-\t-\tpath"` and are therefore skipped. Any further tabs stay part of
/// the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added_field, rest) = line.split_once('\t')?;
    let (deleted_field, path) = rest.split_once('\t')?;

    // "-" (binary file) fails to parse here and drops the whole line.
    let added = added_field.parse::<u32>().ok()?;
    let deleted = deleted_field.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
296
297/// Compute churn trend by splitting commits into two temporal halves.
298///
299/// Finds the midpoint between the oldest and newest commit timestamps,
300/// then compares commit counts in each half:
301/// - Recent > 1.5× older → Accelerating
302/// - Recent < 0.67× older → Cooling
303/// - Otherwise → Stable
304fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
305    if timestamps.len() < 2 {
306        return ChurnTrend::Stable;
307    }
308
309    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
310    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
311
312    if max_ts == min_ts {
313        return ChurnTrend::Stable;
314    }
315
316    let midpoint = min_ts + (max_ts - min_ts) / 2;
317    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
318    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
319
320    if older < 1.0 {
321        return ChurnTrend::Stable;
322    }
323
324    let ratio = recent / older;
325    if ratio > 1.5 {
326        ChurnTrend::Accelerating
327    } else if ratio < 0.67 {
328        ChurnTrend::Cooling
329    } else {
330        ChurnTrend::Stable
331    }
332}
333
/// Check whether `input` is a calendar date written exactly as `YYYY-MM-DD`.
///
/// Beyond the shape (ten bytes, digits and dashes in the right places) the
/// month must be `01`–`12` and the day must exist in that month, with
/// February 29 accepted only in leap years. Strings such as `2025-13-45`
/// are rejected.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    // Length is checked first, so the fixed indices below are in bounds.
    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
        return false;
    }

    // Decode an all-digit byte range; `None` if any byte is not 0-9.
    let field = |range: std::ops::Range<usize>| -> Option<u32> {
        bytes[range].iter().try_fold(0_u32, |acc, &b| {
            b.is_ascii_digit()
                .then(|| acc * 10 + u32::from(b - b'0'))
        })
    };
    let (Some(year), Some(month), Some(day)) = (field(0..4), field(5..7), field(8..10)) else {
        return false;
    };

    let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if is_leap => 29,
        2 => 28,
        _ => return false,
    };

    (1..=days_in_month).contains(&day)
}
342
/// Split a duration such as `90days` into its leading digits and the unit
/// that follows (`("90", "days")`).
///
/// # Errors
///
/// Fails when the input is all digits (or empty), i.e. has no unit, and when
/// it does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        // No non-digit anywhere: nothing left to serve as the unit.
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        // The very first character is already a non-digit.
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(boundary) => Ok(input.split_at(boundary)),
    }
}
354
// Unit tests for the parsing and trend helpers. Everything here works on
// in-memory strings and slices; no test spawns a `git` process.
#[cfg(test)]
mod tests {
    use super::*;

    // ── parse_since ──────────────────────────────────────────────

    #[test]
    fn parse_since_months_short() {
        let d = parse_since("6m").unwrap();
        assert_eq!(d.git_after, "6 months ago");
        assert_eq!(d.display, "6 months");
    }

    #[test]
    fn parse_since_months_long() {
        let d = parse_since("6months").unwrap();
        assert_eq!(d.git_after, "6 months ago");
        assert_eq!(d.display, "6 months");
    }

    #[test]
    fn parse_since_days() {
        let d = parse_since("90d").unwrap();
        assert_eq!(d.git_after, "90 days ago");
        assert_eq!(d.display, "90 days");
    }

    #[test]
    fn parse_since_year_singular() {
        let d = parse_since("1y").unwrap();
        assert_eq!(d.git_after, "1 year ago");
        assert_eq!(d.display, "1 year");
    }

    #[test]
    fn parse_since_years_plural() {
        let d = parse_since("2years").unwrap();
        assert_eq!(d.git_after, "2 years ago");
        assert_eq!(d.display, "2 years");
    }

    #[test]
    fn parse_since_weeks() {
        let d = parse_since("2w").unwrap();
        assert_eq!(d.git_after, "2 weeks ago");
        assert_eq!(d.display, "2 weeks");
    }

    #[test]
    fn parse_since_iso_date() {
        // ISO dates pass through unchanged in both fields.
        let d = parse_since("2025-06-01").unwrap();
        assert_eq!(d.git_after, "2025-06-01");
        assert_eq!(d.display, "2025-06-01");
    }

    #[test]
    fn parse_since_month_singular() {
        let d = parse_since("1month").unwrap();
        assert_eq!(d.display, "1 month");
    }

    #[test]
    fn parse_since_day_singular() {
        let d = parse_since("1day").unwrap();
        assert_eq!(d.display, "1 day");
    }

    #[test]
    fn parse_since_zero_rejected() {
        assert!(parse_since("0m").is_err());
    }

    #[test]
    fn parse_since_no_unit_rejected() {
        assert!(parse_since("90").is_err());
    }

    #[test]
    fn parse_since_unknown_unit_rejected() {
        assert!(parse_since("6x").is_err());
    }

    #[test]
    fn parse_since_no_number_rejected() {
        assert!(parse_since("months").is_err());
    }

    // ── parse_numstat_line ───────────────────────────────────────

    #[test]
    fn numstat_normal() {
        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
        assert_eq!(a, 10);
        assert_eq!(d, 5);
        assert_eq!(p, "src/file.ts");
    }

    #[test]
    fn numstat_binary_skipped() {
        // git reports binary files with "-" in both count columns.
        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
    }

    #[test]
    fn numstat_zero_lines() {
        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
        assert_eq!(a, 0);
        assert_eq!(d, 0);
        assert_eq!(p, "src/empty.ts");
    }

    // ── compute_trend ────────────────────────────────────────────

    #[test]
    fn trend_empty_is_stable() {
        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_single_commit_is_stable() {
        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_accelerating() {
        // 2 old commits, 5 recent commits
        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
    }

    #[test]
    fn trend_cooling() {
        // 5 old commits, 2 recent commits
        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
    }

    #[test]
    fn trend_stable_even_distribution() {
        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
        let timestamps = vec![100, 200, 300, 700, 800, 900];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    #[test]
    fn trend_same_timestamp_is_stable() {
        // Oldest == newest, so there is no span to split into halves.
        let timestamps = vec![500, 500, 500];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    // ── is_iso_date ──────────────────────────────────────────────

    #[test]
    fn iso_date_valid() {
        assert!(is_iso_date("2025-06-01"));
        assert!(is_iso_date("2025-12-31"));
    }

    #[test]
    fn iso_date_with_time_rejected() {
        // Only exact YYYY-MM-DD (10 chars) is accepted
        assert!(!is_iso_date("2025-06-01T00:00:00"));
    }

    #[test]
    fn iso_date_invalid() {
        assert!(!is_iso_date("6months"));
        assert!(!is_iso_date("2025"));
        assert!(!is_iso_date("not-a-date"));
        assert!(!is_iso_date("abcd-ef-gh"));
    }

    // ── Display ──────────────────────────────────────────────────

    #[test]
    fn trend_display() {
        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
    }

    // ── parse_git_log ───────────────────────────────────────────

    #[test]
    fn parse_git_log_single_commit() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/index.ts\n";
        let result = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 1);
        assert_eq!(churn.lines_added, 10);
        assert_eq!(churn.lines_deleted, 5);
    }

    #[test]
    fn parse_git_log_multiple_commits_same_file() {
        let root = Path::new("/project");
        // Two commits separated by a blank line, both touching the same file.
        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
        let result = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 2);
        assert_eq!(churn.lines_added, 13);
        assert_eq!(churn.lines_deleted, 7);
    }

    #[test]
    fn parse_git_log_multiple_files() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
        let result = parse_git_log(output, root);
        assert_eq!(result.len(), 2);
        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
    }

    #[test]
    fn parse_git_log_empty_output() {
        let root = Path::new("/project");
        let result = parse_git_log("", root);
        assert!(result.is_empty());
    }

    #[test]
    fn parse_git_log_skips_binary_files() {
        let root = Path::new("/project");
        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
        let result = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
    }

    #[test]
    fn parse_git_log_weighted_commits_are_positive() {
        let root = Path::new("/project");
        // Use a timestamp near "now" to ensure weight doesn't decay to zero
        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
        let result = parse_git_log(&output, root);
        let churn = &result[&PathBuf::from("/project/src/a.ts")];
        assert!(
            churn.weighted_commits > 0.0,
            "weighted_commits should be positive for recent commits"
        );
    }

    // ── compute_trend edge cases ─────────────────────────────────

    #[test]
    fn trend_boundary_1_5x_ratio() {
        // Exactly 1.5x ratio (3 recent : 2 old) → boundary between stable and accelerating
        // midpoint = 100 + (1000-100)/2 = 550
        // old: 100, 200 (2 timestamps <= 550)
        // recent: 600, 800, 1000 (3 timestamps > 550)
        // ratio = 3/2 = 1.5 — NOT > 1.5, so stable
        let timestamps = vec![100, 200, 600, 800, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    #[test]
    fn trend_just_above_1_5x() {
        // midpoint = 100 + (1000-100)/2 = 550
        // old: 100 (1 timestamp <= 550)
        // recent: 600, 800, 1000 (3 timestamps > 550)
        // ratio = 3/1 = 3.0 → accelerating
        let timestamps = vec![100, 600, 800, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
    }
}