Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use serde::Serialize;
11
/// Half-life, in days, of the exponential recency weight applied to commits.
///
/// A commit made 90 days ago counts half as much as one made today; a commit
/// made 180 days ago counts a quarter as much.
const HALF_LIFE_DAYS: f64 = 90.0;
15
/// Parsed form of the `--since` flag.
///
/// Carries both the value handed to git and a shorter label suitable for
/// showing to the user.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value passed to `git log --after=` (e.g., `"6 months ago"` or `"2025-06-01"`).
    pub git_after: String,
    /// Human-readable form of the same window (e.g., `"6 months"`).
    pub display: String,
}
24
/// Churn trend indicator derived from comparing the recent and older halves
/// of a file's commit history within the analysis period.
///
/// Serialized in `snake_case` (e.g., `"accelerating"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// The recent half has more than 1.5× the commits of the older half.
    Accelerating,
    /// Commit counts are roughly balanced between the two halves.
    Stable,
    /// The recent half has fewer than 0.67× the commits of the older half.
    Cooling,
}
36
37impl std::fmt::Display for ChurnTrend {
38    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
39        match self {
40            Self::Accelerating => write!(f, "accelerating"),
41            Self::Stable => write!(f, "stable"),
42            Self::Cooling => write!(f, "cooling"),
43        }
44    }
45}
46
/// Churn statistics for a single file, collected from git history.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// File path, formed by joining the analysis root with the path reported by git.
    pub path: PathBuf,
    /// Number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
    pub weighted_commits: f64,
    /// Lines added, summed over all counted commits (binary changes are not counted).
    pub lines_added: u32,
    /// Lines deleted, summed over all counted commits (binary changes are not counted).
    pub lines_deleted: u32,
    /// Whether churn is accelerating, stable, or cooling.
    pub trend: ChurnTrend,
}
63
64/// Result of churn analysis.
65pub struct ChurnResult {
66    /// Per-file churn data, keyed by absolute path.
67    pub files: FxHashMap<PathBuf, FileChurn>,
68    /// Whether the repository is a shallow clone.
69    pub shallow_clone: bool,
70}
71
72/// Parse a `--since` value into a git-compatible duration.
73///
74/// Accepts:
75/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
76/// - ISO dates: `2025-06-01`
77///
78/// Returns an error for unrecognized formats.
79pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
80    // Try ISO date first (YYYY-MM-DD)
81    if is_iso_date(input) {
82        return Ok(SinceDuration {
83            git_after: input.to_string(),
84            display: input.to_string(),
85        });
86    }
87
88    // Parse duration: number + unit
89    let (num_str, unit) = split_number_unit(input)?;
90    let num: u64 = num_str
91        .parse()
92        .map_err(|_| format!("invalid number in --since: {input}"))?;
93
94    if num == 0 {
95        return Err("--since duration must be greater than 0".to_string());
96    }
97
98    match unit {
99        "d" | "day" | "days" => {
100            let s = if num == 1 { "" } else { "s" };
101            Ok(SinceDuration {
102                git_after: format!("{num} day{s} ago"),
103                display: format!("{num} day{s}"),
104            })
105        }
106        "w" | "week" | "weeks" => {
107            let s = if num == 1 { "" } else { "s" };
108            Ok(SinceDuration {
109                git_after: format!("{num} week{s} ago"),
110                display: format!("{num} week{s}"),
111            })
112        }
113        "m" | "month" | "months" => {
114            let s = if num == 1 { "" } else { "s" };
115            Ok(SinceDuration {
116                git_after: format!("{num} month{s} ago"),
117                display: format!("{num} month{s}"),
118            })
119        }
120        "y" | "year" | "years" => {
121            let s = if num == 1 { "" } else { "s" };
122            Ok(SinceDuration {
123                git_after: format!("{num} year{s} ago"),
124                display: format!("{num} year{s}"),
125            })
126        }
127        _ => Err(format!(
128            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
129        )),
130    }
131}
132
133/// Analyze git churn for files in the given root directory.
134///
135/// Returns `None` if git is not available or the directory is not a git repository.
136pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
137    let shallow = is_shallow_clone(root);
138
139    let output = Command::new("git")
140        .args([
141            "log",
142            "--numstat",
143            "--no-merges",
144            "--no-renames",
145            "--format=format:%at",
146            &format!("--after={}", since.git_after),
147        ])
148        .current_dir(root)
149        .output();
150
151    let output = match output {
152        Ok(o) => o,
153        Err(e) => {
154            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
155            return None;
156        }
157    };
158
159    if !output.status.success() {
160        let stderr = String::from_utf8_lossy(&output.stderr);
161        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
162        return None;
163    }
164
165    let stdout = String::from_utf8_lossy(&output.stdout);
166    let files = parse_git_log(&stdout, root);
167
168    Some(ChurnResult {
169        files,
170        shallow_clone: shallow,
171    })
172}
173
/// Report whether the repository containing `root` is a shallow clone.
///
/// Asks `git rev-parse --is-shallow-repository` and returns `true` only when
/// git prints `true` (compared case-insensitively, surrounding whitespace
/// ignored). If git cannot be started, the answer is `false`.
pub fn is_shallow_clone(root: &Path) -> bool {
    let result = Command::new("git")
        .args(["rev-parse", "--is-shallow-repository"])
        .current_dir(root)
        .output();

    match result {
        Ok(out) => {
            let answer = String::from_utf8_lossy(&out.stdout);
            answer.trim().eq_ignore_ascii_case("true")
        }
        Err(_) => false,
    }
}
187
/// Report whether `root` lies inside a git repository.
///
/// Runs `git rev-parse --git-dir` with its output discarded and returns
/// `true` only when the command exits successfully. If git cannot be
/// started, the answer is `false`.
pub fn is_git_repo(root: &Path) -> bool {
    use std::process::Stdio;

    let status = Command::new("git")
        .args(["rev-parse", "--git-dir"])
        .current_dir(root)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();

    match status {
        Ok(exit) => exit.success(),
        Err(_) => false,
    }
}
199
200// ── Internal ──────────────────────────────────────────────────────
201
/// Intermediate per-file accumulator used while parsing git log output.
struct FileAccum {
    /// Commit timestamps (epoch seconds), one per counted commit; used for
    /// the commit count and for trend computation.
    commit_timestamps: Vec<u64>,
    /// Running sum of recency weights, one weight per counted commit.
    weighted_commits: f64,
    /// Running total of added lines.
    lines_added: u32,
    /// Running total of deleted lines.
    lines_deleted: u32,
}
211
212/// Parse `git log --numstat --format=format:%at` output.
213fn parse_git_log(stdout: &str, root: &Path) -> FxHashMap<PathBuf, FileChurn> {
214    let now_secs = std::time::SystemTime::now()
215        .duration_since(std::time::UNIX_EPOCH)
216        .unwrap_or_default()
217        .as_secs();
218
219    let mut accum: FxHashMap<PathBuf, FileAccum> = FxHashMap::default();
220    let mut current_timestamp: Option<u64> = None;
221
222    for line in stdout.lines() {
223        let line = line.trim();
224        if line.is_empty() {
225            continue;
226        }
227
228        // Try to parse as epoch timestamp (from %at format)
229        if let Ok(ts) = line.parse::<u64>() {
230            current_timestamp = Some(ts);
231            continue;
232        }
233
234        // Try to parse as numstat line: "10\t5\tpath/to/file"
235        if let Some((added, deleted, path)) = parse_numstat_line(line) {
236            let abs_path = root.join(path);
237            let ts = current_timestamp.unwrap_or(now_secs);
238            let age_days = (now_secs.saturating_sub(ts)) as f64 / 86400.0;
239            let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
240
241            let entry = accum.entry(abs_path).or_insert_with(|| FileAccum {
242                commit_timestamps: Vec::new(),
243                weighted_commits: 0.0,
244                lines_added: 0,
245                lines_deleted: 0,
246            });
247            entry.commit_timestamps.push(ts);
248            entry.weighted_commits += weight;
249            entry.lines_added += added;
250            entry.lines_deleted += deleted;
251        }
252    }
253
254    // Convert accumulators to FileChurn with trend computation
255    accum
256        .into_iter()
257        .map(|(path, acc)| {
258            let commits = acc.commit_timestamps.len() as u32;
259            let trend = compute_trend(&acc.commit_timestamps);
260            let churn = FileChurn {
261                path: path.clone(),
262                commits,
263                weighted_commits: (acc.weighted_commits * 100.0).round() / 100.0,
264                lines_added: acc.lines_added,
265                lines_deleted: acc.lines_deleted,
266                trend,
267            };
268            (path, churn)
269        })
270        .collect()
271}
272
/// Split one numstat line of the form `"10\t5\tpath/to/file.ts"` into
/// `(added, deleted, path)`.
///
/// Returns `None` when the line has fewer than three tab-separated fields or
/// when either count is not a `u32`. Binary files, which git reports as
/// `"-\t-\tpath"`, fall into the latter case and are therefore skipped.
/// Any further tabs are kept as part of the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let mut fields = line.splitn(3, '\t');
    match (fields.next(), fields.next(), fields.next()) {
        (Some(added_field), Some(deleted_field), Some(path)) => {
            // "-" (binary file marker) fails to parse and yields `None`.
            let added = added_field.parse::<u32>().ok()?;
            let deleted = deleted_field.parse::<u32>().ok()?;
            Some((added, deleted, path))
        }
        _ => None,
    }
}
287
288/// Compute churn trend by splitting commits into two temporal halves.
289///
290/// Finds the midpoint between the oldest and newest commit timestamps,
291/// then compares commit counts in each half:
292/// - Recent > 1.5× older → Accelerating
293/// - Recent < 0.67× older → Cooling
294/// - Otherwise → Stable
295fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
296    if timestamps.len() < 2 {
297        return ChurnTrend::Stable;
298    }
299
300    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
301    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
302
303    if max_ts == min_ts {
304        return ChurnTrend::Stable;
305    }
306
307    let midpoint = min_ts + (max_ts - min_ts) / 2;
308    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
309    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
310
311    if older < 1.0 {
312        return ChurnTrend::Stable;
313    }
314
315    let ratio = recent / older;
316    if ratio > 1.5 {
317        ChurnTrend::Accelerating
318    } else if ratio < 0.67 {
319        ChurnTrend::Cooling
320    } else {
321        ChurnTrend::Stable
322    }
323}
324
/// Check whether `input` is a calendar date written exactly as `YYYY-MM-DD`.
///
/// Besides the shape (ten bytes, ASCII digits, dashes at positions 4 and 7),
/// the month must be 1–12 and the day must exist in that month, taking leap
/// years into account. Rejecting impossible dates such as `2025-13-45` keeps
/// them from being handed to git, which would otherwise have to guess at
/// their meaning.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
        return false;
    }

    // Parse a fixed-width, all-digit field; `None` on any non-digit byte.
    let field = |digits: &[u8]| -> Option<u32> {
        digits.iter().try_fold(0_u32, |acc, &b| {
            if b.is_ascii_digit() {
                Some(acc * 10 + u32::from(b - b'0'))
            } else {
                None
            }
        })
    };

    let (year, month, day) = match (field(&bytes[..4]), field(&bytes[5..7]), field(&bytes[8..])) {
        (Some(y), Some(m), Some(d)) => (y, m, d),
        _ => return false,
    };

    let leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if leap_year => 29,
        2 => 28,
        _ => return false,
    };

    (1..=days_in_month).contains(&day)
}
333
/// Split a duration such as `"90days"` into its leading digits and the
/// remaining unit text (`("90", "days")`).
///
/// # Errors
///
/// Returns a message when the input consists only of digits (or is empty),
/// meaning no unit was supplied, or when it does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    // Byte offset of the first character that is not an ASCII digit.
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(pos) => Ok(input.split_at(pos)),
    }
}
345
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input` and returns `(git_after, display)`, panicking if rejected.
    fn since(input: &str) -> (String, String) {
        let parsed = parse_since(input).unwrap();
        (parsed.git_after, parsed.display)
    }

    /// Asserts that `input` yields exactly the given git argument and display text.
    fn assert_since(input: &str, git_after: &str, display: &str) {
        let (actual_git_after, actual_display) = since(input);
        assert_eq!(actual_git_after, git_after);
        assert_eq!(actual_display, display);
    }

    // ── parse_since ──────────────────────────────────────────────

    #[test]
    fn parse_since_months_short() {
        assert_since("6m", "6 months ago", "6 months");
    }

    #[test]
    fn parse_since_months_long() {
        assert_since("6months", "6 months ago", "6 months");
    }

    #[test]
    fn parse_since_days() {
        assert_since("90d", "90 days ago", "90 days");
    }

    #[test]
    fn parse_since_year_singular() {
        assert_since("1y", "1 year ago", "1 year");
    }

    #[test]
    fn parse_since_years_plural() {
        assert_since("2years", "2 years ago", "2 years");
    }

    #[test]
    fn parse_since_weeks() {
        assert_since("2w", "2 weeks ago", "2 weeks");
    }

    #[test]
    fn parse_since_iso_date() {
        assert_since("2025-06-01", "2025-06-01", "2025-06-01");
    }

    #[test]
    fn parse_since_month_singular() {
        assert_eq!(since("1month").1, "1 month");
    }

    #[test]
    fn parse_since_day_singular() {
        assert_eq!(since("1day").1, "1 day");
    }

    #[test]
    fn parse_since_zero_rejected() {
        assert!(parse_since("0m").is_err());
    }

    #[test]
    fn parse_since_no_unit_rejected() {
        assert!(parse_since("90").is_err());
    }

    #[test]
    fn parse_since_unknown_unit_rejected() {
        assert!(parse_since("6x").is_err());
    }

    #[test]
    fn parse_since_no_number_rejected() {
        assert!(parse_since("months").is_err());
    }

    // ── parse_numstat_line ───────────────────────────────────────

    #[test]
    fn numstat_normal() {
        let parsed = parse_numstat_line("10\t5\tsrc/file.ts");
        assert_eq!(parsed, Some((10, 5, "src/file.ts")));
    }

    #[test]
    fn numstat_binary_skipped() {
        assert_eq!(parse_numstat_line("-\t-\tsrc/image.png"), None);
    }

    #[test]
    fn numstat_zero_lines() {
        let parsed = parse_numstat_line("0\t0\tsrc/empty.ts");
        assert_eq!(parsed, Some((0, 0, "src/empty.ts")));
    }

    // ── compute_trend ────────────────────────────────────────────

    #[test]
    fn trend_empty_is_stable() {
        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_single_commit_is_stable() {
        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_accelerating() {
        // Two commits in the older half, five in the recent half.
        let history = [100, 200, 800, 850, 900, 950, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Accelerating);
    }

    #[test]
    fn trend_cooling() {
        // Five commits in the older half, two in the recent half.
        let history = [100, 150, 200, 250, 300, 900, 1000];
        assert_eq!(compute_trend(&history), ChurnTrend::Cooling);
    }

    #[test]
    fn trend_stable_even_distribution() {
        // Three commits per half gives a ratio of 1.0, which is stable.
        let history = [100, 200, 300, 700, 800, 900];
        assert_eq!(compute_trend(&history), ChurnTrend::Stable);
    }

    #[test]
    fn trend_same_timestamp_is_stable() {
        let history = [500, 500, 500];
        assert_eq!(compute_trend(&history), ChurnTrend::Stable);
    }

    // ── is_iso_date ──────────────────────────────────────────────

    #[test]
    fn iso_date_valid() {
        for date in ["2025-06-01", "2025-12-31"] {
            assert!(is_iso_date(date), "{date} should be accepted");
        }
    }

    #[test]
    fn iso_date_with_time_rejected() {
        // Only the exact ten-character YYYY-MM-DD form is accepted.
        assert!(!is_iso_date("2025-06-01T00:00:00"));
    }

    #[test]
    fn iso_date_invalid() {
        for text in ["6months", "2025", "not-a-date", "abcd-ef-gh"] {
            assert!(!is_iso_date(text), "{text} should be rejected");
        }
    }

    // ── Display ──────────────────────────────────────────────────

    #[test]
    fn trend_display() {
        let expected = [
            (ChurnTrend::Accelerating, "accelerating"),
            (ChurnTrend::Stable, "stable"),
            (ChurnTrend::Cooling, "cooling"),
        ];
        for (trend, text) in expected {
            assert_eq!(trend.to_string(), text);
        }
    }
}