Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::{Deserialize, Serialize};
12
/// Signature of the function that [`set_spawn_hook`] installs to run the
/// `git log --numstat` subprocess on the caller's behalf. The CLI uses it to
/// send long-running git log calls through its `ScopedChild` registry, so
/// SIGINT / SIGTERM reap the child rather than leaving it alive once the
/// parent has exited. See `crates/cli/src/signal/` and issue #477.
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;

/// Process-wide slot holding the optional spawn hook; written at most once.
static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();

/// Register the hook that wraps the `git log` subprocess.
///
/// Only the first registration takes effect; every later call is a no-op.
/// Intended to be called once from the CLI's `main()` so the subprocess goes
/// through the signal registry. When nothing is registered the subprocess is
/// run with `Command::output`, so tests and embedders that don't care need no
/// setup.
pub fn set_spawn_hook(hook: ChurnSpawnHook) {
    if SPAWN_HOOK.set(hook).is_err() {
        // A hook is already installed; keep it and drop the new one.
    }
}

/// Run `command` to completion through the registered hook, or through
/// `Command::output` when no hook has been registered.
fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
    match SPAWN_HOOK.get() {
        Some(run) => run(command),
        None => command.output(),
    }
}
38
/// Length of one day, in seconds.
const SECS_PER_DAY: f64 = 86_400.0;

/// Half-life of the recency weight, in days. A commit 90 days old counts half
/// as much as one made today; a commit 180 days old counts a quarter.
const HALF_LIFE_DAYS: f64 = 90.0;

/// The `schema` value a `--churn-file` document has to declare.
const CHURN_FILE_SCHEMA: &str = "fallow-churn/v1";

/// Maximum number of imported churn events accepted from one file. A document
/// larger than this points to a pathological export (a whole-history dump of a
/// giant monorepo) rather than a useful hotspot window, so it is rejected
/// instead of being folded into unbounded state from a single untrusted file.
/// Mirrors the diff parser's `MAX_ADDED_LINES` guard in the CLI.
const MAX_CHURN_EVENTS: usize = 5_000_000;

/// How far into the future (in seconds; one 365-day year) an imported
/// `timestamp` may lie before it is rejected. A unix-seconds commit time is
/// never legitimately that far ahead, even with clock skew, so a larger value
/// is almost always a millisecond timestamp (tens of thousands of years out)
/// or corruption. It is reported loudly because the recency decay uses
/// `saturating_sub`: a future timestamp would otherwise clamp to age 0, give
/// the commit full weight, and silently flatten the signal that separates
/// recent churn from old churn.
const MAX_FUTURE_TIMESTAMP_SECS: u64 = 365 * 86_400;
64
/// A parsed `--since` argument.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Argument handed to `git log --after`, e.g. `"6 months ago"` or `"2025-06-01"`.
    pub git_after: String,
    /// Form shown to the user, e.g. `"6 months"`.
    pub display: String,
}
73
74/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
76#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
77#[serde(rename_all = "snake_case")]
78pub enum ChurnTrend {
79    /// Recent half has >1.5× the commits of the older half.
80    Accelerating,
81    /// Churn is roughly stable between halves.
82    Stable,
83    /// Recent half has <0.67× the commits of the older half.
84    Cooling,
85}
86
87impl std::fmt::Display for ChurnTrend {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        match self {
90            Self::Accelerating => write!(f, "accelerating"),
91            Self::Stable => write!(f, "stable"),
92            Self::Cooling => write!(f, "cooling"),
93        }
94    }
95}
96
/// What one author contributed to one file.
///
/// The author itself is not stored here: contributions are keyed by an index
/// into [`ChurnResult::author_pool`], which keeps per-file maps small and the
/// bitcode cache compact.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Number of this author's commits that touched the file within the analysis window.
    pub commits: u32,
    /// Sum of the recency weights of those commits (exponential decay, half-life 90 days).
    pub weighted_commits: f64,
    /// Timestamp of this author's earliest commit to the file (epoch seconds).
    pub first_commit_ts: u64,
    /// Timestamp of this author's latest commit to the file (epoch seconds).
    pub last_commit_ts: u64,
}
112
113/// Per-file churn data collected from git history.
114#[derive(Debug, Clone)]
115pub struct FileChurn {
116    /// Absolute file path.
117    pub path: PathBuf,
118    /// Total number of commits touching this file in the analysis window.
119    pub commits: u32,
120    /// Recency-weighted commit count (exponential decay, half-life 90 days).
121    pub weighted_commits: f64,
122    /// Total lines added across all commits.
123    pub lines_added: u32,
124    /// Total lines deleted across all commits.
125    pub lines_deleted: u32,
126    /// Churn trend: accelerating, stable, or cooling.
127    pub trend: ChurnTrend,
128    /// Per-author contributions keyed by interned author index.
129    /// Indices reference [`ChurnResult::author_pool`].
130    pub authors: FxHashMap<u32, AuthorContribution>,
131}
132
133/// Result of churn analysis.
134#[derive(Debug)]
135pub struct ChurnResult {
136    /// Per-file churn data, keyed by absolute path.
137    pub files: FxHashMap<PathBuf, FileChurn>,
138    /// Whether the repository is a shallow clone.
139    pub shallow_clone: bool,
140    /// Author email pool. Per-file [`AuthorContribution`] entries reference
141    /// authors by their index into this vector.
142    pub author_pool: Vec<String>,
143}
144
145/// Parse a `--since` value into a git-compatible duration.
146///
147/// Accepts:
148/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
149/// - ISO dates: `2025-06-01`
150///
151/// # Errors
152///
153/// Returns an error if the input is not a recognized duration format or ISO date,
154/// the numeric part is invalid, or the duration is zero.
155pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
156    if is_iso_date(input) {
157        return Ok(SinceDuration {
158            git_after: input.to_string(),
159            display: input.to_string(),
160        });
161    }
162
163    let (num_str, unit) = split_number_unit(input)?;
164    let num: u64 = num_str
165        .parse()
166        .map_err(|_| format!("invalid number in --since: {input}"))?;
167
168    if num == 0 {
169        return Err("--since duration must be greater than 0".to_string());
170    }
171
172    match unit {
173        "d" | "day" | "days" => {
174            let s = if num == 1 { "" } else { "s" };
175            Ok(SinceDuration {
176                git_after: format!("{num} day{s} ago"),
177                display: format!("{num} day{s}"),
178            })
179        }
180        "w" | "week" | "weeks" => {
181            let s = if num == 1 { "" } else { "s" };
182            Ok(SinceDuration {
183                git_after: format!("{num} week{s} ago"),
184                display: format!("{num} week{s}"),
185            })
186        }
187        "m" | "month" | "months" => {
188            let s = if num == 1 { "" } else { "s" };
189            Ok(SinceDuration {
190                git_after: format!("{num} month{s} ago"),
191                display: format!("{num} month{s}"),
192            })
193        }
194        "y" | "year" | "years" => {
195            let s = if num == 1 { "" } else { "s" };
196            Ok(SinceDuration {
197                git_after: format!("{num} year{s} ago"),
198                display: format!("{num} year{s}"),
199            })
200        }
201        _ => Err(format!(
202            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
203        )),
204    }
205}
206
207/// Analyze git churn for files in the given root directory.
208///
209/// Returns `None` if git is not available or the directory is not a git repository.
210pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
211    let shallow = is_shallow_clone(root);
212    let state = analyze_churn_events(root, since, None)?;
213    Some(build_churn_result(state, shallow))
214}
215
216/// A `fallow-churn/v1` import document: a normalized, VCS-agnostic stand-in for
217/// `git log --numstat` output. Unknown fields are ignored (no
218/// `deny_unknown_fields`) so wrappers may carry extra metadata and so the
219/// reserved `commit` field can be added in a future revision without breaking
220/// v1 consumers.
221#[derive(Debug, Deserialize)]
222struct ChurnFileDoc {
223    schema: String,
224    #[serde(default)]
225    events: Vec<ChurnFileEvent>,
226}
227
228/// One per-(commit, file) change event, the natural shape of a `<vcs> log
229/// --numstat` row. `commit` is intentionally NOT a field: extra keys are
230/// already ignored, so a wrapper emitting `commit` is forward-compatible and a
231/// future revision can promote it to a real field without a breaking change.
232#[derive(Debug, Deserialize)]
233struct ChurnFileEvent {
234    /// Repo-root-relative, forward-slash path. Joined to `root`.
235    path: String,
236    /// Commit time, unix SECONDS UTC (not milliseconds).
237    timestamp: u64,
238    /// Opaque author identity (email recommended); absent contributes no
239    /// ownership signal. fallow does NOT apply mailmap to imported authors.
240    #[serde(default)]
241    author: Option<String>,
242    /// Lines added in this file in this commit.
243    added: u32,
244    /// Lines deleted in this file in this commit.
245    deleted: u32,
246}
247
248/// Build churn data from a normalized `fallow-churn/v1` JSON import instead of
249/// `git log`. Lets projects on a non-git VCS (Yandex Arc, Mercurial, Perforce)
250/// feed change history into hotspot / ownership / bus-factor analysis: a small
251/// wrapper translates the VCS log into the contract and fallow runs all the
252/// usual recency-weighting, trend, and ownership logic on the imported events.
253///
254/// `root` is the project root that relative event paths are joined to (matching
255/// how the git path joins numstat paths), so the churn keys line up with the
256/// analyzed files. Returns a human-readable error (the CLI maps it to exit code
257/// 2) on a missing file, malformed JSON, wrong `schema`, an empty event path, a
258/// far-future timestamp, or an event count past `MAX_CHURN_EVENTS`. An empty
259/// `events` array is valid (no hotspots), not an error. Never runs `git`.
260pub fn analyze_churn_from_file(path: &Path, root: &Path) -> Result<ChurnResult, String> {
261    let raw = std::fs::read_to_string(path)
262        .map_err(|e| format!("failed to read churn file {}: {e}", path.display()))?;
263    let doc: ChurnFileDoc = serde_json::from_str(&raw)
264        .map_err(|e| format!("failed to parse churn file {}: {e}", path.display()))?;
265    if doc.schema != CHURN_FILE_SCHEMA {
266        return Err(format!(
267            "churn file {} declares schema \"{}\", expected \"{CHURN_FILE_SCHEMA}\"",
268            path.display(),
269            doc.schema
270        ));
271    }
272    if doc.events.len() > MAX_CHURN_EVENTS {
273        return Err(format!(
274            "churn file {} has {} events, exceeding the {MAX_CHURN_EVENTS} limit",
275            path.display(),
276            doc.events.len()
277        ));
278    }
279
280    let state = churn_event_state_from_doc(&doc, path, root)?;
281    Ok(build_churn_result(state, false))
282}
283
284/// Validate and fold a parsed `fallow-churn/v1` document into event state.
285///
286/// Rejects empty paths and far-future (likely millisecond) timestamps; interns
287/// authors into the pool exactly as the git-log path does.
288fn churn_event_state_from_doc(
289    doc: &ChurnFileDoc,
290    path: &Path,
291    root: &Path,
292) -> Result<ChurnEventState, String> {
293    let mut builder = ChurnFileImportBuilder::new(path, root, churn_file_future_limit());
294
295    for event in &doc.events {
296        builder.push_event(event)?;
297    }
298
299    Ok(builder.finish())
300}
301
302fn churn_file_future_limit() -> u64 {
303    let now_secs = std::time::SystemTime::now()
304        .duration_since(std::time::UNIX_EPOCH)
305        .unwrap_or_default()
306        .as_secs();
307    now_secs.saturating_add(MAX_FUTURE_TIMESTAMP_SECS)
308}
309
310struct ChurnFileImportBuilder<'a> {
311    path: &'a Path,
312    root: &'a Path,
313    future_limit: u64,
314    files: FxHashMap<PathBuf, FileEvents>,
315    author_pool: Vec<String>,
316    author_index: FxHashMap<String, u32>,
317}
318
319impl<'a> ChurnFileImportBuilder<'a> {
320    fn new(path: &'a Path, root: &'a Path, future_limit: u64) -> Self {
321        Self {
322            path,
323            root,
324            future_limit,
325            files: FxHashMap::default(),
326            author_pool: Vec::new(),
327            author_index: FxHashMap::default(),
328        }
329    }
330
331    fn push_event(&mut self, event: &ChurnFileEvent) -> Result<(), String> {
332        let rel = normalize_churn_event_path(self.path, &event.path)?;
333        validate_churn_event_timestamp(self.path, event.timestamp, self.future_limit, &rel)?;
334
335        let abs_path = self.root.join(&rel);
336        let author_idx = self.intern_author(event.author.as_deref());
337        self.files
338            .entry(abs_path)
339            .or_insert_with(|| FileEvents { events: Vec::new() })
340            .events
341            .push(CachedCommitEvent {
342                timestamp: event.timestamp,
343                lines_added: event.added,
344                lines_deleted: event.deleted,
345                author_idx,
346            });
347        Ok(())
348    }
349
350    fn intern_author(&mut self, author: Option<&str>) -> Option<u32> {
351        author
352            .map(str::trim)
353            .filter(|email| !email.is_empty())
354            .map(|email| intern_author(email, &mut self.author_pool, &mut self.author_index))
355    }
356
357    fn finish(self) -> ChurnEventState {
358        ChurnEventState {
359            files: self.files,
360            author_pool: self.author_pool,
361        }
362    }
363}
364
/// Convert backslashes in `event_path` to forward slashes and strip
/// surrounding whitespace.
///
/// `path` is the churn file being imported and only appears in the error
/// message, which is returned when nothing is left after normalization.
fn normalize_churn_event_path(path: &Path, event_path: &str) -> Result<String, String> {
    let forward = event_path.replace('\\', "/");
    match forward.trim() {
        "" => Err(format!(
            "churn file {} has an event with an empty path",
            path.display()
        )),
        rel => Ok(rel.to_string()),
    }
}
376
/// Accept `timestamp` when it does not exceed `future_limit`; otherwise
/// return an error naming the churn file (`path`), the offending timestamp,
/// and the event path (`rel`).
fn validate_churn_event_timestamp(
    path: &Path,
    timestamp: u64,
    future_limit: u64,
    rel: &str,
) -> Result<(), String> {
    if timestamp > future_limit {
        return Err(format!(
            "churn file {} has event timestamp {} for \"{rel}\" more than a year in the \
         future; timestamps must be unix SECONDS (not milliseconds), UTC",
            path.display(),
            timestamp
        ));
    }

    Ok(())
}
394
395/// Check if the repository is a shallow clone.
396#[must_use]
397pub fn is_shallow_clone(root: &Path) -> bool {
398    let mut command = crate::spawn::git();
399    command
400        .args(["rev-parse", "--is-shallow-repository"])
401        .current_dir(root);
402    command.output().is_ok_and(|o| {
403        String::from_utf8_lossy(&o.stdout)
404            .trim()
405            .eq_ignore_ascii_case("true")
406    })
407}
408
409/// Check if the directory is inside a git repository.
410#[must_use]
411pub fn is_git_repo(root: &Path) -> bool {
412    let mut command = crate::spawn::git();
413    command
414        .args(["rev-parse", "--git-dir"])
415        .current_dir(root)
416        .stdout(std::process::Stdio::null())
417        .stderr(std::process::Stdio::null());
418    command.status().is_ok_and(|s| s.success())
419}
420
/// Largest churn cache file that will be accepted (64 MB). The incremental
/// cache holds per-commit events, so it needs more headroom than the old
/// aggregate rows did.
const MAX_CHURN_CACHE_SIZE: usize = 64 << 20;

/// Version of the cache schema. Bump it whenever the on-disk shape of
/// [`ChurnCache`] changes so that older payloads are rejected on load. It
/// became 3 when the cache moved from aggregate rows to per-commit events to
/// support incremental updates.
const CHURN_CACHE_VERSION: u8 = 3;
429
430/// Serializable per-commit event for the disk cache.
431#[derive(Clone, bitcode::Encode, bitcode::Decode)]
432struct CachedCommitEvent {
433    timestamp: u64,
434    lines_added: u32,
435    lines_deleted: u32,
436    author_idx: Option<u32>,
437}
438
439/// Serializable per-file churn entry for the disk cache.
440#[derive(Clone, bitcode::Encode, bitcode::Decode)]
441struct CachedFileChurn {
442    path: String,
443    events: Vec<CachedCommitEvent>,
444}
445
446/// Cached churn data keyed by last indexed SHA and since string.
447#[derive(Clone, bitcode::Encode, bitcode::Decode)]
448struct ChurnCache {
449    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
450    version: u8,
451    last_indexed_sha: String,
452    git_after: String,
453    files: Vec<CachedFileChurn>,
454    shallow_clone: bool,
455    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
456    author_pool: Vec<String>,
457}
458
459/// Per-file commit events retained in memory while building or updating churn.
460struct FileEvents {
461    events: Vec<CachedCommitEvent>,
462}
463
464/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
465/// timestamps so a cache can merge new commits and recompute trend/recency.
466struct ChurnEventState {
467    files: FxHashMap<PathBuf, FileEvents>,
468    author_pool: Vec<String>,
469}
470
471/// Get the full HEAD SHA for cache keying.
472fn get_head_sha(root: &Path) -> Option<String> {
473    let mut command = crate::spawn::git();
474    command.args(["rev-parse", "HEAD"]).current_dir(root);
475    command
476        .output()
477        .ok()
478        .filter(|o| o.status.success())
479        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
480}
481
482/// Check whether `ancestor` is still reachable from `descendant`.
483fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
484    let mut command = crate::spawn::git();
485    command
486        .args(["merge-base", "--is-ancestor", ancestor, descendant])
487        .current_dir(root);
488    command.status().is_ok_and(|s| s.success())
489}
490
491/// Try to load churn data from disk cache. Returns `None` on cache miss
492/// or version mismatch.
493fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
494    let cache_file = cache_dir.join("churn.bin");
495    let data = std::fs::read(&cache_file).ok()?;
496    if data.len() > MAX_CHURN_CACHE_SIZE {
497        return None;
498    }
499    let cache: ChurnCache = bitcode::decode(&data).ok()?;
500    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
501        return None;
502    }
503    Some(cache)
504}
505
506/// Save churn data to disk cache.
507fn save_churn_cache(
508    cache_dir: &Path,
509    last_indexed_sha: &str,
510    git_after: &str,
511    state: &ChurnEventState,
512    shallow_clone: bool,
513) {
514    let files: Vec<CachedFileChurn> = state
515        .files
516        .iter()
517        .map(|f| CachedFileChurn {
518            path: f.0.to_string_lossy().to_string(),
519            events: f.1.events.clone(),
520        })
521        .collect();
522    let cache = ChurnCache {
523        version: CHURN_CACHE_VERSION,
524        last_indexed_sha: last_indexed_sha.to_string(),
525        git_after: git_after.to_string(),
526        files,
527        shallow_clone,
528        author_pool: state.author_pool.clone(),
529    };
530    let _ = std::fs::create_dir_all(cache_dir);
531    let data = bitcode::encode(&cache);
532    let tmp = cache_dir.join("churn.bin.tmp");
533    if std::fs::write(&tmp, data).is_ok() {
534        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
535    }
536}
537
538/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
539/// since duration match. If HEAD advanced from the cached SHA, runs an
540/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
541///
542/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
543/// cache state was used.
544/// Returns `None` if git analysis fails.
545pub fn analyze_churn_cached(
546    root: &Path,
547    since: &SinceDuration,
548    cache_dir: &Path,
549    no_cache: bool,
550) -> Option<(ChurnResult, bool)> {
551    let head_sha = get_head_sha(root)?;
552
553    if !no_cache && let Some(result) = try_reuse_churn_cache(root, since, cache_dir, &head_sha) {
554        return Some((result, true));
555    }
556
557    analyze_fresh_churn(root, since, cache_dir, no_cache, &head_sha).map(|result| (result, false))
558}
559
560fn try_reuse_churn_cache(
561    root: &Path,
562    since: &SinceDuration,
563    cache_dir: &Path,
564    head_sha: &str,
565) -> Option<ChurnResult> {
566    let cache = load_churn_cache(cache_dir, &since.git_after)?;
567    if cache.last_indexed_sha == head_sha {
568        let shallow_clone = cache.shallow_clone;
569        return Some(build_churn_result(cache.into_event_state(), shallow_clone));
570    }
571
572    if !is_ancestor(root, &cache.last_indexed_sha, head_sha) {
573        return None;
574    }
575
576    extend_churn_cache(root, since, cache_dir, head_sha, cache)
577}
578
579fn extend_churn_cache(
580    root: &Path,
581    since: &SinceDuration,
582    cache_dir: &Path,
583    head_sha: &str,
584    cache: ChurnCache,
585) -> Option<ChurnResult> {
586    let shallow_clone = is_shallow_clone(root);
587    let range = format!("{}..HEAD", cache.last_indexed_sha);
588    let delta = analyze_churn_events(root, since, Some(&range))?;
589    let mut state = cache.into_event_state();
590    merge_churn_states(&mut state, delta);
591    save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
592    Some(build_churn_result(state, shallow_clone))
593}
594
595fn analyze_fresh_churn(
596    root: &Path,
597    since: &SinceDuration,
598    cache_dir: &Path,
599    no_cache: bool,
600    head_sha: &str,
601) -> Option<ChurnResult> {
602    let shallow_clone = is_shallow_clone(root);
603    let state = analyze_churn_events(root, since, None)?;
604    if !no_cache {
605        save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
606    }
607
608    Some(build_churn_result(state, shallow_clone))
609}
610
611impl ChurnCache {
612    fn into_event_state(self) -> ChurnEventState {
613        let files = self
614            .files
615            .into_iter()
616            .map(|entry| {
617                (
618                    PathBuf::from(entry.path),
619                    FileEvents {
620                        events: entry.events,
621                    },
622                )
623            })
624            .collect();
625        ChurnEventState {
626            files,
627            author_pool: self.author_pool,
628        }
629    }
630}
631
632/// Run `git log --numstat` and return event-level churn state.
633fn analyze_churn_events(
634    root: &Path,
635    since: &SinceDuration,
636    revision_range: Option<&str>,
637) -> Option<ChurnEventState> {
638    let mut command = crate::spawn::git();
639    command.arg("log");
640    if let Some(range) = revision_range {
641        command.arg(range);
642    }
643    command
644        .args([
645            "--numstat",
646            "--no-merges",
647            "--no-renames",
648            "--use-mailmap",
649            "--format=format:%at|%ae",
650            &format!("--after={}", since.git_after),
651        ])
652        .current_dir(root);
653
654    let output = match spawn_output(&mut command) {
655        Ok(o) => o,
656        Err(e) => {
657            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
658            return None;
659        }
660    };
661
662    if !output.status.success() {
663        let stderr = String::from_utf8_lossy(&output.stderr);
664        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
665        return None;
666    }
667
668    let stdout = String::from_utf8_lossy(&output.stdout);
669    Some(parse_git_log_events(&stdout, root))
670}
671
672/// Merge new churn events into cached event state.
673fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
674    let mut base_author_index: FxHashMap<String, u32> = base
675        .author_pool
676        .iter()
677        .enumerate()
678        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
679        .collect();
680
681    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
682    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
683        let Ok(old_idx) = u32::try_from(old_idx) else {
684            continue;
685        };
686        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
687        author_mapping.insert(old_idx, new_idx);
688    }
689
690    for (path, mut file) in delta.files {
691        for event in &mut file.events {
692            event.author_idx = event
693                .author_idx
694                .and_then(|idx| author_mapping.get(&idx).copied());
695        }
696        base.files
697            .entry(path)
698            .and_modify(|existing| existing.events.append(&mut file.events))
699            .or_insert(file);
700    }
701}
702
703/// Parse `git log --numstat --format=format:%at|%ae` output into events.
704fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
705    let now_secs = std::time::SystemTime::now()
706        .duration_since(std::time::UNIX_EPOCH)
707        .unwrap_or_default()
708        .as_secs();
709
710    let mut parser = GitLogEventParser::new(root, now_secs);
711
712    for line in stdout.lines() {
713        parser.consume_line(line);
714    }
715
716    parser.finish()
717}
718
719struct GitLogEventParser<'a> {
720    root: &'a Path,
721    now_secs: u64,
722    files: FxHashMap<PathBuf, FileEvents>,
723    author_pool: Vec<String>,
724    author_index: FxHashMap<String, u32>,
725    current_timestamp: Option<u64>,
726    current_author_idx: Option<u32>,
727}
728
729impl<'a> GitLogEventParser<'a> {
730    fn new(root: &'a Path, now_secs: u64) -> Self {
731        Self {
732            root,
733            now_secs,
734            files: FxHashMap::default(),
735            author_pool: Vec::new(),
736            author_index: FxHashMap::default(),
737            current_timestamp: None,
738            current_author_idx: None,
739        }
740    }
741
742    fn consume_line(&mut self, line: &str) {
743        let line = line.trim();
744        if line.is_empty() {
745            return;
746        }
747
748        if self.record_commit_header(line) {
749            return;
750        }
751        if self.record_legacy_timestamp(line) {
752            return;
753        }
754        self.record_numstat(line);
755    }
756
757    fn record_commit_header(&mut self, line: &str) -> bool {
758        let Some((ts_str, email)) = line.split_once('|') else {
759            return false;
760        };
761        let Ok(ts) = ts_str.parse::<u64>() else {
762            return false;
763        };
764
765        self.current_timestamp = Some(ts);
766        self.current_author_idx = Some(intern_author(
767            email,
768            &mut self.author_pool,
769            &mut self.author_index,
770        ));
771        true
772    }
773
774    fn record_legacy_timestamp(&mut self, line: &str) -> bool {
775        let Ok(ts) = line.parse::<u64>() else {
776            return false;
777        };
778
779        self.current_timestamp = Some(ts);
780        self.current_author_idx = None;
781        true
782    }
783
784    fn record_numstat(&mut self, line: &str) {
785        let Some((added, deleted, path)) = parse_numstat_line(line) else {
786            return;
787        };
788
789        let ts = self.current_timestamp.unwrap_or(self.now_secs);
790        self.files
791            .entry(self.root.join(path))
792            .or_insert_with(|| FileEvents { events: Vec::new() })
793            .events
794            .push(CachedCommitEvent {
795                timestamp: ts,
796                lines_added: added,
797                lines_deleted: deleted,
798                author_idx: self.current_author_idx,
799            });
800    }
801
802    fn finish(self) -> ChurnEventState {
803        ChurnEventState {
804            files: self.files,
805            author_pool: self.author_pool,
806        }
807    }
808}
809
810/// Aggregate one file's raw commit events into a [`FileChurn`], applying
811/// recency weighting, trend detection, and per-author accumulation.
812#[expect(
813    clippy::cast_possible_truncation,
814    reason = "commit count per file is bounded by git history depth"
815)]
816fn aggregate_file_churn(path: PathBuf, file: FileEvents, now_secs: u64) -> FileChurn {
817    let mut timestamps = Vec::with_capacity(file.events.len());
818    let mut weighted_commits = 0.0;
819    let mut lines_added = 0;
820    let mut lines_deleted = 0;
821    let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
822
823    for event in file.events {
824        timestamps.push(event.timestamp);
825        let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
826        let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
827        weighted_commits += weight;
828        lines_added += event.lines_added;
829        lines_deleted += event.lines_deleted;
830        accumulate_author(&mut authors, event.author_idx, weight, event.timestamp);
831    }
832
833    let commits = timestamps.len() as u32;
834    let trend = compute_trend(&timestamps);
835    for c in authors.values_mut() {
836        c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
837    }
838    FileChurn {
839        path,
840        commits,
841        weighted_commits: (weighted_commits * 100.0).round() / 100.0,
842        lines_added,
843        lines_deleted,
844        trend,
845        authors,
846    }
847}
848
849/// Fold a single commit's author contribution into the per-author map.
850fn accumulate_author(
851    authors: &mut FxHashMap<u32, AuthorContribution>,
852    author_idx: Option<u32>,
853    weight: f64,
854    timestamp: u64,
855) {
856    let Some(idx) = author_idx else {
857        return;
858    };
859    authors
860        .entry(idx)
861        .and_modify(|c| {
862            c.commits += 1;
863            c.weighted_commits += weight;
864            c.first_commit_ts = c.first_commit_ts.min(timestamp);
865            c.last_commit_ts = c.last_commit_ts.max(timestamp);
866        })
867        .or_insert(AuthorContribution {
868            commits: 1,
869            weighted_commits: weight,
870            first_commit_ts: timestamp,
871            last_commit_ts: timestamp,
872        });
873}
874
875/// Convert event-level churn state into the public aggregate result.
876fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
877    let now_secs = std::time::SystemTime::now()
878        .duration_since(std::time::UNIX_EPOCH)
879        .unwrap_or_default()
880        .as_secs();
881
882    let files = state
883        .files
884        .into_iter()
885        .map(|(path, file)| {
886            let churn = aggregate_file_churn(path.clone(), file, now_secs);
887            (path, churn)
888        })
889        .collect();
890
891    ChurnResult {
892        files,
893        shallow_clone,
894        author_pool: state.author_pool,
895    }
896}
897
/// Test-only shortcut from raw `git log` text to aggregated churn.
///
/// `stdout` is the output of `git log --numstat --format=format:%at|%ae`.
/// It is parsed into events and aggregated with the shallow-clone flag off.
///
/// Returns the per-file churn map together with the author email pool that
/// the interned indices in [`FileChurn::authors`] refer to.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files,
        author_pool,
        ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
907
/// Look up `email` in the intern table, adding it on first sight.
///
/// `pool` holds the emails in insertion order and `index` maps each email to
/// its position in `pool`. The returned index is the email's position in
/// `pool`; repeated calls with the same email return the same index.
fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
    match index.get(email).copied() {
        Some(existing) => existing,
        None => {
            #[expect(
                clippy::cast_possible_truncation,
                reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
            )]
            let fresh = pool.len() as u32;
            index.insert(email.to_owned(), fresh);
            pool.push(email.to_owned());
            fresh
        }
    }
}
923
/// Split one `git log --numstat` row into `(added, deleted, path)`.
///
/// A row looks like `"10\t5\tpath/to/file.ts"`. Everything after the second
/// tab is the path, so tabs inside the path are preserved. Rows with fewer
/// than three fields, or whose counts are not valid `u32` values, yield
/// `None`; this includes binary files, which git reports as `"-\t-\tpath"`.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added_field, rest) = line.split_once('\t')?;
    let (deleted_field, path) = rest.split_once('\t')?;

    let added = added_field.parse::<u32>().ok()?;
    let deleted = deleted_field.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
937
938/// Compute churn trend by splitting commits into two temporal halves.
939///
940/// Finds the midpoint between the oldest and newest commit timestamps,
941/// then compares commit counts in each half:
942/// - Recent > 1.5× older → Accelerating
943/// - Recent < 0.67× older → Cooling
944/// - Otherwise → Stable
945fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
946    if timestamps.len() < 2 {
947        return ChurnTrend::Stable;
948    }
949
950    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
951    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
952
953    if max_ts == min_ts {
954        return ChurnTrend::Stable;
955    }
956
957    let midpoint = min_ts + (max_ts - min_ts) / 2;
958    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
959    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
960
961    if older < 1.0 {
962        return ChurnTrend::Stable;
963    }
964
965    let ratio = recent / older;
966    if ratio > 1.5 {
967        ChurnTrend::Accelerating
968    } else if ratio < 0.67 {
969        ChurnTrend::Cooling
970    } else {
971        ChurnTrend::Stable
972    }
973}
974
/// Report whether `input` has the exact `YYYY-MM-DD` shape.
///
/// The check is purely structural: ten bytes, `-` at offsets 4 and 7, ASCII
/// digits everywhere else. Month and day values are not range-checked.
fn is_iso_date(input: &str) -> bool {
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = input.as_bytes() else {
        return false;
    };
    [y0, y1, y2, y3, m0, m1, d0, d1]
        .iter()
        .all(u8::is_ascii_digit)
}
983
/// Split a `--since` value such as `"90d"` into its digits and its unit.
///
/// The split happens at the first character that is not an ASCII digit, so
/// `"42days"` becomes `("42", "days")`.
///
/// # Errors
///
/// Returns a message when `input` consists only of digits (or is empty), or
/// when it does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(unit_start) => Ok(input.split_at(unit_start)),
    }
}
995
996#[cfg(test)]
997mod tests {
998    use super::*;
999
1000    #[test]
1001    fn parse_since_months_short() {
1002        let d = parse_since("6m").unwrap();
1003        assert_eq!(d.git_after, "6 months ago");
1004        assert_eq!(d.display, "6 months");
1005    }
1006
1007    #[test]
1008    fn parse_since_months_long() {
1009        let d = parse_since("6months").unwrap();
1010        assert_eq!(d.git_after, "6 months ago");
1011        assert_eq!(d.display, "6 months");
1012    }
1013
1014    #[test]
1015    fn parse_since_days() {
1016        let d = parse_since("90d").unwrap();
1017        assert_eq!(d.git_after, "90 days ago");
1018        assert_eq!(d.display, "90 days");
1019    }
1020
1021    #[test]
1022    fn parse_since_year_singular() {
1023        let d = parse_since("1y").unwrap();
1024        assert_eq!(d.git_after, "1 year ago");
1025        assert_eq!(d.display, "1 year");
1026    }
1027
1028    #[test]
1029    fn parse_since_years_plural() {
1030        let d = parse_since("2years").unwrap();
1031        assert_eq!(d.git_after, "2 years ago");
1032        assert_eq!(d.display, "2 years");
1033    }
1034
1035    #[test]
1036    fn parse_since_weeks() {
1037        let d = parse_since("2w").unwrap();
1038        assert_eq!(d.git_after, "2 weeks ago");
1039        assert_eq!(d.display, "2 weeks");
1040    }
1041
1042    #[test]
1043    fn parse_since_iso_date() {
1044        let d = parse_since("2025-06-01").unwrap();
1045        assert_eq!(d.git_after, "2025-06-01");
1046        assert_eq!(d.display, "2025-06-01");
1047    }
1048
1049    #[test]
1050    fn parse_since_month_singular() {
1051        let d = parse_since("1month").unwrap();
1052        assert_eq!(d.display, "1 month");
1053    }
1054
1055    #[test]
1056    fn parse_since_day_singular() {
1057        let d = parse_since("1day").unwrap();
1058        assert_eq!(d.display, "1 day");
1059    }
1060
1061    #[test]
1062    fn parse_since_zero_rejected() {
1063        assert!(parse_since("0m").is_err());
1064    }
1065
1066    #[test]
1067    fn parse_since_no_unit_rejected() {
1068        assert!(parse_since("90").is_err());
1069    }
1070
1071    #[test]
1072    fn parse_since_unknown_unit_rejected() {
1073        assert!(parse_since("6x").is_err());
1074    }
1075
1076    #[test]
1077    fn parse_since_no_number_rejected() {
1078        assert!(parse_since("months").is_err());
1079    }
1080
1081    #[test]
1082    fn numstat_normal() {
1083        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
1084        assert_eq!(a, 10);
1085        assert_eq!(d, 5);
1086        assert_eq!(p, "src/file.ts");
1087    }
1088
1089    #[test]
1090    fn numstat_binary_skipped() {
1091        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
1092    }
1093
1094    #[test]
1095    fn numstat_zero_lines() {
1096        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
1097        assert_eq!(a, 0);
1098        assert_eq!(d, 0);
1099        assert_eq!(p, "src/empty.ts");
1100    }
1101
1102    #[test]
1103    fn trend_empty_is_stable() {
1104        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
1105    }
1106
1107    #[test]
1108    fn trend_single_commit_is_stable() {
1109        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
1110    }
1111
1112    #[test]
1113    fn trend_accelerating() {
1114        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
1115        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
1116    }
1117
1118    #[test]
1119    fn trend_cooling() {
1120        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
1121        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
1122    }
1123
1124    #[test]
1125    fn trend_stable_even_distribution() {
1126        let timestamps = vec![100, 200, 300, 700, 800, 900];
1127        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1128    }
1129
1130    #[test]
1131    fn trend_same_timestamp_is_stable() {
1132        let timestamps = vec![500, 500, 500];
1133        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1134    }
1135
1136    #[test]
1137    fn iso_date_valid() {
1138        assert!(is_iso_date("2025-06-01"));
1139        assert!(is_iso_date("2025-12-31"));
1140    }
1141
1142    #[test]
1143    fn iso_date_with_time_rejected() {
1144        assert!(!is_iso_date("2025-06-01T00:00:00"));
1145    }
1146
1147    #[test]
1148    fn iso_date_invalid() {
1149        assert!(!is_iso_date("6months"));
1150        assert!(!is_iso_date("2025"));
1151        assert!(!is_iso_date("not-a-date"));
1152        assert!(!is_iso_date("abcd-ef-gh"));
1153    }
1154
1155    #[test]
1156    fn trend_display() {
1157        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
1158        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
1159        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
1160    }
1161
1162    #[test]
1163    fn parse_git_log_single_commit() {
1164        let root = Path::new("/project");
1165        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1166        let (result, _) = parse_git_log(output, root);
1167        assert_eq!(result.len(), 1);
1168        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1169        assert_eq!(churn.commits, 1);
1170        assert_eq!(churn.lines_added, 10);
1171        assert_eq!(churn.lines_deleted, 5);
1172    }
1173
1174    #[test]
1175    fn parse_git_log_multiple_commits_same_file() {
1176        let root = Path::new("/project");
1177        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
1178        let (result, _) = parse_git_log(output, root);
1179        assert_eq!(result.len(), 1);
1180        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1181        assert_eq!(churn.commits, 2);
1182        assert_eq!(churn.lines_added, 13);
1183        assert_eq!(churn.lines_deleted, 7);
1184    }
1185
1186    #[test]
1187    fn parse_git_log_multiple_files() {
1188        let root = Path::new("/project");
1189        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
1190        let (result, _) = parse_git_log(output, root);
1191        assert_eq!(result.len(), 2);
1192        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
1193        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
1194    }
1195
1196    #[test]
1197    fn parse_git_log_empty_output() {
1198        let root = Path::new("/project");
1199        let (result, _) = parse_git_log("", root);
1200        assert!(result.is_empty());
1201    }
1202
1203    #[test]
1204    fn parse_git_log_skips_binary_files() {
1205        let root = Path::new("/project");
1206        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
1207        let (result, _) = parse_git_log(output, root);
1208        assert_eq!(result.len(), 1);
1209        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
1210    }
1211
1212    #[test]
1213    fn parse_git_log_weighted_commits_are_positive() {
1214        let root = Path::new("/project");
1215        let now_secs = std::time::SystemTime::now()
1216            .duration_since(std::time::UNIX_EPOCH)
1217            .unwrap()
1218            .as_secs();
1219        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
1220        let (result, _) = parse_git_log(&output, root);
1221        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1222        assert!(
1223            churn.weighted_commits > 0.0,
1224            "weighted_commits should be positive for recent commits"
1225        );
1226    }
1227
1228    #[test]
1229    fn trend_boundary_1_5x_ratio() {
1230        let timestamps = vec![100, 200, 600, 800, 1000];
1231        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1232    }
1233
1234    #[test]
1235    fn trend_just_above_1_5x() {
1236        let timestamps = vec![100, 600, 800, 1000];
1237        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
1238    }
1239
1240    #[test]
1241    fn trend_boundary_0_67x_ratio() {
1242        let timestamps = vec![100, 200, 300, 600, 1000];
1243        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
1244    }
1245
1246    #[test]
1247    fn trend_two_timestamps_different() {
1248        let timestamps = vec![100, 200];
1249        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1250    }
1251
1252    #[test]
1253    fn parse_since_week_singular() {
1254        let d = parse_since("1week").unwrap();
1255        assert_eq!(d.git_after, "1 week ago");
1256        assert_eq!(d.display, "1 week");
1257    }
1258
1259    #[test]
1260    fn parse_since_weeks_long() {
1261        let d = parse_since("3weeks").unwrap();
1262        assert_eq!(d.git_after, "3 weeks ago");
1263        assert_eq!(d.display, "3 weeks");
1264    }
1265
1266    #[test]
1267    fn parse_since_days_long() {
1268        let d = parse_since("30days").unwrap();
1269        assert_eq!(d.git_after, "30 days ago");
1270        assert_eq!(d.display, "30 days");
1271    }
1272
1273    #[test]
1274    fn parse_since_year_long() {
1275        let d = parse_since("1year").unwrap();
1276        assert_eq!(d.git_after, "1 year ago");
1277        assert_eq!(d.display, "1 year");
1278    }
1279
1280    #[test]
1281    fn parse_since_overflow_number_rejected() {
1282        let result = parse_since("99999999999999999999d");
1283        assert!(result.is_err());
1284        let err = result.unwrap_err();
1285        assert!(err.contains("invalid number"));
1286    }
1287
1288    #[test]
1289    fn parse_since_zero_days_rejected() {
1290        assert!(parse_since("0d").is_err());
1291    }
1292
1293    #[test]
1294    fn parse_since_zero_weeks_rejected() {
1295        assert!(parse_since("0w").is_err());
1296    }
1297
1298    #[test]
1299    fn parse_since_zero_years_rejected() {
1300        assert!(parse_since("0y").is_err());
1301    }
1302
1303    #[test]
1304    fn numstat_missing_path() {
1305        assert!(parse_numstat_line("10\t5").is_none());
1306    }
1307
1308    #[test]
1309    fn numstat_single_field() {
1310        assert!(parse_numstat_line("10").is_none());
1311    }
1312
1313    #[test]
1314    fn numstat_empty_string() {
1315        assert!(parse_numstat_line("").is_none());
1316    }
1317
1318    #[test]
1319    fn numstat_only_added_is_binary() {
1320        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1321    }
1322
1323    #[test]
1324    fn numstat_only_deleted_is_binary() {
1325        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1326    }
1327
1328    #[test]
1329    fn numstat_path_with_spaces() {
1330        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1331        assert_eq!(a, 3);
1332        assert_eq!(d, 1);
1333        assert_eq!(p, "path with spaces/file.ts");
1334    }
1335
1336    #[test]
1337    fn numstat_large_numbers() {
1338        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1339        assert_eq!(a, 9999);
1340        assert_eq!(d, 8888);
1341        assert_eq!(p, "src/big.ts");
1342    }
1343
1344    #[test]
1345    fn iso_date_wrong_separator_positions() {
1346        assert!(!is_iso_date("20-25-0601"));
1347        assert!(!is_iso_date("202506-01-"));
1348    }
1349
1350    #[test]
1351    fn iso_date_too_short() {
1352        assert!(!is_iso_date("2025-06-0"));
1353    }
1354
1355    #[test]
1356    fn iso_date_letters_in_day() {
1357        assert!(!is_iso_date("2025-06-ab"));
1358    }
1359
1360    #[test]
1361    fn iso_date_letters_in_month() {
1362        assert!(!is_iso_date("2025-ab-01"));
1363    }
1364
1365    #[test]
1366    fn split_number_unit_valid() {
1367        let (num, unit) = split_number_unit("42days").unwrap();
1368        assert_eq!(num, "42");
1369        assert_eq!(unit, "days");
1370    }
1371
1372    #[test]
1373    fn split_number_unit_single_digit() {
1374        let (num, unit) = split_number_unit("1m").unwrap();
1375        assert_eq!(num, "1");
1376        assert_eq!(unit, "m");
1377    }
1378
1379    #[test]
1380    fn split_number_unit_no_digits() {
1381        let err = split_number_unit("abc").unwrap_err();
1382        assert!(err.contains("must start with a number"));
1383    }
1384
1385    #[test]
1386    fn split_number_unit_no_unit() {
1387        let err = split_number_unit("123").unwrap_err();
1388        assert!(err.contains("requires a unit suffix"));
1389    }
1390
1391    #[test]
1392    fn parse_git_log_numstat_before_timestamp_uses_now() {
1393        let root = Path::new("/project");
1394        let output = "10\t5\tsrc/no_ts.ts\n";
1395        let (result, _) = parse_git_log(output, root);
1396        assert_eq!(result.len(), 1);
1397        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1398        assert_eq!(churn.commits, 1);
1399        assert_eq!(churn.lines_added, 10);
1400        assert_eq!(churn.lines_deleted, 5);
1401        assert!(
1402            churn.weighted_commits > 0.9,
1403            "weight should be near 1.0 when timestamp defaults to now"
1404        );
1405    }
1406
1407    #[test]
1408    fn parse_git_log_whitespace_lines_ignored() {
1409        let root = Path::new("/project");
1410        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1411        let (result, _) = parse_git_log(output, root);
1412        assert_eq!(result.len(), 1);
1413    }
1414
1415    #[test]
1416    fn parse_git_log_trend_is_computed_per_file() {
1417        let root = Path::new("/project");
1418        let output = "\
14191000\n5\t1\tsrc/old.ts\n\
14202000\n3\t1\tsrc/old.ts\n\
14211000\n1\t0\tsrc/hot.ts\n\
14221800\n1\t0\tsrc/hot.ts\n\
14231900\n1\t0\tsrc/hot.ts\n\
14241950\n1\t0\tsrc/hot.ts\n\
14252000\n1\t0\tsrc/hot.ts\n";
1426        let (result, _) = parse_git_log(output, root);
1427        let old = &result[&PathBuf::from("/project/src/old.ts")];
1428        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
1429        assert_eq!(old.commits, 2);
1430        assert_eq!(hot.commits, 5);
1431        assert_eq!(hot.trend, ChurnTrend::Accelerating);
1432    }
1433
1434    #[test]
1435    fn parse_git_log_weighted_decay_for_old_commits() {
1436        let root = Path::new("/project");
1437        let now = std::time::SystemTime::now()
1438            .duration_since(std::time::UNIX_EPOCH)
1439            .unwrap()
1440            .as_secs();
1441        let old_ts = now - (180 * 86_400);
1442        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1443        let (result, _) = parse_git_log(&output, root);
1444        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1445        assert!(
1446            churn.weighted_commits < 0.5,
1447            "180-day-old commit should weigh ~0.25, got {}",
1448            churn.weighted_commits
1449        );
1450        assert!(
1451            churn.weighted_commits > 0.1,
1452            "180-day-old commit should weigh ~0.25, got {}",
1453            churn.weighted_commits
1454        );
1455    }
1456
1457    #[test]
1458    fn parse_git_log_path_stored_as_absolute() {
1459        let root = Path::new("/my/project");
1460        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1461        let (result, _) = parse_git_log(output, root);
1462        let key = PathBuf::from("/my/project/lib/utils.ts");
1463        assert!(result.contains_key(&key));
1464        assert_eq!(result[&key].path, key);
1465    }
1466
1467    #[test]
1468    fn parse_git_log_weighted_commits_rounded() {
1469        let root = Path::new("/project");
1470        let now = std::time::SystemTime::now()
1471            .duration_since(std::time::UNIX_EPOCH)
1472            .unwrap()
1473            .as_secs();
1474        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1475        let (result, _) = parse_git_log(&output, root);
1476        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1477        let decimals = format!("{:.2}", churn.weighted_commits);
1478        assert_eq!(
1479            churn.weighted_commits.to_string().len(),
1480            decimals.len().min(churn.weighted_commits.to_string().len()),
1481            "weighted_commits should be rounded to at most 2 decimal places"
1482        );
1483    }
1484
1485    #[test]
1486    fn trend_serde_serialization() {
1487        assert_eq!(
1488            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1489            "\"accelerating\""
1490        );
1491        assert_eq!(
1492            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1493            "\"stable\""
1494        );
1495        assert_eq!(
1496            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1497            "\"cooling\""
1498        );
1499    }
1500
1501    #[test]
1502    fn parse_git_log_extracts_author_email() {
1503        let root = Path::new("/project");
1504        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1505        let (result, pool) = parse_git_log(output, root);
1506        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1507        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1508        assert_eq!(churn.authors.len(), 1);
1509        let alice = &churn.authors[&0];
1510        assert_eq!(alice.commits, 1);
1511        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1512        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1513    }
1514
1515    #[test]
1516    fn parse_git_log_intern_dedupes_authors() {
1517        let root = Path::new("/project");
1518        let output = "\
15191700000000|alice@example.com
15201\t0\ta.ts
15211700100000|bob@example.com
15222\t1\tb.ts
15231700200000|alice@example.com
15243\t2\tc.ts
1525";
1526        let (_result, pool) = parse_git_log(output, root);
1527        assert_eq!(pool.len(), 2);
1528        assert!(pool.contains(&"alice@example.com".to_string()));
1529        assert!(pool.contains(&"bob@example.com".to_string()));
1530    }
1531
1532    #[test]
1533    fn parse_git_log_aggregates_per_author() {
1534        let root = Path::new("/project");
1535        let output = "\
15361700000000|alice@example.com
15371\t0\tsrc/index.ts
15381700100000|bob@example.com
15392\t0\tsrc/index.ts
15401700200000|alice@example.com
15411\t1\tsrc/index.ts
1542";
1543        let (result, pool) = parse_git_log(output, root);
1544        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1545        assert_eq!(churn.commits, 3);
1546        assert_eq!(churn.authors.len(), 2);
1547
1548        let alice_idx =
1549            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
1550        let alice = &churn.authors[&alice_idx];
1551        assert_eq!(alice.commits, 2);
1552        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1553        assert_eq!(alice.last_commit_ts, 1_700_200_000);
1554    }
1555
1556    #[test]
1557    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1558        let root = Path::new("/project");
1559        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1560        let (result, pool) = parse_git_log(output, root);
1561        assert!(pool.is_empty());
1562        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1563        assert_eq!(churn.commits, 1);
1564        assert!(churn.authors.is_empty());
1565    }
1566
1567    #[test]
1568    fn intern_author_returns_existing_index() {
1569        let mut pool = Vec::new();
1570        let mut index = FxHashMap::default();
1571        let i1 = intern_author("alice@x", &mut pool, &mut index);
1572        let i2 = intern_author("alice@x", &mut pool, &mut index);
1573        assert_eq!(i1, i2);
1574        assert_eq!(pool.len(), 1);
1575    }
1576
1577    #[test]
1578    fn intern_author_assigns_sequential_indices() {
1579        let mut pool = Vec::new();
1580        let mut index = FxHashMap::default();
1581        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1582        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1583        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1584        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1585    }
1586
1587    fn git(root: &Path, args: &[&str]) {
1588        let status = std::process::Command::new("git")
1589            .args(args)
1590            .current_dir(root)
1591            .status()
1592            .expect("run git");
1593        assert!(status.success(), "git {args:?} failed");
1594    }
1595
1596    fn write(root: &Path, path: &str, contents: &str) {
1597        let path = root.join(path);
1598        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
1599        std::fs::write(path, contents).unwrap();
1600    }
1601
1602    #[test]
1603    fn cached_churn_merges_new_commits_after_head_advances() {
1604        let repo = tempfile::tempdir().expect("create repo");
1605        let root = repo.path();
1606        git(root, &["init"]);
1607        git(root, &["config", "user.email", "churn@example.test"]);
1608        git(root, &["config", "user.name", "Churn Test"]);
1609        git(root, &["config", "commit.gpgsign", "false"]);
1610
1611        write(root, "src/a.ts", "export const a = 1;\n");
1612        git(root, &["add", "."]);
1613        git(root, &["commit", "-m", "initial"]);
1614
1615        let since = parse_since("1y").unwrap();
1616        let cache = tempfile::tempdir().expect("create cache dir");
1617        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1618        assert!(!cold_hit);
1619        let file = root.join("src/a.ts");
1620        assert_eq!(cold.files[&file].commits, 1);
1621
1622        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1623        assert!(warm_hit);
1624
1625        write(
1626            root,
1627            "src/a.ts",
1628            "export const a = 1;\nexport const b = 2;\n",
1629        );
1630        git(root, &["add", "."]);
1631        git(root, &["commit", "-m", "update a"]);
1632        let head = get_head_sha(root).unwrap();
1633
1634        let (incremental, incremental_hit) =
1635            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1636        assert!(incremental_hit);
1637        assert_eq!(incremental.files[&file].commits, 2);
1638
1639        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
1640        assert_eq!(cache.last_indexed_sha, head);
1641    }
1642
1643    fn write_churn_file(dir: &std::path::Path, contents: &str) -> PathBuf {
1644        let path = dir.join("churn.json");
1645        std::fs::write(&path, contents).unwrap();
1646        path
1647    }
1648
1649    #[test]
1650    fn churn_file_happy_path() {
1651        let dir = tempfile::tempdir().unwrap();
1652        let root = Path::new("/project");
1653        let path = write_churn_file(
1654            dir.path(),
1655            r#"{
1656              "schema": "fallow-churn/v1",
1657              "events": [
1658                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
1659                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
1660              ]
1661            }"#,
1662        );
1663        let result = analyze_churn_from_file(&path, root).unwrap();
1664        let churn = &result.files[&PathBuf::from("/project/src/a.ts")];
1665        assert_eq!(churn.commits, 2);
1666        assert_eq!(churn.lines_added, 13);
1667        assert_eq!(churn.lines_deleted, 7);
1668        assert_eq!(churn.authors.len(), 2);
1669        assert!(result.author_pool.contains(&"alice@corp".to_string()));
1670        assert!(result.author_pool.contains(&"bob@corp".to_string()));
1671        assert!(!result.shallow_clone);
1672    }
1673
1674    #[test]
1675    fn churn_file_matches_git_parse() {
1676        // The same events fed via git numstat and via the JSON import must
1677        // produce identical aggregate churn: the import reuses
1678        // build_churn_result, so only the SOURCE differs.
1679        let dir = tempfile::tempdir().unwrap();
1680        let root = Path::new("/project");
1681        let git_output = "1700000000|alice@corp\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n\n1700100000|bob@corp\n3\t2\tsrc/a.ts\n";
1682        let (git_files, git_pool) = parse_git_log(git_output, root);
1683
1684        let path = write_churn_file(
1685            dir.path(),
1686            r#"{
1687              "schema": "fallow-churn/v1",
1688              "events": [
1689                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
1690                { "path": "src/b.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 3, "deleted": 1 },
1691                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
1692              ]
1693            }"#,
1694        );
1695        let imported = analyze_churn_from_file(&path, root).unwrap();
1696
1697        assert_eq!(git_pool, imported.author_pool, "author pools diverge");
1698        assert_eq!(git_files.len(), imported.files.len());
1699        for (file, git_churn) in &git_files {
1700            let imp = &imported.files[file];
1701            assert_eq!(git_churn.commits, imp.commits, "commits for {file:?}");
1702            assert_eq!(git_churn.lines_added, imp.lines_added, "added for {file:?}");
1703            assert_eq!(
1704                git_churn.lines_deleted, imp.lines_deleted,
1705                "deleted for {file:?}"
1706            );
1707            assert_eq!(git_churn.trend, imp.trend, "trend for {file:?}");
1708            assert_eq!(
1709                git_churn.authors.len(),
1710                imp.authors.len(),
1711                "authors for {file:?}"
1712            );
1713            assert!(
1714                (git_churn.weighted_commits - imp.weighted_commits).abs() < 0.02,
1715                "weighted_commits for {file:?}: {} vs {}",
1716                git_churn.weighted_commits,
1717                imp.weighted_commits
1718            );
1719        }
1720    }
1721
1722    #[test]
1723    fn churn_file_empty_events_is_valid() {
1724        let dir = tempfile::tempdir().unwrap();
1725        let path = write_churn_file(
1726            dir.path(),
1727            r#"{ "schema": "fallow-churn/v1", "events": [] }"#,
1728        );
1729        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1730        assert!(result.files.is_empty());
1731        assert!(result.author_pool.is_empty());
1732    }
1733
1734    #[test]
1735    fn churn_file_missing_events_key_is_valid() {
1736        let dir = tempfile::tempdir().unwrap();
1737        let path = write_churn_file(dir.path(), r#"{ "schema": "fallow-churn/v1" }"#);
1738        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1739        assert!(result.files.is_empty());
1740    }
1741
1742    #[test]
1743    fn churn_file_bad_schema_rejected() {
1744        let dir = tempfile::tempdir().unwrap();
1745        let path = write_churn_file(
1746            dir.path(),
1747            r#"{ "schema": "fallow-churn/v2", "events": [] }"#,
1748        );
1749        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
1750        assert!(err.contains("expected \"fallow-churn/v1\""), "{err}");
1751    }
1752
1753    #[test]
1754    fn churn_file_malformed_json_rejected() {
1755        let dir = tempfile::tempdir().unwrap();
1756        let path = write_churn_file(dir.path(), "{ not json");
1757        assert!(analyze_churn_from_file(&path, Path::new("/project")).is_err());
1758    }
1759
1760    #[test]
1761    fn churn_file_missing_file_rejected() {
1762        let err = analyze_churn_from_file(Path::new("/no/such/churn.json"), Path::new("/project"))
1763            .unwrap_err();
1764        assert!(err.contains("failed to read churn file"), "{err}");
1765    }
1766
1767    #[test]
1768    fn churn_file_empty_path_rejected() {
1769        let dir = tempfile::tempdir().unwrap();
1770        let path = write_churn_file(
1771            dir.path(),
1772            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "  ", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
1773        );
1774        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
1775        assert!(err.contains("empty path"), "{err}");
1776    }
1777
1778    #[test]
1779    fn churn_file_millisecond_timestamp_rejected() {
1780        let dir = tempfile::tempdir().unwrap();
1781        // 1700000000000 is milliseconds; ~52000 years in the future as seconds.
1782        let path = write_churn_file(
1783            dir.path(),
1784            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000000, "added": 1, "deleted": 0 } ] }"#,
1785        );
1786        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
1787        assert!(err.contains("milliseconds"), "{err}");
1788    }
1789
1790    #[test]
1791    fn churn_file_missing_author_contributes_no_signal() {
1792        let dir = tempfile::tempdir().unwrap();
1793        let path = write_churn_file(
1794            dir.path(),
1795            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
1796        );
1797        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1798        let churn = &result.files[&PathBuf::from("/project/src/a.ts")];
1799        assert_eq!(churn.commits, 1);
1800        assert!(churn.authors.is_empty());
1801        assert!(result.author_pool.is_empty());
1802    }
1803
1804    #[test]
1805    fn churn_file_empty_author_string_treated_as_absent() {
1806        let dir = tempfile::tempdir().unwrap();
1807        let path = write_churn_file(
1808            dir.path(),
1809            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "  ", "added": 1, "deleted": 0 } ] }"#,
1810        );
1811        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1812        assert!(result.author_pool.is_empty());
1813    }
1814
1815    #[test]
1816    fn churn_file_unknown_fields_ignored() {
1817        // Extra keys (including the reserved `commit`) are accepted and ignored,
1818        // so a wrapper carrying extra metadata stays forward-compatible.
1819        let dir = tempfile::tempdir().unwrap();
1820        let path = write_churn_file(
1821            dir.path(),
1822            r#"{ "schema": "fallow-churn/v1", "extra": true, "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 1, "deleted": 0, "commit": "abc123", "tz": "+0200" } ] }"#,
1823        );
1824        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1825        assert_eq!(result.files[&PathBuf::from("/project/src/a.ts")].commits, 1);
1826    }
1827
1828    #[test]
1829    fn churn_file_backslash_paths_normalized() {
1830        let dir = tempfile::tempdir().unwrap();
1831        let path = write_churn_file(
1832            dir.path(),
1833            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src\\a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
1834        );
1835        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
1836        assert!(
1837            result
1838                .files
1839                .contains_key(&PathBuf::from("/project/src/a.ts"))
1840        );
1841    }
1842}