Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::{Deserialize, Serialize};
12
13/// Function pointer signature accepted by `set_spawn_hook` to intercept the
14/// `git log --numstat` subprocess. Lets the CLI route long-running git
15/// log calls through its `ScopedChild` registry so SIGINT / SIGTERM
16/// reap the subprocess instead of leaving it running after the parent
17/// exits. See `crates/cli/src/signal/` and issue #477.
///
/// The hook receives the fully configured [`Command`] and its return value is
/// used in place of [`Command::output`]: the caller reads the exit status,
/// stdout and stderr from the returned [`Output`].
18pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;
19
/// Process-wide slot for the optional spawn hook. Filled at most once through
/// `set_spawn_hook`; while it is empty, `spawn_output` runs the command with
/// `Command::output` directly.
20static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
21
22/// Install a spawn-hook that wraps the `git log` subprocess. Idempotent;
23/// subsequent calls are no-ops. Called once from the CLI's `main()` to
24/// route through the signal registry; defaults to `Command::output`
25/// when not set so the function-pointer indirection stays free for tests
26/// and embedders that don't care.
27pub fn set_spawn_hook(hook: ChurnSpawnHook) {
28    let _ = SPAWN_HOOK.set(hook);
29}
30
31fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
32    if let Some(hook) = SPAWN_HOOK.get() {
33        hook(command)
34    } else {
35        command.output()
36    }
37}
38
39/// Number of seconds in one day. Used to convert a commit's age from seconds
/// to days before the recency decay is applied.
40const SECS_PER_DAY: f64 = 86_400.0;
41
42/// Recency weight half-life in days. A commit from 90 days ago counts half
43/// as much as today's commit; 180 days ago counts 25%.
///
/// Each commit is weighted by `0.5^(age_days / HALF_LIFE_DAYS)`.
44const HALF_LIFE_DAYS: f64 = 90.0;
45
46/// Schema discriminator a `--churn-file` document must declare in its
/// top-level `schema` field. The comparison is exact; a document declaring
/// any other value is rejected with an error.
47const CHURN_FILE_SCHEMA: &str = "fallow-churn/v1";
48
49/// Upper bound on imported churn events. A file past this size is a sign of a
50/// pathological export (whole-history dump of a giant monorepo) rather than a
51/// useful hotspot window, so the import is rejected with an error instead of
52/// being turned into per-file churn state. Mirrors the diff parser's
53/// `MAX_ADDED_LINES` guard in the CLI.
///
/// NOTE(review): the limit is checked after the document has been read and
/// deserialized, so it bounds the aggregation work that follows, not the
/// memory needed to parse the file itself.
54const MAX_CHURN_EVENTS: usize = 5_000_000;
55
56/// Reject an imported `timestamp` more than this many seconds in the future
57/// (one year, counted as 365 days). A unix-seconds commit time is never
58/// legitimately this far ahead even with clock skew, so a value past it is
59/// almost always a millisecond timestamp (~52000 years out) or corruption.
60/// Caught loudly because the recency decay uses `saturating_sub`, so a future
61/// timestamp would otherwise clamp to age 0, give every such commit full weight,
62/// and silently collapse the recency signal that separates recent from old churn.
63const MAX_FUTURE_TIMESTAMP_SECS: u64 = 365 * 24 * 60 * 60;
64
65/// Parsed value of the `--since` flag, as produced by `parse_since`.
///
/// Carries the same look-back window in two spellings: one handed to git and
/// one meant for display.
66#[derive(Debug, Clone)]
67pub struct SinceDuration {
68    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    /// The churn cache also stores this string and is only reused when it matches.
69    pub git_after: String,
70    /// Human-readable display string (e.g., `"6 months"`); an ISO date is kept as-is.
71    pub display: String,
72}
73
74/// Churn trend indicator for a single file.
///
/// Derived by splitting the file's commits at the midpoint between its oldest
/// and newest commit timestamps and comparing the commit counts on each side.
/// Files with fewer than two commits, or whose commits all share one
/// timestamp, are reported as [`ChurnTrend::Stable`]. Serialized in
/// `snake_case` (`"accelerating"`, `"stable"`, `"cooling"`).
75#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
76#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
77#[serde(rename_all = "snake_case")]
78pub enum ChurnTrend {
79    /// Recent side has >1.5× the commits of the older side.
80    Accelerating,
81    /// Commit counts on both sides are roughly equal (ratio within 0.67–1.5).
82    Stable,
83    /// Recent side has <0.67× the commits of the older side.
84    Cooling,
85}
86
87impl std::fmt::Display for ChurnTrend {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        match self {
90            Self::Accelerating => write!(f, "accelerating"),
91            Self::Stable => write!(f, "stable"),
92            Self::Cooling => write!(f, "cooling"),
93        }
94    }
95}
96
97/// Per-author commit aggregation for a single file.
98///
99/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
100/// per-file maps small and the bitcode cache compact.
101#[derive(Debug, Clone, Copy)]
102pub struct AuthorContribution {
103    /// Total commits by this author touching this file in the analysis window.
104    pub commits: u32,
105    /// Recency-weighted commit sum (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
106    pub weighted_commits: f64,
107    /// Earliest commit timestamp by this author on this file (epoch seconds).
108    pub first_commit_ts: u64,
109    /// Latest commit timestamp by this author on this file (epoch seconds).
110    pub last_commit_ts: u64,
111}
112
113/// Per-file churn data, aggregated from the commit events recorded for one
/// file (read from `git log` or imported from a churn file).
114#[derive(Debug, Clone)]
115pub struct FileChurn {
116    /// File path: the analysis root joined with the repo-relative path.
117    pub path: PathBuf,
118    /// Total number of commits touching this file in the analysis window.
119    pub commits: u32,
120    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
121    pub weighted_commits: f64,
122    /// Total lines added across all commits.
123    pub lines_added: u32,
124    /// Total lines deleted across all commits.
125    pub lines_deleted: u32,
126    /// Churn trend: accelerating, stable, or cooling.
127    pub trend: ChurnTrend,
128    /// Per-author contributions keyed by interned author index.
129    /// Indices reference [`ChurnResult::author_pool`]. Events without an
    /// author contribute to the totals above but not to this map.
130    pub authors: FxHashMap<u32, AuthorContribution>,
131}
132
133/// Result of churn analysis: per-file aggregates plus the shared author pool.
134#[derive(Debug)]
135pub struct ChurnResult {
136    /// Per-file churn data, keyed by the same path stored in [`FileChurn::path`].
137    pub files: FxHashMap<PathBuf, FileChurn>,
138    /// Whether the repository is a shallow clone. Always `false` for results
    /// built from a churn-file import, which never runs git.
139    pub shallow_clone: bool,
140    /// Author email pool. Per-file [`AuthorContribution`] entries reference
141    /// authors by their index into this vector.
142    pub author_pool: Vec<String>,
143}
144
145/// Parse a `--since` value into a git-compatible duration.
146///
147/// Accepts:
148/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
149/// - ISO dates: `2025-06-01`
150///
151/// # Errors
152///
153/// Returns an error if the input is not a recognized duration format or ISO date,
154/// the numeric part is invalid, or the duration is zero.
155pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
156    if is_iso_date(input) {
157        return Ok(SinceDuration {
158            git_after: input.to_string(),
159            display: input.to_string(),
160        });
161    }
162
163    let (num_str, unit) = split_number_unit(input)?;
164    let num: u64 = num_str
165        .parse()
166        .map_err(|_| format!("invalid number in --since: {input}"))?;
167
168    if num == 0 {
169        return Err("--since duration must be greater than 0".to_string());
170    }
171
172    match unit {
173        "d" | "day" | "days" => {
174            let s = if num == 1 { "" } else { "s" };
175            Ok(SinceDuration {
176                git_after: format!("{num} day{s} ago"),
177                display: format!("{num} day{s}"),
178            })
179        }
180        "w" | "week" | "weeks" => {
181            let s = if num == 1 { "" } else { "s" };
182            Ok(SinceDuration {
183                git_after: format!("{num} week{s} ago"),
184                display: format!("{num} week{s}"),
185            })
186        }
187        "m" | "month" | "months" => {
188            let s = if num == 1 { "" } else { "s" };
189            Ok(SinceDuration {
190                git_after: format!("{num} month{s} ago"),
191                display: format!("{num} month{s}"),
192            })
193        }
194        "y" | "year" | "years" => {
195            let s = if num == 1 { "" } else { "s" };
196            Ok(SinceDuration {
197                git_after: format!("{num} year{s} ago"),
198                display: format!("{num} year{s}"),
199            })
200        }
201        _ => Err(format!(
202            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
203        )),
204    }
205}
206
207/// Analyze git churn for files in the given root directory.
208///
209/// Returns `None` if git is not available or the directory is not a git repository.
210pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
211    let shallow = is_shallow_clone(root);
212    let state = analyze_churn_events(root, since, None)?;
213    Some(build_churn_result(state, shallow))
214}
215
216/// A `fallow-churn/v1` import document: a normalized, VCS-agnostic stand-in for
217/// `git log --numstat` output. Unknown fields are ignored (no
218/// `deny_unknown_fields`) so wrappers may carry extra metadata and so the
219/// reserved `commit` field can be added in a future revision without breaking
220/// v1 consumers.
221#[derive(Debug, Deserialize)]
222struct ChurnFileDoc {
    /// Schema identifier; required, and must equal `CHURN_FILE_SCHEMA` exactly.
223    schema: String,
    /// Change events. Defaults to an empty list when the key is absent.
224    #[serde(default)]
225    events: Vec<ChurnFileEvent>,
226}
227
228/// One per-(commit, file) change event, the natural shape of a `<vcs> log
229/// --numstat` row. `commit` is intentionally NOT a field: extra keys are
230/// already ignored, so a wrapper emitting `commit` is forward-compatible and a
231/// future revision can promote it to a real field without a breaking change.
232#[derive(Debug, Deserialize)]
233struct ChurnFileEvent {
234    /// Repo-root-relative, forward-slash path. Joined to `root`. Backslashes
    /// are converted to `/` and surrounding whitespace is trimmed on import; a
    /// path that is empty after that is rejected.
235    path: String,
236    /// Commit time, unix SECONDS UTC (not milliseconds). Values more than
    /// `MAX_FUTURE_TIMESTAMP_SECS` past the current time are rejected.
237    timestamp: u64,
238    /// Opaque author identity (email recommended); absent contributes no
239    /// ownership signal. fallow does NOT apply mailmap to imported authors.
    /// Trimmed on import; an empty or whitespace-only value counts as absent.
240    #[serde(default)]
241    author: Option<String>,
242    /// Lines added in this file in this commit.
243    added: u32,
244    /// Lines deleted in this file in this commit.
245    deleted: u32,
246}
247
248/// Build churn data from a normalized `fallow-churn/v1` JSON import instead of
249/// `git log`. Lets projects on a non-git VCS (Yandex Arc, Mercurial, Perforce)
250/// feed change history into hotspot / ownership / bus-factor analysis: a small
251/// wrapper translates the VCS log into the contract and fallow runs all the
252/// usual recency-weighting, trend, and ownership logic on the imported events.
253///
254/// `root` is the project root that relative event paths are joined to (matching
255/// how the git path joins numstat paths), so the churn keys line up with the
256/// analyzed files. Returns a human-readable error (the CLI maps it to exit code
257/// 2) on a missing file, malformed JSON, wrong `schema`, an empty event path, a
258/// far-future timestamp, or an event count past `MAX_CHURN_EVENTS`. An empty
259/// `events` array is valid (no hotspots), not an error. Never runs `git`.
260pub fn analyze_churn_from_file(path: &Path, root: &Path) -> Result<ChurnResult, String> {
261    let raw = std::fs::read_to_string(path)
262        .map_err(|e| format!("failed to read churn file {}: {e}", path.display()))?;
263    let doc: ChurnFileDoc = serde_json::from_str(&raw)
264        .map_err(|e| format!("failed to parse churn file {}: {e}", path.display()))?;
265    if doc.schema != CHURN_FILE_SCHEMA {
266        return Err(format!(
267            "churn file {} declares schema \"{}\", expected \"{CHURN_FILE_SCHEMA}\"",
268            path.display(),
269            doc.schema
270        ));
271    }
272    if doc.events.len() > MAX_CHURN_EVENTS {
273        return Err(format!(
274            "churn file {} has {} events, exceeding the {MAX_CHURN_EVENTS} limit",
275            path.display(),
276            doc.events.len()
277        ));
278    }
279
280    let now_secs = std::time::SystemTime::now()
281        .duration_since(std::time::UNIX_EPOCH)
282        .unwrap_or_default()
283        .as_secs();
284    let future_limit = now_secs.saturating_add(MAX_FUTURE_TIMESTAMP_SECS);
285
286    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
287    let mut author_pool: Vec<String> = Vec::new();
288    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
289
290    for event in doc.events {
291        let normalized = event.path.replace('\\', "/");
292        let rel = normalized.trim();
293        if rel.is_empty() {
294            return Err(format!(
295                "churn file {} has an event with an empty path",
296                path.display()
297            ));
298        }
299        if event.timestamp > future_limit {
300            return Err(format!(
301                "churn file {} has event timestamp {} for \"{rel}\" more than a year in the \
302                 future; timestamps must be unix SECONDS (not milliseconds), UTC",
303                path.display(),
304                event.timestamp
305            ));
306        }
307        let abs_path = root.join(rel);
308        let author_idx = event
309            .author
310            .as_deref()
311            .map(str::trim)
312            .filter(|email| !email.is_empty())
313            .map(|email| intern_author(email, &mut author_pool, &mut author_index));
314        files
315            .entry(abs_path)
316            .or_insert_with(|| FileEvents { events: Vec::new() })
317            .events
318            .push(CachedCommitEvent {
319                timestamp: event.timestamp,
320                lines_added: event.added,
321                lines_deleted: event.deleted,
322                author_idx,
323            });
324    }
325
326    Ok(build_churn_result(
327        ChurnEventState { files, author_pool },
328        false,
329    ))
330}
331
332/// Check if the repository is a shallow clone.
333#[must_use]
334pub fn is_shallow_clone(root: &Path) -> bool {
335    let mut command = crate::spawn::git();
336    command
337        .args(["rev-parse", "--is-shallow-repository"])
338        .current_dir(root);
339    command.output().is_ok_and(|o| {
340        String::from_utf8_lossy(&o.stdout)
341            .trim()
342            .eq_ignore_ascii_case("true")
343    })
344}
345
346/// Check if the directory is inside a git repository.
347#[must_use]
348pub fn is_git_repo(root: &Path) -> bool {
349    let mut command = crate::spawn::git();
350    command
351        .args(["rev-parse", "--git-dir"])
352        .current_dir(root)
353        .stdout(std::process::Stdio::null())
354        .stderr(std::process::Stdio::null());
355    command.status().is_ok_and(|s| s.success())
356}
357
358/// Maximum size of a churn cache file (64 MiB). The incremental cache stores
359/// per-commit events, so it needs more headroom than the old aggregate rows.
/// A cache file larger than this is ignored and treated as a cache miss.
360const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;
361
362/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
363/// changes so older payloads are rejected on load. Bumped to 3 when the cache
364/// switched from aggregate rows to per-commit events for incremental updates.
/// A payload whose stored version differs is treated as a cache miss.
365const CHURN_CACHE_VERSION: u8 = 3;
366
367/// Serializable per-commit event for the disk cache. The same type is used
/// for in-memory events while churn state is being built or merged.
368#[derive(Clone, bitcode::Encode, bitcode::Decode)]
369struct CachedCommitEvent {
    /// Commit time in epoch seconds.
370    timestamp: u64,
    /// Lines added to the file by this commit.
371    lines_added: u32,
    /// Lines deleted from the file by this commit.
372    lines_deleted: u32,
    /// Index of the commit's author in the accompanying author pool, or
    /// `None` when the event carries no author.
373    author_idx: Option<u32>,
374}
375
376/// Serializable per-file churn entry for the disk cache.
377#[derive(Clone, bitcode::Encode, bitcode::Decode)]
378struct CachedFileChurn {
    /// File path as a string (written with a lossy UTF-8 conversion).
379    path: String,
    /// Every commit event recorded for this file.
380    events: Vec<CachedCommitEvent>,
381}
382
383/// Cached churn data keyed by last indexed SHA and since string.
384#[derive(Clone, bitcode::Encode, bitcode::Decode)]
385struct ChurnCache {
386    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
387    version: u8,
    /// HEAD commit SHA at the time the cache was written.
388    last_indexed_sha: String,
    /// The `git log --after` value the events were collected with; a cache
    /// is only reused when this matches the requested window exactly.
389    git_after: String,
    /// Per-file commit events.
390    files: Vec<CachedFileChurn>,
    /// Shallow-clone flag recorded when the cache was written.
391    shallow_clone: bool,
392    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
393    author_pool: Vec<String>,
394}
395
396/// Per-file commit events retained in memory while building or updating churn.
397struct FileEvents {
    /// One entry per commit that touched the file, in the order recorded.
398    events: Vec<CachedCommitEvent>,
399}
400
401/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
402/// timestamps so a cache can merge new commits and recompute trend/recency.
403struct ChurnEventState {
    /// Commit events grouped by file path (analysis root joined with the
    /// repo-relative path).
404    files: FxHashMap<PathBuf, FileEvents>,
    /// Author identities; events refer to them by index into this vector.
405    author_pool: Vec<String>,
406}
407
408/// Get the full HEAD SHA for cache keying.
409fn get_head_sha(root: &Path) -> Option<String> {
410    let mut command = crate::spawn::git();
411    command.args(["rev-parse", "HEAD"]).current_dir(root);
412    command
413        .output()
414        .ok()
415        .filter(|o| o.status.success())
416        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
417}
418
419/// Check whether `ancestor` is still reachable from `descendant`.
420fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
421    let mut command = crate::spawn::git();
422    command
423        .args(["merge-base", "--is-ancestor", ancestor, descendant])
424        .current_dir(root);
425    command.status().is_ok_and(|s| s.success())
426}
427
428/// Try to load churn data from disk cache. Returns `None` on cache miss
429/// or version mismatch.
430fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
431    let cache_file = cache_dir.join("churn.bin");
432    let data = std::fs::read(&cache_file).ok()?;
433    if data.len() > MAX_CHURN_CACHE_SIZE {
434        return None;
435    }
436    let cache: ChurnCache = bitcode::decode(&data).ok()?;
437    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
438        return None;
439    }
440    Some(cache)
441}
442
443/// Save churn data to disk cache.
444fn save_churn_cache(
445    cache_dir: &Path,
446    last_indexed_sha: &str,
447    git_after: &str,
448    state: &ChurnEventState,
449    shallow_clone: bool,
450) {
451    let files: Vec<CachedFileChurn> = state
452        .files
453        .iter()
454        .map(|f| CachedFileChurn {
455            path: f.0.to_string_lossy().to_string(),
456            events: f.1.events.clone(),
457        })
458        .collect();
459    let cache = ChurnCache {
460        version: CHURN_CACHE_VERSION,
461        last_indexed_sha: last_indexed_sha.to_string(),
462        git_after: git_after.to_string(),
463        files,
464        shallow_clone,
465        author_pool: state.author_pool.clone(),
466    };
467    let _ = std::fs::create_dir_all(cache_dir);
468    let data = bitcode::encode(&cache);
469    let tmp = cache_dir.join("churn.bin.tmp");
470    if std::fs::write(&tmp, data).is_ok() {
471        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
472    }
473}
474
475/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
476/// since duration match. If HEAD advanced from the cached SHA, runs an
477/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
478///
479/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
480/// cache state was used.
481/// Returns `None` if git analysis fails.
482pub fn analyze_churn_cached(
483    root: &Path,
484    since: &SinceDuration,
485    cache_dir: &Path,
486    no_cache: bool,
487) -> Option<(ChurnResult, bool)> {
488    let head_sha = get_head_sha(root)?;
489
490    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
491        if cache.last_indexed_sha == head_sha {
492            let shallow_clone = cache.shallow_clone;
493            let state = cache.into_event_state();
494            return Some((build_churn_result(state, shallow_clone), true));
495        }
496
497        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
498            let shallow_clone = is_shallow_clone(root);
499            let range = format!("{}..HEAD", cache.last_indexed_sha);
500            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
501                let mut state = cache.into_event_state();
502                merge_churn_states(&mut state, delta);
503                save_churn_cache(
504                    cache_dir,
505                    &head_sha,
506                    &since.git_after,
507                    &state,
508                    shallow_clone,
509                );
510                return Some((build_churn_result(state, shallow_clone), true));
511            }
512        }
513    }
514
515    let shallow_clone = is_shallow_clone(root);
516    let state = analyze_churn_events(root, since, None)?;
517    if !no_cache {
518        save_churn_cache(
519            cache_dir,
520            &head_sha,
521            &since.git_after,
522            &state,
523            shallow_clone,
524        );
525    }
526
527    let result = build_churn_result(state, shallow_clone);
528    Some((result, false))
529}
530
531impl ChurnCache {
532    fn into_event_state(self) -> ChurnEventState {
533        let files = self
534            .files
535            .into_iter()
536            .map(|entry| {
537                (
538                    PathBuf::from(entry.path),
539                    FileEvents {
540                        events: entry.events,
541                    },
542                )
543            })
544            .collect();
545        ChurnEventState {
546            files,
547            author_pool: self.author_pool,
548        }
549    }
550}
551
552/// Run `git log --numstat` and return event-level churn state.
553fn analyze_churn_events(
554    root: &Path,
555    since: &SinceDuration,
556    revision_range: Option<&str>,
557) -> Option<ChurnEventState> {
558    let mut command = crate::spawn::git();
559    command.arg("log");
560    if let Some(range) = revision_range {
561        command.arg(range);
562    }
563    command
564        .args([
565            "--numstat",
566            "--no-merges",
567            "--no-renames",
568            "--use-mailmap",
569            "--format=format:%at|%ae",
570            &format!("--after={}", since.git_after),
571        ])
572        .current_dir(root);
573
574    let output = match spawn_output(&mut command) {
575        Ok(o) => o,
576        Err(e) => {
577            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
578            return None;
579        }
580    };
581
582    if !output.status.success() {
583        let stderr = String::from_utf8_lossy(&output.stderr);
584        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
585        return None;
586    }
587
588    let stdout = String::from_utf8_lossy(&output.stdout);
589    Some(parse_git_log_events(&stdout, root))
590}
591
592/// Merge new churn events into cached event state.
593fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
594    let mut base_author_index: FxHashMap<String, u32> = base
595        .author_pool
596        .iter()
597        .enumerate()
598        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
599        .collect();
600
601    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
602    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
603        let Ok(old_idx) = u32::try_from(old_idx) else {
604            continue;
605        };
606        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
607        author_mapping.insert(old_idx, new_idx);
608    }
609
610    for (path, mut file) in delta.files {
611        for event in &mut file.events {
612            event.author_idx = event
613                .author_idx
614                .and_then(|idx| author_mapping.get(&idx).copied());
615        }
616        base.files
617            .entry(path)
618            .and_modify(|existing| existing.events.append(&mut file.events))
619            .or_insert(file);
620    }
621}
622
623/// Parse `git log --numstat --format=format:%at|%ae` output into events.
624fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
625    let now_secs = std::time::SystemTime::now()
626        .duration_since(std::time::UNIX_EPOCH)
627        .unwrap_or_default()
628        .as_secs();
629
630    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
631    let mut author_pool: Vec<String> = Vec::new();
632    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
633    let mut current_timestamp: Option<u64> = None;
634    let mut current_author_idx: Option<u32> = None;
635
636    for line in stdout.lines() {
637        let line = line.trim();
638        if line.is_empty() {
639            continue;
640        }
641
642        if let Some((ts_str, email)) = line.split_once('|')
643            && let Ok(ts) = ts_str.parse::<u64>()
644        {
645            current_timestamp = Some(ts);
646            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
647            continue;
648        }
649
650        if let Ok(ts) = line.parse::<u64>() {
651            current_timestamp = Some(ts);
652            current_author_idx = None;
653            continue;
654        }
655
656        if let Some((added, deleted, path)) = parse_numstat_line(line) {
657            let abs_path = root.join(path);
658            let ts = current_timestamp.unwrap_or(now_secs);
659            files
660                .entry(abs_path)
661                .or_insert_with(|| FileEvents { events: Vec::new() })
662                .events
663                .push(CachedCommitEvent {
664                    timestamp: ts,
665                    lines_added: added,
666                    lines_deleted: deleted,
667                    author_idx: current_author_idx,
668                });
669        }
670    }
671
672    ChurnEventState { files, author_pool }
673}
674
675/// Convert event-level churn state into the public aggregate result.
676#[expect(
677    clippy::cast_possible_truncation,
678    reason = "commit count per file is bounded by git history depth"
679)]
680fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
681    let now_secs = std::time::SystemTime::now()
682        .duration_since(std::time::UNIX_EPOCH)
683        .unwrap_or_default()
684        .as_secs();
685
686    let files = state
687        .files
688        .into_iter()
689        .map(|(path, file)| {
690            let mut timestamps = Vec::with_capacity(file.events.len());
691            let mut weighted_commits = 0.0;
692            let mut lines_added = 0;
693            let mut lines_deleted = 0;
694            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
695
696            for event in file.events {
697                timestamps.push(event.timestamp);
698                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
699                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
700                weighted_commits += weight;
701                lines_added += event.lines_added;
702                lines_deleted += event.lines_deleted;
703
704                if let Some(idx) = event.author_idx {
705                    authors
706                        .entry(idx)
707                        .and_modify(|c| {
708                            c.commits += 1;
709                            c.weighted_commits += weight;
710                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
711                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
712                        })
713                        .or_insert(AuthorContribution {
714                            commits: 1,
715                            weighted_commits: weight,
716                            first_commit_ts: event.timestamp,
717                            last_commit_ts: event.timestamp,
718                        });
719                }
720            }
721
722            let commits = timestamps.len() as u32;
723            let trend = compute_trend(&timestamps);
724            for c in authors.values_mut() {
725                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
726            }
727            let churn = FileChurn {
728                path: path.clone(),
729                commits,
730                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
731                lines_added,
732                lines_deleted,
733                trend,
734                authors,
735            };
736            (path, churn)
737        })
738        .collect();
739
740    ChurnResult {
741        files,
742        shallow_clone,
743        author_pool: state.author_pool,
744    }
745}
746
/// Test helper: parse `git log --numstat --format=format:%at|%ae` output and
/// aggregate it in one step.
///
/// Returns the per-file churn map together with the author email pool that
/// the interned indices in [`FileChurn::authors`] point into.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
756
757/// Intern an author email into the pool, returning its stable index.
758fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
759    if let Some(&idx) = index.get(email) {
760        return idx;
761    }
762    #[expect(
763        clippy::cast_possible_truncation,
764        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
765    )]
766    let idx = pool.len() as u32;
767    let owned = email.to_string();
768    index.insert(owned.clone(), idx);
769    pool.push(owned);
770    idx
771}
772
/// Split one numstat row, `"<added>\t<deleted>\t<path>"`, into its parts.
///
/// Returns `None` when the row has fewer than three tab-separated fields or
/// when either count is not a valid `u32`. Binary files, which git reports as
/// `"-\t-\tpath"`, are skipped this way. Everything after the second tab is
/// the path, so a path containing tabs is kept whole.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added, rest) = line.split_once('\t')?;
    let (deleted, path) = rest.split_once('\t')?;
    Some((added.parse().ok()?, deleted.parse().ok()?, path))
}
786
787/// Compute churn trend by splitting commits into two temporal halves.
788///
789/// Finds the midpoint between the oldest and newest commit timestamps,
790/// then compares commit counts in each half:
791/// - Recent > 1.5× older → Accelerating
792/// - Recent < 0.67× older → Cooling
793/// - Otherwise → Stable
794fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
795    if timestamps.len() < 2 {
796        return ChurnTrend::Stable;
797    }
798
799    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
800    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
801
802    if max_ts == min_ts {
803        return ChurnTrend::Stable;
804    }
805
806    let midpoint = min_ts + (max_ts - min_ts) / 2;
807    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
808    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
809
810    if older < 1.0 {
811        return ChurnTrend::Stable;
812    }
813
814    let ratio = recent / older;
815    if ratio > 1.5 {
816        ChurnTrend::Accelerating
817    } else if ratio < 0.67 {
818        ChurnTrend::Cooling
819    } else {
820        ChurnTrend::Stable
821    }
822}
823
/// Report whether `input` is a real calendar date written as `YYYY-MM-DD`.
///
/// Besides the shape (ten bytes, ASCII digits, `-` at positions 4 and 7), the
/// month must be 1–12 and the day must exist in that month, with February 29
/// accepted only in leap years. Checking the values matters because the
/// string is handed to `git log --after` verbatim, and git does not reject an
/// impossible date such as `2025-13-45`.
fn is_iso_date(input: &str) -> bool {
    /// Parse a run of ASCII digits; `None` if any byte is not a digit.
    fn digits(bytes: &[u8]) -> Option<u32> {
        let mut value = 0_u32;
        for &b in bytes {
            if !b.is_ascii_digit() {
                return None;
            }
            value = value * 10 + u32::from(b - b'0');
        }
        Some(value)
    }

    // Work on bytes so a multi-byte character can never cause a slicing panic.
    let bytes = input.as_bytes();
    if bytes.len() != 10 || bytes[4] != b'-' || bytes[7] != b'-' {
        return false;
    }
    let (Some(year), Some(month), Some(day)) =
        (digits(&bytes[..4]), digits(&bytes[5..7]), digits(&bytes[8..]))
    else {
        return false;
    };

    let leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if leap_year => 29,
        2 => 28,
        _ => return false,
    };
    (1..=days_in_month).contains(&day)
}
832
/// Split a `--since` value such as `"42days"` into its leading run of ASCII
/// digits and the remainder (`("42", "days")`).
///
/// # Errors
///
/// Returns a user-facing message when the input consists only of digits (no
/// unit suffix; this also covers the empty string) or when it does not begin
/// with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        // Every character is a digit, so there is no unit to split off.
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        // The very first character is already a non-digit.
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(unit_start) => Ok(input.split_at(unit_start)),
    }
}
844
// Unit tests for the churn module: `--since` parsing, numstat line parsing,
// trend classification, `git log` output aggregation, author interning, the
// on-disk churn cache, and the `--churn-file` JSON import.
#[cfg(test)]
mod tests {
    use super::*;

    // --- parse_since: short/long unit names, singular/plural display ---

    #[test]
    fn parse_since_months_short() {
        let d = parse_since("6m").unwrap();
        assert_eq!(d.git_after, "6 months ago");
        assert_eq!(d.display, "6 months");
    }

    #[test]
    fn parse_since_months_long() {
        let d = parse_since("6months").unwrap();
        assert_eq!(d.git_after, "6 months ago");
        assert_eq!(d.display, "6 months");
    }

    #[test]
    fn parse_since_days() {
        let d = parse_since("90d").unwrap();
        assert_eq!(d.git_after, "90 days ago");
        assert_eq!(d.display, "90 days");
    }

    #[test]
    fn parse_since_year_singular() {
        let d = parse_since("1y").unwrap();
        assert_eq!(d.git_after, "1 year ago");
        assert_eq!(d.display, "1 year");
    }

    #[test]
    fn parse_since_years_plural() {
        let d = parse_since("2years").unwrap();
        assert_eq!(d.git_after, "2 years ago");
        assert_eq!(d.display, "2 years");
    }

    #[test]
    fn parse_since_weeks() {
        let d = parse_since("2w").unwrap();
        assert_eq!(d.git_after, "2 weeks ago");
        assert_eq!(d.display, "2 weeks");
    }

    // An ISO date is expected to pass through unchanged in both fields.
    #[test]
    fn parse_since_iso_date() {
        let d = parse_since("2025-06-01").unwrap();
        assert_eq!(d.git_after, "2025-06-01");
        assert_eq!(d.display, "2025-06-01");
    }

    #[test]
    fn parse_since_month_singular() {
        let d = parse_since("1month").unwrap();
        assert_eq!(d.display, "1 month");
    }

    #[test]
    fn parse_since_day_singular() {
        let d = parse_since("1day").unwrap();
        assert_eq!(d.display, "1 day");
    }

    // --- parse_since: rejected inputs ---

    #[test]
    fn parse_since_zero_rejected() {
        assert!(parse_since("0m").is_err());
    }

    #[test]
    fn parse_since_no_unit_rejected() {
        assert!(parse_since("90").is_err());
    }

    #[test]
    fn parse_since_unknown_unit_rejected() {
        assert!(parse_since("6x").is_err());
    }

    #[test]
    fn parse_since_no_number_rejected() {
        assert!(parse_since("months").is_err());
    }

    // --- parse_numstat_line: basic records ---

    #[test]
    fn numstat_normal() {
        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
        assert_eq!(a, 10);
        assert_eq!(d, 5);
        assert_eq!(p, "src/file.ts");
    }

    // Binary files are reported by git as "-\t-\tpath"; "-" is not a u32.
    #[test]
    fn numstat_binary_skipped() {
        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
    }

    #[test]
    fn numstat_zero_lines() {
        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
        assert_eq!(a, 0);
        assert_eq!(d, 0);
        assert_eq!(p, "src/empty.ts");
    }

    // --- compute_trend: degenerate inputs and the three classifications ---

    #[test]
    fn trend_empty_is_stable() {
        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
    }

    #[test]
    fn trend_single_commit_is_stable() {
        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
    }

    // Midpoint is 550: 2 older vs 5 recent commits, ratio 2.5.
    #[test]
    fn trend_accelerating() {
        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
    }

    // Midpoint is 550: 5 older vs 2 recent commits, ratio 0.4.
    #[test]
    fn trend_cooling() {
        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
    }

    #[test]
    fn trend_stable_even_distribution() {
        let timestamps = vec![100, 200, 300, 700, 800, 900];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    #[test]
    fn trend_same_timestamp_is_stable() {
        let timestamps = vec![500, 500, 500];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    // --- is_iso_date: shape check only ---

    #[test]
    fn iso_date_valid() {
        assert!(is_iso_date("2025-06-01"));
        assert!(is_iso_date("2025-12-31"));
    }

    #[test]
    fn iso_date_with_time_rejected() {
        assert!(!is_iso_date("2025-06-01T00:00:00"));
    }

    #[test]
    fn iso_date_invalid() {
        assert!(!is_iso_date("6months"));
        assert!(!is_iso_date("2025"));
        assert!(!is_iso_date("not-a-date"));
        assert!(!is_iso_date("abcd-ef-gh"));
    }

    #[test]
    fn trend_display() {
        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
    }

    // --- parse_git_log: aggregation over synthetic `git log` output ---
    // Fixtures use a header line (timestamp, optionally `|author`) followed
    // by numstat lines; result keys are `root` joined with the relative path.

    #[test]
    fn parse_git_log_single_commit() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/index.ts\n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 1);
        assert_eq!(churn.lines_added, 10);
        assert_eq!(churn.lines_deleted, 5);
    }

    #[test]
    fn parse_git_log_multiple_commits_same_file() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 2);
        assert_eq!(churn.lines_added, 13);
        assert_eq!(churn.lines_deleted, 7);
    }

    #[test]
    fn parse_git_log_multiple_files() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 2);
        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
    }

    #[test]
    fn parse_git_log_empty_output() {
        let root = Path::new("/project");
        let (result, _) = parse_git_log("", root);
        assert!(result.is_empty());
    }

    #[test]
    fn parse_git_log_skips_binary_files() {
        let root = Path::new("/project");
        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
    }

    // Uses the current wall-clock time as the commit timestamp.
    #[test]
    fn parse_git_log_weighted_commits_are_positive() {
        let root = Path::new("/project");
        let now_secs = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
        let (result, _) = parse_git_log(&output, root);
        let churn = &result[&PathBuf::from("/project/src/a.ts")];
        assert!(
            churn.weighted_commits > 0.0,
            "weighted_commits should be positive for recent commits"
        );
    }

    // --- compute_trend: threshold boundaries (midpoint is 550 in each) ---

    // 2 older vs 3 recent: ratio is exactly 1.5, which is not > 1.5.
    #[test]
    fn trend_boundary_1_5x_ratio() {
        let timestamps = vec![100, 200, 600, 800, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    // 1 older vs 3 recent: ratio 3.0.
    #[test]
    fn trend_just_above_1_5x() {
        let timestamps = vec![100, 600, 800, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
    }

    // 3 older vs 2 recent: ratio 0.666..., which is below 0.67.
    #[test]
    fn trend_boundary_0_67x_ratio() {
        let timestamps = vec![100, 200, 300, 600, 1000];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
    }

    // One commit on each side of the midpoint (150): ratio 1.0.
    #[test]
    fn trend_two_timestamps_different() {
        let timestamps = vec![100, 200];
        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
    }

    // --- parse_since: remaining unit spellings and numeric edge cases ---

    #[test]
    fn parse_since_week_singular() {
        let d = parse_since("1week").unwrap();
        assert_eq!(d.git_after, "1 week ago");
        assert_eq!(d.display, "1 week");
    }

    #[test]
    fn parse_since_weeks_long() {
        let d = parse_since("3weeks").unwrap();
        assert_eq!(d.git_after, "3 weeks ago");
        assert_eq!(d.display, "3 weeks");
    }

    #[test]
    fn parse_since_days_long() {
        let d = parse_since("30days").unwrap();
        assert_eq!(d.git_after, "30 days ago");
        assert_eq!(d.display, "30 days");
    }

    #[test]
    fn parse_since_year_long() {
        let d = parse_since("1year").unwrap();
        assert_eq!(d.git_after, "1 year ago");
        assert_eq!(d.display, "1 year");
    }

    // The 20-digit number is all digits with a valid unit, so the split
    // succeeds; the error is expected to come from numeric parsing.
    #[test]
    fn parse_since_overflow_number_rejected() {
        let result = parse_since("99999999999999999999d");
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.contains("invalid number"));
    }

    #[test]
    fn parse_since_zero_days_rejected() {
        assert!(parse_since("0d").is_err());
    }

    #[test]
    fn parse_since_zero_weeks_rejected() {
        assert!(parse_since("0w").is_err());
    }

    #[test]
    fn parse_since_zero_years_rejected() {
        assert!(parse_since("0y").is_err());
    }

    // --- parse_numstat_line: malformed and unusual records ---

    #[test]
    fn numstat_missing_path() {
        assert!(parse_numstat_line("10\t5").is_none());
    }

    #[test]
    fn numstat_single_field() {
        assert!(parse_numstat_line("10").is_none());
    }

    #[test]
    fn numstat_empty_string() {
        assert!(parse_numstat_line("").is_none());
    }

    #[test]
    fn numstat_only_added_is_binary() {
        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
    }

    #[test]
    fn numstat_only_deleted_is_binary() {
        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
    }

    #[test]
    fn numstat_path_with_spaces() {
        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
        assert_eq!(a, 3);
        assert_eq!(d, 1);
        assert_eq!(p, "path with spaces/file.ts");
    }

    #[test]
    fn numstat_large_numbers() {
        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
        assert_eq!(a, 9999);
        assert_eq!(d, 8888);
        assert_eq!(p, "src/big.ts");
    }

    // --- is_iso_date: separator positions, length, non-digit fields ---

    #[test]
    fn iso_date_wrong_separator_positions() {
        assert!(!is_iso_date("20-25-0601"));
        assert!(!is_iso_date("202506-01-"));
    }

    #[test]
    fn iso_date_too_short() {
        assert!(!is_iso_date("2025-06-0"));
    }

    #[test]
    fn iso_date_letters_in_day() {
        assert!(!is_iso_date("2025-06-ab"));
    }

    #[test]
    fn iso_date_letters_in_month() {
        assert!(!is_iso_date("2025-ab-01"));
    }

    // --- split_number_unit ---

    #[test]
    fn split_number_unit_valid() {
        let (num, unit) = split_number_unit("42days").unwrap();
        assert_eq!(num, "42");
        assert_eq!(unit, "days");
    }

    #[test]
    fn split_number_unit_single_digit() {
        let (num, unit) = split_number_unit("1m").unwrap();
        assert_eq!(num, "1");
        assert_eq!(unit, "m");
    }

    #[test]
    fn split_number_unit_no_digits() {
        let err = split_number_unit("abc").unwrap_err();
        assert!(err.contains("must start with a number"));
    }

    #[test]
    fn split_number_unit_no_unit() {
        let err = split_number_unit("123").unwrap_err();
        assert!(err.contains("requires a unit suffix"));
    }

    // --- parse_git_log: edge cases, weighting, per-file trend ---

    // A numstat line with no preceding header line is still counted; the
    // asserted weight (> 0.9) is what the test name means by "uses now".
    #[test]
    fn parse_git_log_numstat_before_timestamp_uses_now() {
        let root = Path::new("/project");
        let output = "10\t5\tsrc/no_ts.ts\n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
        assert_eq!(churn.commits, 1);
        assert_eq!(churn.lines_added, 10);
        assert_eq!(churn.lines_deleted, 5);
        assert!(
            churn.weighted_commits > 0.9,
            "weight should be near 1.0 when timestamp defaults to now"
        );
    }

    #[test]
    fn parse_git_log_whitespace_lines_ignored() {
        let root = Path::new("/project");
        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
        let (result, _) = parse_git_log(output, root);
        assert_eq!(result.len(), 1);
    }

    // hot.ts has timestamps 1000, 1800, 1900, 1950, 2000: midpoint 1500,
    // 1 older vs 4 recent commits, so its trend must be Accelerating.
    #[test]
    fn parse_git_log_trend_is_computed_per_file() {
        let root = Path::new("/project");
        let output = "\
1000\n5\t1\tsrc/old.ts\n\
2000\n3\t1\tsrc/old.ts\n\
1000\n1\t0\tsrc/hot.ts\n\
1800\n1\t0\tsrc/hot.ts\n\
1900\n1\t0\tsrc/hot.ts\n\
1950\n1\t0\tsrc/hot.ts\n\
2000\n1\t0\tsrc/hot.ts\n";
        let (result, _) = parse_git_log(output, root);
        let old = &result[&PathBuf::from("/project/src/old.ts")];
        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
        assert_eq!(old.commits, 2);
        assert_eq!(hot.commits, 5);
        assert_eq!(hot.trend, ChurnTrend::Accelerating);
    }

    // 180 days is two 90-day half-lives, so the expected weight is about
    // 0.25; the bounds are loose to tolerate rounding.
    #[test]
    fn parse_git_log_weighted_decay_for_old_commits() {
        let root = Path::new("/project");
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let old_ts = now - (180 * 86_400);
        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
        let (result, _) = parse_git_log(&output, root);
        let churn = &result[&PathBuf::from("/project/src/old.ts")];
        assert!(
            churn.weighted_commits < 0.5,
            "180-day-old commit should weigh ~0.25, got {}",
            churn.weighted_commits
        );
        assert!(
            churn.weighted_commits > 0.1,
            "180-day-old commit should weigh ~0.25, got {}",
            churn.weighted_commits
        );
    }

    #[test]
    fn parse_git_log_path_stored_as_absolute() {
        let root = Path::new("/my/project");
        let output = "1700000000\n1\t0\tlib/utils.ts\n";
        let (result, _) = parse_git_log(output, root);
        let key = PathBuf::from("/my/project/lib/utils.ts");
        assert!(result.contains_key(&key));
        assert_eq!(result[&key].path, key);
    }

    // The assertion below is equivalent to checking that the default
    // `to_string()` rendering is no longer than the `{:.2}` rendering.
    #[test]
    fn parse_git_log_weighted_commits_rounded() {
        let root = Path::new("/project");
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_secs();
        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
        let (result, _) = parse_git_log(&output, root);
        let churn = &result[&PathBuf::from("/project/src/a.ts")];
        let decimals = format!("{:.2}", churn.weighted_commits);
        assert_eq!(
            churn.weighted_commits.to_string().len(),
            decimals.len().min(churn.weighted_commits.to_string().len()),
            "weighted_commits should be rounded to at most 2 decimal places"
        );
    }

    #[test]
    fn trend_serde_serialization() {
        assert_eq!(
            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
            "\"accelerating\""
        );
        assert_eq!(
            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
            "\"stable\""
        );
        assert_eq!(
            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
            "\"cooling\""
        );
    }

    // --- parse_git_log: author extraction from `timestamp|email` headers ---

    #[test]
    fn parse_git_log_extracts_author_email() {
        let root = Path::new("/project");
        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
        let (result, pool) = parse_git_log(output, root);
        assert_eq!(pool, vec!["alice@example.com".to_string()]);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.authors.len(), 1);
        // Index 0 is alice's position in the returned author pool.
        let alice = &churn.authors[&0];
        assert_eq!(alice.commits, 1);
        assert_eq!(alice.first_commit_ts, 1_700_000_000);
        assert_eq!(alice.last_commit_ts, 1_700_000_000);
    }

    // alice appears in two commits but must occupy a single pool slot.
    #[test]
    fn parse_git_log_intern_dedupes_authors() {
        let root = Path::new("/project");
        let output = "\
1700000000|alice@example.com
1\t0\ta.ts
1700100000|bob@example.com
2\t1\tb.ts
1700200000|alice@example.com
3\t2\tc.ts
";
        let (_result, pool) = parse_git_log(output, root);
        assert_eq!(pool.len(), 2);
        assert!(pool.contains(&"alice@example.com".to_string()));
        assert!(pool.contains(&"bob@example.com".to_string()));
    }

    #[test]
    fn parse_git_log_aggregates_per_author() {
        let root = Path::new("/project");
        let output = "\
1700000000|alice@example.com
1\t0\tsrc/index.ts
1700100000|bob@example.com
2\t0\tsrc/index.ts
1700200000|alice@example.com
1\t1\tsrc/index.ts
";
        let (result, pool) = parse_git_log(output, root);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 3);
        assert_eq!(churn.authors.len(), 2);

        // Look the author up through the pool rather than assuming an index.
        let alice_idx =
            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
        let alice = &churn.authors[&alice_idx];
        assert_eq!(alice.commits, 2);
        assert_eq!(alice.first_commit_ts, 1_700_000_000);
        assert_eq!(alice.last_commit_ts, 1_700_200_000);
    }

    // A header without the `|author` part still counts the commit but
    // contributes no author data.
    #[test]
    fn parse_git_log_legacy_bare_timestamp_still_parses() {
        let root = Path::new("/project");
        let output = "1700000000\n10\t5\tsrc/index.ts\n";
        let (result, pool) = parse_git_log(output, root);
        assert!(pool.is_empty());
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        assert_eq!(churn.commits, 1);
        assert!(churn.authors.is_empty());
    }

    // --- intern_author ---

    #[test]
    fn intern_author_returns_existing_index() {
        let mut pool = Vec::new();
        let mut index = FxHashMap::default();
        let i1 = intern_author("alice@x", &mut pool, &mut index);
        let i2 = intern_author("alice@x", &mut pool, &mut index);
        assert_eq!(i1, i2);
        assert_eq!(pool.len(), 1);
    }

    #[test]
    fn intern_author_assigns_sequential_indices() {
        let mut pool = Vec::new();
        let mut index = FxHashMap::default();
        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
    }

    // --- helpers for tests that drive a real git repository ---

    // Runs `git <args>` inside `root` and panics if it cannot be spawned or
    // exits unsuccessfully.
    fn git(root: &Path, args: &[&str]) {
        let status = std::process::Command::new("git")
            .args(args)
            .current_dir(root)
            .status()
            .expect("run git");
        assert!(status.success(), "git {args:?} failed");
    }

    // Writes `contents` to `root/path`, creating parent directories first.
    fn write(root: &Path, path: &str, contents: &str) {
        let path = root.join(path);
        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
        std::fs::write(path, contents).unwrap();
    }

    // End-to-end cache test against a throwaway repository: a cold run, a
    // warm run, then a run after HEAD has advanced by one commit.
    #[test]
    fn cached_churn_merges_new_commits_after_head_advances() {
        let repo = tempfile::tempdir().expect("create repo");
        let root = repo.path();
        git(root, &["init"]);
        git(root, &["config", "user.email", "churn@example.test"]);
        git(root, &["config", "user.name", "Churn Test"]);
        git(root, &["config", "commit.gpgsign", "false"]);

        write(root, "src/a.ts", "export const a = 1;\n");
        git(root, &["add", "."]);
        git(root, &["commit", "-m", "initial"]);

        // Cold run: nothing cached yet, so the hit flag must be false.
        let since = parse_since("1y").unwrap();
        let cache = tempfile::tempdir().expect("create cache dir");
        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(!cold_hit);
        let file = root.join("src/a.ts");
        assert_eq!(cold.files[&file].commits, 1);

        // Warm run with an unchanged HEAD must report a cache hit.
        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(warm_hit);

        write(
            root,
            "src/a.ts",
            "export const a = 1;\nexport const b = 2;\n",
        );
        git(root, &["add", "."]);
        git(root, &["commit", "-m", "update a"]);
        let head = get_head_sha(root).unwrap();

        // After HEAD advances the call still reports a hit, and the result
        // includes the new commit.
        let (incremental, incremental_hit) =
            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(incremental_hit);
        assert_eq!(incremental.files[&file].commits, 2);

        // The persisted cache must record the new HEAD.
        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
        assert_eq!(cache.last_indexed_sha, head);
    }

    // --- analyze_churn_from_file: `--churn-file` JSON import ---

    // Writes `contents` to `dir/churn.json` and returns that path.
    fn write_churn_file(dir: &std::path::Path, contents: &str) -> PathBuf {
        let path = dir.join("churn.json");
        std::fs::write(&path, contents).unwrap();
        path
    }

    #[test]
    fn churn_file_happy_path() {
        let dir = tempfile::tempdir().unwrap();
        let root = Path::new("/project");
        let path = write_churn_file(
            dir.path(),
            r#"{
              "schema": "fallow-churn/v1",
              "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
              ]
            }"#,
        );
        let result = analyze_churn_from_file(&path, root).unwrap();
        let churn = &result.files[&PathBuf::from("/project/src/a.ts")];
        assert_eq!(churn.commits, 2);
        assert_eq!(churn.lines_added, 13);
        assert_eq!(churn.lines_deleted, 7);
        assert_eq!(churn.authors.len(), 2);
        assert!(result.author_pool.contains(&"alice@corp".to_string()));
        assert!(result.author_pool.contains(&"bob@corp".to_string()));
        assert!(!result.shallow_clone);
    }

    #[test]
    fn churn_file_matches_git_parse() {
        // The same events fed via git numstat and via the JSON import must
        // produce identical aggregate churn: the import reuses
        // build_churn_result, so only the SOURCE differs.
        let dir = tempfile::tempdir().unwrap();
        let root = Path::new("/project");
        let git_output = "1700000000|alice@corp\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n\n1700100000|bob@corp\n3\t2\tsrc/a.ts\n";
        let (git_files, git_pool) = parse_git_log(git_output, root);

        let path = write_churn_file(
            dir.path(),
            r#"{
              "schema": "fallow-churn/v1",
              "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/b.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 3, "deleted": 1 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
              ]
            }"#,
        );
        let imported = analyze_churn_from_file(&path, root).unwrap();

        assert_eq!(git_pool, imported.author_pool, "author pools diverge");
        assert_eq!(git_files.len(), imported.files.len());
        for (file, git_churn) in &git_files {
            let imp = &imported.files[file];
            assert_eq!(git_churn.commits, imp.commits, "commits for {file:?}");
            assert_eq!(git_churn.lines_added, imp.lines_added, "added for {file:?}");
            assert_eq!(
                git_churn.lines_deleted, imp.lines_deleted,
                "deleted for {file:?}"
            );
            assert_eq!(git_churn.trend, imp.trend, "trend for {file:?}");
            assert_eq!(
                git_churn.authors.len(),
                imp.authors.len(),
                "authors for {file:?}"
            );
            // Compared with a tolerance rather than for exact equality.
            assert!(
                (git_churn.weighted_commits - imp.weighted_commits).abs() < 0.02,
                "weighted_commits for {file:?}: {} vs {}",
                git_churn.weighted_commits,
                imp.weighted_commits
            );
        }
    }

    #[test]
    fn churn_file_empty_events_is_valid() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [] }"#,
        );
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        assert!(result.files.is_empty());
        assert!(result.author_pool.is_empty());
    }

    #[test]
    fn churn_file_missing_events_key_is_valid() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(dir.path(), r#"{ "schema": "fallow-churn/v1" }"#);
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        assert!(result.files.is_empty());
    }

    // Any schema string other than "fallow-churn/v1" must be refused.
    #[test]
    fn churn_file_bad_schema_rejected() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v2", "events": [] }"#,
        );
        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
        assert!(err.contains("expected \"fallow-churn/v1\""), "{err}");
    }

    #[test]
    fn churn_file_malformed_json_rejected() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(dir.path(), "{ not json");
        assert!(analyze_churn_from_file(&path, Path::new("/project")).is_err());
    }

    #[test]
    fn churn_file_missing_file_rejected() {
        let err = analyze_churn_from_file(Path::new("/no/such/churn.json"), Path::new("/project"))
            .unwrap_err();
        assert!(err.contains("failed to read churn file"), "{err}");
    }

    // A whitespace-only path counts as empty.
    #[test]
    fn churn_file_empty_path_rejected() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "  ", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
        );
        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
        assert!(err.contains("empty path"), "{err}");
    }

    #[test]
    fn churn_file_millisecond_timestamp_rejected() {
        let dir = tempfile::tempdir().unwrap();
        // 1700000000000 is milliseconds; ~52000 years in the future as seconds.
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000000, "added": 1, "deleted": 0 } ] }"#,
        );
        let err = analyze_churn_from_file(&path, Path::new("/project")).unwrap_err();
        assert!(err.contains("milliseconds"), "{err}");
    }

    // An event without an `author` key still counts as a commit but adds
    // nothing to the per-file authors or the author pool.
    #[test]
    fn churn_file_missing_author_contributes_no_signal() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
        );
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        let churn = &result.files[&PathBuf::from("/project/src/a.ts")];
        assert_eq!(churn.commits, 1);
        assert!(churn.authors.is_empty());
        assert!(result.author_pool.is_empty());
    }

    // A whitespace-only author behaves like a missing one.
    #[test]
    fn churn_file_empty_author_string_treated_as_absent() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "  ", "added": 1, "deleted": 0 } ] }"#,
        );
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        assert!(result.author_pool.is_empty());
    }

    #[test]
    fn churn_file_unknown_fields_ignored() {
        // Extra keys (including the reserved `commit`) are accepted and ignored,
        // so a wrapper carrying extra metadata stays forward-compatible.
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "extra": true, "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 1, "deleted": 0, "commit": "abc123", "tz": "+0200" } ] }"#,
        );
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        assert_eq!(result.files[&PathBuf::from("/project/src/a.ts")].commits, 1);
    }

    // The JSON value `src\\a.ts` (a Windows-style separator) must end up
    // under the forward-slash key.
    #[test]
    fn churn_file_backslash_paths_normalized() {
        let dir = tempfile::tempdir().unwrap();
        let path = write_churn_file(
            dir.path(),
            r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src\\a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#,
        );
        let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
        assert!(
            result
                .files
                .contains_key(&PathBuf::from("/project/src/a.ts"))
        );
    }
}