Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::Serialize;
12
/// Function pointer signature used by `set_spawn_hook` to intercept the
/// `git log --numstat` subprocess. Lets the CLI route long-running git
/// log calls through its `ScopedChild` registry so SIGINT / SIGTERM
/// reap the subprocess instead of leaving it running after the parent
/// exits. See `crates/cli/src/signal/` and issue #477.
///
/// The hook is handed the prepared [`Command`] and returns the same
/// `std::io::Result<Output>` shape that [`Command::output`] produces, which
/// is what runs when no hook is installed.
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;
19
/// Process-wide slot holding the optional spawn hook. Written at most once
/// through `set_spawn_hook` and read by `spawn_output`.
static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
21
22/// Install a spawn-hook that wraps the `git log` subprocess. Idempotent;
23/// subsequent calls are no-ops. Called once from the CLI's `main()` to
24/// route through the signal registry; defaults to `Command::output`
25/// when not set so the function-pointer indirection stays free for tests
26/// and embedders that don't care.
27pub fn set_spawn_hook(hook: ChurnSpawnHook) {
28    let _ = SPAWN_HOOK.set(hook);
29}
30
31fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
32    if let Some(hook) = SPAWN_HOOK.get() {
33        hook(command)
34    } else {
35        command.output()
36    }
37}
38
/// Number of seconds in one day (24 × 60 × 60). Used to convert a commit's
/// age from seconds into days before applying the recency weight.
const SECS_PER_DAY: f64 = 86_400.0;
41
/// Recency weight half-life in days. A commit from 90 days ago counts half
/// as much as today's commit; 180 days ago counts 25%.
///
/// The weight of a commit is `0.5^(age_days / HALF_LIFE_DAYS)`.
const HALF_LIFE_DAYS: f64 = 90.0;
45
/// Parsed duration for the `--since` flag, as produced by [`parse_since`].
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    /// Also used as part of the churn cache key.
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`). For ISO-date input
    /// this is the date string itself.
    pub display: String,
}
54
/// Churn trend indicator for a single file.
///
/// Derived by `compute_trend`, which splits the file's commits at the
/// midpoint between its oldest and newest commit timestamps and compares
/// the commit counts on each side. Serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// Recent half has >1.5× the commits of the older half.
    Accelerating,
    /// Churn is roughly stable between halves. Also reported when there are
    /// fewer than two commits or all commits share one timestamp.
    Stable,
    /// Recent half has <0.67× the commits of the older half.
    Cooling,
}
67
68impl std::fmt::Display for ChurnTrend {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        match self {
71            Self::Accelerating => write!(f, "accelerating"),
72            Self::Stable => write!(f, "stable"),
73            Self::Cooling => write!(f, "cooling"),
74        }
75    }
76}
77
/// Per-author commit aggregation for a single file.
///
/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
/// per-file maps small and the bitcode cache compact.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Total commits by this author touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit sum (exponential decay, half-life 90 days),
    /// rounded to two decimal places when the result is built.
    pub weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    pub first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    pub last_commit_ts: u64,
}
93
/// Per-file churn data collected from git history.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// Absolute file path. Built by joining the path reported by git onto the
    /// analysis root, so it is absolute whenever the root is.
    pub path: PathBuf,
    /// Total number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
    pub weighted_commits: f64,
    /// Total lines added across all commits.
    pub lines_added: u32,
    /// Total lines deleted across all commits.
    pub lines_deleted: u32,
    /// Churn trend: accelerating, stable, or cooling.
    pub trend: ChurnTrend,
    /// Per-author contributions keyed by interned author index.
    /// Indices reference [`ChurnResult::author_pool`]. Commits recorded
    /// without an author are counted in `commits` but have no entry here.
    pub authors: FxHashMap<u32, AuthorContribution>,
}
113
/// Result of churn analysis, as returned by [`analyze_churn`] and
/// [`analyze_churn_cached`].
pub struct ChurnResult {
    /// Per-file churn data, keyed by absolute path (the same value as
    /// [`FileChurn::path`]).
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository is a shallow clone.
    pub shallow_clone: bool,
    /// Author email pool. Per-file [`AuthorContribution`] entries reference
    /// authors by their index into this vector.
    pub author_pool: Vec<String>,
}
124
125/// Parse a `--since` value into a git-compatible duration.
126///
127/// Accepts:
128/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
129/// - ISO dates: `2025-06-01`
130///
131/// # Errors
132///
133/// Returns an error if the input is not a recognized duration format or ISO date,
134/// the numeric part is invalid, or the duration is zero.
135pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
136    // Try ISO date first (YYYY-MM-DD)
137    if is_iso_date(input) {
138        return Ok(SinceDuration {
139            git_after: input.to_string(),
140            display: input.to_string(),
141        });
142    }
143
144    // Parse duration: number + unit
145    let (num_str, unit) = split_number_unit(input)?;
146    let num: u64 = num_str
147        .parse()
148        .map_err(|_| format!("invalid number in --since: {input}"))?;
149
150    if num == 0 {
151        return Err("--since duration must be greater than 0".to_string());
152    }
153
154    match unit {
155        "d" | "day" | "days" => {
156            let s = if num == 1 { "" } else { "s" };
157            Ok(SinceDuration {
158                git_after: format!("{num} day{s} ago"),
159                display: format!("{num} day{s}"),
160            })
161        }
162        "w" | "week" | "weeks" => {
163            let s = if num == 1 { "" } else { "s" };
164            Ok(SinceDuration {
165                git_after: format!("{num} week{s} ago"),
166                display: format!("{num} week{s}"),
167            })
168        }
169        "m" | "month" | "months" => {
170            let s = if num == 1 { "" } else { "s" };
171            Ok(SinceDuration {
172                git_after: format!("{num} month{s} ago"),
173                display: format!("{num} month{s}"),
174            })
175        }
176        "y" | "year" | "years" => {
177            let s = if num == 1 { "" } else { "s" };
178            Ok(SinceDuration {
179                git_after: format!("{num} year{s} ago"),
180                display: format!("{num} year{s}"),
181            })
182        }
183        _ => Err(format!(
184            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
185        )),
186    }
187}
188
189/// Analyze git churn for files in the given root directory.
190///
191/// Returns `None` if git is not available or the directory is not a git repository.
192pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
193    let shallow = is_shallow_clone(root);
194    let state = analyze_churn_events(root, since, None)?;
195    Some(build_churn_result(state, shallow))
196}
197
198/// Check if the repository is a shallow clone.
199#[must_use]
200pub fn is_shallow_clone(root: &Path) -> bool {
201    let mut command = crate::spawn::git();
202    command
203        .args(["rev-parse", "--is-shallow-repository"])
204        .current_dir(root);
205    command.output().is_ok_and(|o| {
206        String::from_utf8_lossy(&o.stdout)
207            .trim()
208            .eq_ignore_ascii_case("true")
209    })
210}
211
212/// Check if the directory is inside a git repository.
213#[must_use]
214pub fn is_git_repo(root: &Path) -> bool {
215    let mut command = crate::spawn::git();
216    command
217        .args(["rev-parse", "--git-dir"])
218        .current_dir(root)
219        .stdout(std::process::Stdio::null())
220        .stderr(std::process::Stdio::null());
221    command.status().is_ok_and(|s| s.success())
222}
223
224// ── Churn cache ──────────────────────────────────────────────────
225
/// Maximum size of a churn cache file (64 MB). The incremental cache stores
/// per-commit events, so it needs more headroom than the old aggregate rows.
/// Larger files are treated as a cache miss when loading.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;
229
/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
/// changes so older payloads are rejected on load. Bumped to 3 when the cache
/// switched from aggregate rows to per-commit events for incremental updates.
///
/// Written into every saved cache and compared for equality on load.
const CHURN_CACHE_VERSION: u8 = 3;
234
/// Serializable per-commit event for the disk cache.
///
/// One event is recorded per (commit, file) numstat row.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedCommitEvent {
    /// Commit timestamp in epoch seconds, taken from the `%at` field of the
    /// log header (or the parse-time clock if no header preceded the row).
    timestamp: u64,
    /// Lines added to the file by this commit.
    lines_added: u32,
    /// Lines deleted from the file by this commit.
    lines_deleted: u32,
    /// Index of the commit author in the owning author pool; `None` when the
    /// header carried only a bare timestamp.
    author_idx: Option<u32>,
}
243
/// Serializable per-file churn entry for the disk cache.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// File path stored as a string (lossy UTF-8 conversion of the in-memory
    /// `PathBuf` key).
    path: String,
    /// Every commit event recorded for this file.
    events: Vec<CachedCommitEvent>,
}
250
/// Cached churn data keyed by last indexed SHA and since string.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
    version: u8,
    /// HEAD SHA the cached events were collected up to.
    last_indexed_sha: String,
    /// The `git log --after` value the events were collected with; a cache
    /// with a different value is ignored on load.
    git_after: String,
    /// Per-file event lists.
    files: Vec<CachedFileChurn>,
    /// Shallow-clone flag observed when the cache was written.
    shallow_clone: bool,
    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
    author_pool: Vec<String>,
}
263
/// Per-file commit events retained in memory while building or updating churn.
struct FileEvents {
    /// Events in the order they were parsed or merged; not sorted by time.
    events: Vec<CachedCommitEvent>,
}
268
/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
/// timestamps so a cache can merge new commits and recompute trend/recency.
struct ChurnEventState {
    /// Commit events per file, keyed by the file's path joined onto the root.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Author emails; events refer to entries here by index.
    author_pool: Vec<String>,
}
275
276/// Get the full HEAD SHA for cache keying.
277fn get_head_sha(root: &Path) -> Option<String> {
278    let mut command = crate::spawn::git();
279    command.args(["rev-parse", "HEAD"]).current_dir(root);
280    command
281        .output()
282        .ok()
283        .filter(|o| o.status.success())
284        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
285}
286
287/// Check whether `ancestor` is still reachable from `descendant`.
288fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
289    let mut command = crate::spawn::git();
290    command
291        .args(["merge-base", "--is-ancestor", ancestor, descendant])
292        .current_dir(root);
293    command.status().is_ok_and(|s| s.success())
294}
295
296/// Try to load churn data from disk cache. Returns `None` on cache miss
297/// or version mismatch.
298fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
299    let cache_file = cache_dir.join("churn.bin");
300    let data = std::fs::read(&cache_file).ok()?;
301    if data.len() > MAX_CHURN_CACHE_SIZE {
302        return None;
303    }
304    let cache: ChurnCache = bitcode::decode(&data).ok()?;
305    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
306        return None;
307    }
308    Some(cache)
309}
310
311/// Save churn data to disk cache.
312fn save_churn_cache(
313    cache_dir: &Path,
314    last_indexed_sha: &str,
315    git_after: &str,
316    state: &ChurnEventState,
317    shallow_clone: bool,
318) {
319    let files: Vec<CachedFileChurn> = state
320        .files
321        .iter()
322        .map(|f| CachedFileChurn {
323            path: f.0.to_string_lossy().to_string(),
324            events: f.1.events.clone(),
325        })
326        .collect();
327    let cache = ChurnCache {
328        version: CHURN_CACHE_VERSION,
329        last_indexed_sha: last_indexed_sha.to_string(),
330        git_after: git_after.to_string(),
331        files,
332        shallow_clone,
333        author_pool: state.author_pool.clone(),
334    };
335    let _ = std::fs::create_dir_all(cache_dir);
336    let data = bitcode::encode(&cache);
337    // Write to temp file then rename for atomic update (avoids partial reads by concurrent processes)
338    let tmp = cache_dir.join("churn.bin.tmp");
339    if std::fs::write(&tmp, data).is_ok() {
340        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
341    }
342}
343
344/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
345/// since duration match. If HEAD advanced from the cached SHA, runs an
346/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
347///
348/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
349/// cache state was used.
350/// Returns `None` if git analysis fails.
351pub fn analyze_churn_cached(
352    root: &Path,
353    since: &SinceDuration,
354    cache_dir: &Path,
355    no_cache: bool,
356) -> Option<(ChurnResult, bool)> {
357    let head_sha = get_head_sha(root)?;
358
359    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
360        if cache.last_indexed_sha == head_sha {
361            let shallow_clone = cache.shallow_clone;
362            let state = cache.into_event_state();
363            return Some((build_churn_result(state, shallow_clone), true));
364        }
365
366        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
367            let shallow_clone = is_shallow_clone(root);
368            let range = format!("{}..HEAD", cache.last_indexed_sha);
369            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
370                let mut state = cache.into_event_state();
371                merge_churn_states(&mut state, delta);
372                save_churn_cache(
373                    cache_dir,
374                    &head_sha,
375                    &since.git_after,
376                    &state,
377                    shallow_clone,
378                );
379                return Some((build_churn_result(state, shallow_clone), true));
380            }
381        }
382    }
383
384    let shallow_clone = is_shallow_clone(root);
385    let state = analyze_churn_events(root, since, None)?;
386    if !no_cache {
387        save_churn_cache(
388            cache_dir,
389            &head_sha,
390            &since.git_after,
391            &state,
392            shallow_clone,
393        );
394    }
395
396    let result = build_churn_result(state, shallow_clone);
397    Some((result, false))
398}
399
400// ── Internal ──────────────────────────────────────────────────────
401
402impl ChurnCache {
403    fn into_event_state(self) -> ChurnEventState {
404        let files = self
405            .files
406            .into_iter()
407            .map(|entry| {
408                (
409                    PathBuf::from(entry.path),
410                    FileEvents {
411                        events: entry.events,
412                    },
413                )
414            })
415            .collect();
416        ChurnEventState {
417            files,
418            author_pool: self.author_pool,
419        }
420    }
421}
422
423/// Run `git log --numstat` and return event-level churn state.
424fn analyze_churn_events(
425    root: &Path,
426    since: &SinceDuration,
427    revision_range: Option<&str>,
428) -> Option<ChurnEventState> {
429    let mut command = crate::spawn::git();
430    command.arg("log");
431    if let Some(range) = revision_range {
432        command.arg(range);
433    }
434    command
435        .args([
436            "--numstat",
437            "--no-merges",
438            "--no-renames",
439            "--use-mailmap",
440            "--format=format:%at|%ae",
441            &format!("--after={}", since.git_after),
442        ])
443        .current_dir(root);
444
445    let output = match spawn_output(&mut command) {
446        Ok(o) => o,
447        Err(e) => {
448            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
449            return None;
450        }
451    };
452
453    if !output.status.success() {
454        let stderr = String::from_utf8_lossy(&output.stderr);
455        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
456        return None;
457    }
458
459    let stdout = String::from_utf8_lossy(&output.stdout);
460    Some(parse_git_log_events(&stdout, root))
461}
462
463/// Merge new churn events into cached event state.
464fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
465    let mut base_author_index: FxHashMap<String, u32> = base
466        .author_pool
467        .iter()
468        .enumerate()
469        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
470        .collect();
471
472    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
473    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
474        let Ok(old_idx) = u32::try_from(old_idx) else {
475            continue;
476        };
477        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
478        author_mapping.insert(old_idx, new_idx);
479    }
480
481    for (path, mut file) in delta.files {
482        for event in &mut file.events {
483            event.author_idx = event
484                .author_idx
485                .and_then(|idx| author_mapping.get(&idx).copied());
486        }
487        base.files
488            .entry(path)
489            .and_modify(|existing| existing.events.append(&mut file.events))
490            .or_insert(file);
491    }
492}
493
494/// Parse `git log --numstat --format=format:%at|%ae` output into events.
495fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
496    let now_secs = std::time::SystemTime::now()
497        .duration_since(std::time::UNIX_EPOCH)
498        .unwrap_or_default()
499        .as_secs();
500
501    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
502    let mut author_pool: Vec<String> = Vec::new();
503    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
504    let mut current_timestamp: Option<u64> = None;
505    let mut current_author_idx: Option<u32> = None;
506
507    for line in stdout.lines() {
508        let line = line.trim();
509        if line.is_empty() {
510            continue;
511        }
512
513        // Header lines have shape: "<ts>|<email>"
514        if let Some((ts_str, email)) = line.split_once('|')
515            && let Ok(ts) = ts_str.parse::<u64>()
516        {
517            current_timestamp = Some(ts);
518            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
519            continue;
520        }
521
522        // Backwards-compat: bare timestamp (legacy format or test fixtures).
523        if let Ok(ts) = line.parse::<u64>() {
524            current_timestamp = Some(ts);
525            current_author_idx = None;
526            continue;
527        }
528
529        // Numstat line: "10\t5\tpath/to/file"
530        if let Some((added, deleted, path)) = parse_numstat_line(line) {
531            let abs_path = root.join(path);
532            let ts = current_timestamp.unwrap_or(now_secs);
533            files
534                .entry(abs_path)
535                .or_insert_with(|| FileEvents { events: Vec::new() })
536                .events
537                .push(CachedCommitEvent {
538                    timestamp: ts,
539                    lines_added: added,
540                    lines_deleted: deleted,
541                    author_idx: current_author_idx,
542                });
543        }
544    }
545
546    ChurnEventState { files, author_pool }
547}
548
549/// Convert event-level churn state into the public aggregate result.
550#[expect(
551    clippy::cast_possible_truncation,
552    reason = "commit count per file is bounded by git history depth"
553)]
554fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
555    let now_secs = std::time::SystemTime::now()
556        .duration_since(std::time::UNIX_EPOCH)
557        .unwrap_or_default()
558        .as_secs();
559
560    let files = state
561        .files
562        .into_iter()
563        .map(|(path, file)| {
564            let mut timestamps = Vec::with_capacity(file.events.len());
565            let mut weighted_commits = 0.0;
566            let mut lines_added = 0;
567            let mut lines_deleted = 0;
568            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
569
570            for event in file.events {
571                timestamps.push(event.timestamp);
572                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
573                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
574                weighted_commits += weight;
575                lines_added += event.lines_added;
576                lines_deleted += event.lines_deleted;
577
578                if let Some(idx) = event.author_idx {
579                    authors
580                        .entry(idx)
581                        .and_modify(|c| {
582                            c.commits += 1;
583                            c.weighted_commits += weight;
584                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
585                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
586                        })
587                        .or_insert(AuthorContribution {
588                            commits: 1,
589                            weighted_commits: weight,
590                            first_commit_ts: event.timestamp,
591                            last_commit_ts: event.timestamp,
592                        });
593                }
594            }
595
596            let commits = timestamps.len() as u32;
597            let trend = compute_trend(&timestamps);
598            // Round per-author weighted sums for cache stability.
599            for c in authors.values_mut() {
600                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
601            }
602            let churn = FileChurn {
603                path: path.clone(),
604                commits,
605                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
606                lines_added,
607                lines_deleted,
608                trend,
609                authors,
610            };
611            (path, churn)
612        })
613        .collect();
614
615    ChurnResult {
616        files,
617        shallow_clone,
618        author_pool: state.author_pool,
619    }
620}
621
/// Test helper: parse `git log --numstat --format=format:%at|%ae` output
/// straight into aggregated per-file churn.
///
/// Returns the per-file churn map together with the author email pool that
/// the interned indices in [`FileChurn::authors`] refer to. The result is
/// built with the shallow-clone flag set to `false`.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
631
632/// Intern an author email into the pool, returning its stable index.
633fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
634    if let Some(&idx) = index.get(email) {
635        return idx;
636    }
637    #[expect(
638        clippy::cast_possible_truncation,
639        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
640    )]
641    let idx = pool.len() as u32;
642    let owned = email.to_string();
643    index.insert(owned.clone(), idx);
644    pool.push(owned);
645    idx
646}
647
/// Split one numstat row of the form `"10\t5\tpath/to/file.ts"` into
/// `(added, deleted, path)`.
///
/// Returns `None` when the row has fewer than three tab-separated fields or
/// when either count is not a `u32`. That includes binary files, which git
/// reports as `"-\t-\tpath"`. Tabs after the second one stay in the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let mut fields = line.splitn(3, '\t');
    match (fields.next(), fields.next(), fields.next()) {
        (Some(added), Some(deleted), Some(path)) => {
            // Binary files show "-" for both counts and fail to parse here.
            let added: u32 = added.parse().ok()?;
            let deleted: u32 = deleted.parse().ok()?;
            Some((added, deleted, path))
        }
        _ => None,
    }
}
662
663/// Compute churn trend by splitting commits into two temporal halves.
664///
665/// Finds the midpoint between the oldest and newest commit timestamps,
666/// then compares commit counts in each half:
667/// - Recent > 1.5× older → Accelerating
668/// - Recent < 0.67× older → Cooling
669/// - Otherwise → Stable
670fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
671    if timestamps.len() < 2 {
672        return ChurnTrend::Stable;
673    }
674
675    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
676    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
677
678    if max_ts == min_ts {
679        return ChurnTrend::Stable;
680    }
681
682    let midpoint = min_ts + (max_ts - min_ts) / 2;
683    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
684    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
685
686    if older < 1.0 {
687        return ChurnTrend::Stable;
688    }
689
690    let ratio = recent / older;
691    if ratio > 1.5 {
692        ChurnTrend::Accelerating
693    } else if ratio < 0.67 {
694        ChurnTrend::Cooling
695    } else {
696        ChurnTrend::Stable
697    }
698}
699
/// Report whether `input` is a valid calendar date written exactly as
/// `YYYY-MM-DD`.
///
/// Besides the shape (ten bytes, ASCII digits, `-` separators) the month must
/// be `01`–`12` and the day must exist in that month, with February 29th
/// accepted only in leap years. Impossible dates such as `2025-13-45` are
/// rejected here rather than being handed to `git log --after`; callers that
/// fall through to duration parsing will then report them as errors.
fn is_iso_date(input: &str) -> bool {
    /// Fold ASCII digit bytes into a number; `None` if any byte is not a digit.
    fn number(digits: &[u8]) -> Option<u32> {
        digits.iter().try_fold(0_u32, |acc, &b| {
            b.is_ascii_digit().then(|| acc * 10 + u32::from(b - b'0'))
        })
    }

    // Matching on bytes fixes the length and separators in one step and never
    // slices the string, so multi-byte input cannot hit a char boundary.
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = input.as_bytes() else {
        return false;
    };
    let (Some(year), Some(month), Some(day)) = (
        number(&[y0, y1, y2, y3]),
        number(&[m0, m1]),
        number(&[d0, d1]),
    ) else {
        return false;
    };

    let leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        4 | 6 | 9 | 11 => 30,
        2 if leap_year => 29,
        2 => 28,
        _ => return false,
    };
    (1..=days_in_month).contains(&day)
}
708
/// Split a `--since` duration such as `90days` into its leading run of ASCII
/// digits and the remaining unit text.
///
/// # Errors
///
/// Returns an error when the input consists only of digits (or is empty), so
/// there is no unit, or when it does not start with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    // Byte offset of the first character that is not an ASCII digit.
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(boundary) => Ok(input.split_at(boundary)),
    }
}
720
721#[cfg(test)]
722mod tests {
723    use super::*;
724
725    // ── parse_since ──────────────────────────────────────────────
726
727    #[test]
728    fn parse_since_months_short() {
729        let d = parse_since("6m").unwrap();
730        assert_eq!(d.git_after, "6 months ago");
731        assert_eq!(d.display, "6 months");
732    }
733
734    #[test]
735    fn parse_since_months_long() {
736        let d = parse_since("6months").unwrap();
737        assert_eq!(d.git_after, "6 months ago");
738        assert_eq!(d.display, "6 months");
739    }
740
741    #[test]
742    fn parse_since_days() {
743        let d = parse_since("90d").unwrap();
744        assert_eq!(d.git_after, "90 days ago");
745        assert_eq!(d.display, "90 days");
746    }
747
748    #[test]
749    fn parse_since_year_singular() {
750        let d = parse_since("1y").unwrap();
751        assert_eq!(d.git_after, "1 year ago");
752        assert_eq!(d.display, "1 year");
753    }
754
755    #[test]
756    fn parse_since_years_plural() {
757        let d = parse_since("2years").unwrap();
758        assert_eq!(d.git_after, "2 years ago");
759        assert_eq!(d.display, "2 years");
760    }
761
762    #[test]
763    fn parse_since_weeks() {
764        let d = parse_since("2w").unwrap();
765        assert_eq!(d.git_after, "2 weeks ago");
766        assert_eq!(d.display, "2 weeks");
767    }
768
769    #[test]
770    fn parse_since_iso_date() {
771        let d = parse_since("2025-06-01").unwrap();
772        assert_eq!(d.git_after, "2025-06-01");
773        assert_eq!(d.display, "2025-06-01");
774    }
775
776    #[test]
777    fn parse_since_month_singular() {
778        let d = parse_since("1month").unwrap();
779        assert_eq!(d.display, "1 month");
780    }
781
782    #[test]
783    fn parse_since_day_singular() {
784        let d = parse_since("1day").unwrap();
785        assert_eq!(d.display, "1 day");
786    }
787
788    #[test]
789    fn parse_since_zero_rejected() {
790        assert!(parse_since("0m").is_err());
791    }
792
793    #[test]
794    fn parse_since_no_unit_rejected() {
795        assert!(parse_since("90").is_err());
796    }
797
798    #[test]
799    fn parse_since_unknown_unit_rejected() {
800        assert!(parse_since("6x").is_err());
801    }
802
803    #[test]
804    fn parse_since_no_number_rejected() {
805        assert!(parse_since("months").is_err());
806    }
807
808    // ── parse_numstat_line ───────────────────────────────────────
809
810    #[test]
811    fn numstat_normal() {
812        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
813        assert_eq!(a, 10);
814        assert_eq!(d, 5);
815        assert_eq!(p, "src/file.ts");
816    }
817
818    #[test]
819    fn numstat_binary_skipped() {
820        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
821    }
822
823    #[test]
824    fn numstat_zero_lines() {
825        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
826        assert_eq!(a, 0);
827        assert_eq!(d, 0);
828        assert_eq!(p, "src/empty.ts");
829    }
830
831    // ── compute_trend ────────────────────────────────────────────
832
833    #[test]
834    fn trend_empty_is_stable() {
835        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
836    }
837
838    #[test]
839    fn trend_single_commit_is_stable() {
840        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
841    }
842
843    #[test]
844    fn trend_accelerating() {
845        // 2 old commits, 5 recent commits
846        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
847        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
848    }
849
850    #[test]
851    fn trend_cooling() {
852        // 5 old commits, 2 recent commits
853        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
854        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
855    }
856
857    #[test]
858    fn trend_stable_even_distribution() {
859        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
860        let timestamps = vec![100, 200, 300, 700, 800, 900];
861        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
862    }
863
864    #[test]
865    fn trend_same_timestamp_is_stable() {
866        let timestamps = vec![500, 500, 500];
867        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
868    }
869
870    // ── is_iso_date ──────────────────────────────────────────────
871
872    #[test]
873    fn iso_date_valid() {
874        assert!(is_iso_date("2025-06-01"));
875        assert!(is_iso_date("2025-12-31"));
876    }
877
878    #[test]
879    fn iso_date_with_time_rejected() {
880        // Only exact YYYY-MM-DD (10 chars) is accepted
881        assert!(!is_iso_date("2025-06-01T00:00:00"));
882    }
883
884    #[test]
885    fn iso_date_invalid() {
886        assert!(!is_iso_date("6months"));
887        assert!(!is_iso_date("2025"));
888        assert!(!is_iso_date("not-a-date"));
889        assert!(!is_iso_date("abcd-ef-gh"));
890    }
891
892    // ── Display ──────────────────────────────────────────────────
893
894    #[test]
895    fn trend_display() {
896        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
897        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
898        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
899    }
900
901    // ── parse_git_log ───────────────────────────────────────────
902
903    #[test]
904    fn parse_git_log_single_commit() {
905        let root = Path::new("/project");
906        let output = "1700000000\n10\t5\tsrc/index.ts\n";
907        let (result, _) = parse_git_log(output, root);
908        assert_eq!(result.len(), 1);
909        let churn = &result[&PathBuf::from("/project/src/index.ts")];
910        assert_eq!(churn.commits, 1);
911        assert_eq!(churn.lines_added, 10);
912        assert_eq!(churn.lines_deleted, 5);
913    }
914
915    #[test]
916    fn parse_git_log_multiple_commits_same_file() {
917        let root = Path::new("/project");
918        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
919        let (result, _) = parse_git_log(output, root);
920        assert_eq!(result.len(), 1);
921        let churn = &result[&PathBuf::from("/project/src/index.ts")];
922        assert_eq!(churn.commits, 2);
923        assert_eq!(churn.lines_added, 13);
924        assert_eq!(churn.lines_deleted, 7);
925    }
926
927    #[test]
928    fn parse_git_log_multiple_files() {
929        let root = Path::new("/project");
930        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
931        let (result, _) = parse_git_log(output, root);
932        assert_eq!(result.len(), 2);
933        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
934        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
935    }
936
937    #[test]
938    fn parse_git_log_empty_output() {
939        let root = Path::new("/project");
940        let (result, _) = parse_git_log("", root);
941        assert!(result.is_empty());
942    }
943
944    #[test]
945    fn parse_git_log_skips_binary_files() {
946        let root = Path::new("/project");
947        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
948        let (result, _) = parse_git_log(output, root);
949        assert_eq!(result.len(), 1);
950        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
951    }
952
953    #[test]
954    fn parse_git_log_weighted_commits_are_positive() {
955        let root = Path::new("/project");
956        // Use a timestamp near "now" to ensure weight doesn't decay to zero
957        let now_secs = std::time::SystemTime::now()
958            .duration_since(std::time::UNIX_EPOCH)
959            .unwrap()
960            .as_secs();
961        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
962        let (result, _) = parse_git_log(&output, root);
963        let churn = &result[&PathBuf::from("/project/src/a.ts")];
964        assert!(
965            churn.weighted_commits > 0.0,
966            "weighted_commits should be positive for recent commits"
967        );
968    }
969
970    // ── compute_trend edge cases ─────────────────────────────────
971
972    #[test]
973    fn trend_boundary_1_5x_ratio() {
974        // Exactly 1.5x ratio (3 recent : 2 old) → boundary between stable and accelerating
975        // midpoint = 100 + (1000-100)/2 = 550
976        // old: 100, 200 (2 timestamps <= 550)
977        // recent: 600, 800, 1000 (3 timestamps > 550)
978        // ratio = 3/2 = 1.5 — NOT > 1.5, so stable
979        let timestamps = vec![100, 200, 600, 800, 1000];
980        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
981    }
982
983    #[test]
984    fn trend_just_above_1_5x() {
985        // midpoint = 100 + (1000-100)/2 = 550
986        // old: 100 (1 timestamp <= 550)
987        // recent: 600, 800, 1000 (3 timestamps > 550)
988        // ratio = 3/1 = 3.0 → accelerating
989        let timestamps = vec![100, 600, 800, 1000];
990        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
991    }
992
993    #[test]
994    fn trend_boundary_0_67x_ratio() {
995        // Exactly 0.67x ratio → boundary between cooling and stable
996        // midpoint = 100 + (1000-100)/2 = 550
997        // old: 100, 200, 300 (3 timestamps <= 550)
998        // recent: 600, 1000 (2 timestamps > 550)
999        // ratio = 2/3 = 0.666... < 0.67 → cooling
1000        let timestamps = vec![100, 200, 300, 600, 1000];
1001        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
1002    }
1003
1004    #[test]
1005    fn trend_two_timestamps_different() {
1006        // Only 2 timestamps: midpoint = 100 + (200-100)/2 = 150
1007        // old: 100 (1 timestamp <= 150)
1008        // recent: 200 (1 timestamp > 150)
1009        // ratio = 1/1 = 1.0 → stable
1010        let timestamps = vec![100, 200];
1011        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1012    }
1013
1014    // ── parse_since additional coverage ─────────────────────────
1015
1016    #[test]
1017    fn parse_since_week_singular() {
1018        let d = parse_since("1week").unwrap();
1019        assert_eq!(d.git_after, "1 week ago");
1020        assert_eq!(d.display, "1 week");
1021    }
1022
1023    #[test]
1024    fn parse_since_weeks_long() {
1025        let d = parse_since("3weeks").unwrap();
1026        assert_eq!(d.git_after, "3 weeks ago");
1027        assert_eq!(d.display, "3 weeks");
1028    }
1029
1030    #[test]
1031    fn parse_since_days_long() {
1032        let d = parse_since("30days").unwrap();
1033        assert_eq!(d.git_after, "30 days ago");
1034        assert_eq!(d.display, "30 days");
1035    }
1036
1037    #[test]
1038    fn parse_since_year_long() {
1039        let d = parse_since("1year").unwrap();
1040        assert_eq!(d.git_after, "1 year ago");
1041        assert_eq!(d.display, "1 year");
1042    }
1043
1044    #[test]
1045    fn parse_since_overflow_number_rejected() {
1046        // Number too large for u64
1047        let result = parse_since("99999999999999999999d");
1048        assert!(result.is_err());
1049        let err = result.unwrap_err();
1050        assert!(err.contains("invalid number"));
1051    }
1052
1053    #[test]
1054    fn parse_since_zero_days_rejected() {
1055        assert!(parse_since("0d").is_err());
1056    }
1057
1058    #[test]
1059    fn parse_since_zero_weeks_rejected() {
1060        assert!(parse_since("0w").is_err());
1061    }
1062
1063    #[test]
1064    fn parse_since_zero_years_rejected() {
1065        assert!(parse_since("0y").is_err());
1066    }
1067
1068    // ── parse_numstat_line additional coverage ──────────────────
1069
1070    #[test]
1071    fn numstat_missing_path() {
1072        // Only two tab-separated fields, no path
1073        assert!(parse_numstat_line("10\t5").is_none());
1074    }
1075
1076    #[test]
1077    fn numstat_single_field() {
1078        assert!(parse_numstat_line("10").is_none());
1079    }
1080
1081    #[test]
1082    fn numstat_empty_string() {
1083        assert!(parse_numstat_line("").is_none());
1084    }
1085
1086    #[test]
1087    fn numstat_only_added_is_binary() {
1088        // Added is "-" but deleted is numeric
1089        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1090    }
1091
1092    #[test]
1093    fn numstat_only_deleted_is_binary() {
1094        // Added is numeric but deleted is "-"
1095        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1096    }
1097
1098    #[test]
1099    fn numstat_path_with_spaces() {
1100        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1101        assert_eq!(a, 3);
1102        assert_eq!(d, 1);
1103        assert_eq!(p, "path with spaces/file.ts");
1104    }
1105
1106    #[test]
1107    fn numstat_large_numbers() {
1108        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1109        assert_eq!(a, 9999);
1110        assert_eq!(d, 8888);
1111        assert_eq!(p, "src/big.ts");
1112    }
1113
1114    // ── is_iso_date additional coverage ─────────────────────────
1115
1116    #[test]
1117    fn iso_date_wrong_separator_positions() {
1118        // Dashes in wrong positions
1119        assert!(!is_iso_date("20-25-0601"));
1120        assert!(!is_iso_date("202506-01-"));
1121    }
1122
1123    #[test]
1124    fn iso_date_too_short() {
1125        assert!(!is_iso_date("2025-06-0"));
1126    }
1127
1128    #[test]
1129    fn iso_date_letters_in_day() {
1130        assert!(!is_iso_date("2025-06-ab"));
1131    }
1132
1133    #[test]
1134    fn iso_date_letters_in_month() {
1135        assert!(!is_iso_date("2025-ab-01"));
1136    }
1137
1138    // ── split_number_unit additional coverage ───────────────────
1139
1140    #[test]
1141    fn split_number_unit_valid() {
1142        let (num, unit) = split_number_unit("42days").unwrap();
1143        assert_eq!(num, "42");
1144        assert_eq!(unit, "days");
1145    }
1146
1147    #[test]
1148    fn split_number_unit_single_digit() {
1149        let (num, unit) = split_number_unit("1m").unwrap();
1150        assert_eq!(num, "1");
1151        assert_eq!(unit, "m");
1152    }
1153
1154    #[test]
1155    fn split_number_unit_no_digits() {
1156        let err = split_number_unit("abc").unwrap_err();
1157        assert!(err.contains("must start with a number"));
1158    }
1159
1160    #[test]
1161    fn split_number_unit_no_unit() {
1162        let err = split_number_unit("123").unwrap_err();
1163        assert!(err.contains("requires a unit suffix"));
1164    }
1165
1166    // ── parse_git_log additional coverage ───────────────────────
1167
1168    #[test]
1169    fn parse_git_log_numstat_before_timestamp_uses_now() {
1170        let root = Path::new("/project");
1171        // No timestamp line before the numstat line
1172        let output = "10\t5\tsrc/no_ts.ts\n";
1173        let (result, _) = parse_git_log(output, root);
1174        assert_eq!(result.len(), 1);
1175        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1176        assert_eq!(churn.commits, 1);
1177        assert_eq!(churn.lines_added, 10);
1178        assert_eq!(churn.lines_deleted, 5);
1179        // Without a timestamp, it falls back to now_secs, so weight should be ~1.0
1180        assert!(
1181            churn.weighted_commits > 0.9,
1182            "weight should be near 1.0 when timestamp defaults to now"
1183        );
1184    }
1185
1186    #[test]
1187    fn parse_git_log_whitespace_lines_ignored() {
1188        let root = Path::new("/project");
1189        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1190        let (result, _) = parse_git_log(output, root);
1191        assert_eq!(result.len(), 1);
1192    }
1193
1194    #[test]
1195    fn parse_git_log_trend_is_computed_per_file() {
1196        let root = Path::new("/project");
1197        // Two commits far apart for one file, recent-heavy for another
1198        let output = "\
11991000\n5\t1\tsrc/old.ts\n\
12002000\n3\t1\tsrc/old.ts\n\
12011000\n1\t0\tsrc/hot.ts\n\
12021800\n1\t0\tsrc/hot.ts\n\
12031900\n1\t0\tsrc/hot.ts\n\
12041950\n1\t0\tsrc/hot.ts\n\
12052000\n1\t0\tsrc/hot.ts\n";
1206        let (result, _) = parse_git_log(output, root);
1207        let old = &result[&PathBuf::from("/project/src/old.ts")];
1208        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
1209        assert_eq!(old.commits, 2);
1210        assert_eq!(hot.commits, 5);
1211        // hot.ts has 4 recent vs 1 old => accelerating
1212        assert_eq!(hot.trend, ChurnTrend::Accelerating);
1213    }
1214
1215    #[test]
1216    fn parse_git_log_weighted_decay_for_old_commits() {
1217        let root = Path::new("/project");
1218        let now = std::time::SystemTime::now()
1219            .duration_since(std::time::UNIX_EPOCH)
1220            .unwrap()
1221            .as_secs();
1222        // One commit from 180 days ago (two half-lives) should weigh ~0.25
1223        let old_ts = now - (180 * 86_400);
1224        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1225        let (result, _) = parse_git_log(&output, root);
1226        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1227        assert!(
1228            churn.weighted_commits < 0.5,
1229            "180-day-old commit should weigh ~0.25, got {}",
1230            churn.weighted_commits
1231        );
1232        assert!(
1233            churn.weighted_commits > 0.1,
1234            "180-day-old commit should weigh ~0.25, got {}",
1235            churn.weighted_commits
1236        );
1237    }
1238
1239    #[test]
1240    fn parse_git_log_path_stored_as_absolute() {
1241        let root = Path::new("/my/project");
1242        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1243        let (result, _) = parse_git_log(output, root);
1244        let key = PathBuf::from("/my/project/lib/utils.ts");
1245        assert!(result.contains_key(&key));
1246        assert_eq!(result[&key].path, key);
1247    }
1248
1249    #[test]
1250    fn parse_git_log_weighted_commits_rounded() {
1251        let root = Path::new("/project");
1252        let now = std::time::SystemTime::now()
1253            .duration_since(std::time::UNIX_EPOCH)
1254            .unwrap()
1255            .as_secs();
1256        // A commit right now should weigh exactly 1.00
1257        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1258        let (result, _) = parse_git_log(&output, root);
1259        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1260        // Weighted commits are rounded to 2 decimal places
1261        let decimals = format!("{:.2}", churn.weighted_commits);
1262        assert_eq!(
1263            churn.weighted_commits.to_string().len(),
1264            decimals.len().min(churn.weighted_commits.to_string().len()),
1265            "weighted_commits should be rounded to at most 2 decimal places"
1266        );
1267    }
1268
1269    // ── ChurnTrend serde ────────────────────────────────────────
1270
1271    #[test]
1272    fn trend_serde_serialization() {
1273        assert_eq!(
1274            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1275            "\"accelerating\""
1276        );
1277        assert_eq!(
1278            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1279            "\"stable\""
1280        );
1281        assert_eq!(
1282            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1283            "\"cooling\""
1284        );
1285    }
1286
1287    // ── parse_git_log: author tracking ──────────────────────────
1288
1289    #[test]
1290    fn parse_git_log_extracts_author_email() {
1291        let root = Path::new("/project");
1292        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1293        let (result, pool) = parse_git_log(output, root);
1294        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1295        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1296        assert_eq!(churn.authors.len(), 1);
1297        let alice = &churn.authors[&0];
1298        assert_eq!(alice.commits, 1);
1299        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1300        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1301    }
1302
1303    #[test]
1304    fn parse_git_log_intern_dedupes_authors() {
1305        let root = Path::new("/project");
1306        let output = "\
13071700000000|alice@example.com
13081\t0\ta.ts
13091700100000|bob@example.com
13102\t1\tb.ts
13111700200000|alice@example.com
13123\t2\tc.ts
1313";
1314        let (_result, pool) = parse_git_log(output, root);
1315        assert_eq!(pool.len(), 2);
1316        assert!(pool.contains(&"alice@example.com".to_string()));
1317        assert!(pool.contains(&"bob@example.com".to_string()));
1318    }
1319
1320    #[test]
1321    fn parse_git_log_aggregates_per_author() {
1322        let root = Path::new("/project");
1323        // alice touches index.ts twice, bob once.
1324        let output = "\
13251700000000|alice@example.com
13261\t0\tsrc/index.ts
13271700100000|bob@example.com
13282\t0\tsrc/index.ts
13291700200000|alice@example.com
13301\t1\tsrc/index.ts
1331";
1332        let (result, pool) = parse_git_log(output, root);
1333        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1334        assert_eq!(churn.commits, 3);
1335        assert_eq!(churn.authors.len(), 2);
1336
1337        let alice_idx =
1338            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
1339        let alice = &churn.authors[&alice_idx];
1340        assert_eq!(alice.commits, 2);
1341        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1342        assert_eq!(alice.last_commit_ts, 1_700_200_000);
1343    }
1344
1345    #[test]
1346    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1347        // Backwards-compat path: header has no `|email` suffix.
1348        let root = Path::new("/project");
1349        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1350        let (result, pool) = parse_git_log(output, root);
1351        assert!(pool.is_empty());
1352        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1353        assert_eq!(churn.commits, 1);
1354        assert!(churn.authors.is_empty());
1355    }
1356
1357    // ── intern_author ──────────────────────────────────────────
1358
1359    #[test]
1360    fn intern_author_returns_existing_index() {
1361        let mut pool = Vec::new();
1362        let mut index = FxHashMap::default();
1363        let i1 = intern_author("alice@x", &mut pool, &mut index);
1364        let i2 = intern_author("alice@x", &mut pool, &mut index);
1365        assert_eq!(i1, i2);
1366        assert_eq!(pool.len(), 1);
1367    }
1368
1369    #[test]
1370    fn intern_author_assigns_sequential_indices() {
1371        let mut pool = Vec::new();
1372        let mut index = FxHashMap::default();
1373        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1374        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1375        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1376        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1377    }
1378
1379    // ── incremental cache ───────────────────────────────────────
1380
1381    fn git(root: &Path, args: &[&str]) {
1382        let status = std::process::Command::new("git")
1383            .args(args)
1384            .current_dir(root)
1385            .status()
1386            .expect("run git");
1387        assert!(status.success(), "git {args:?} failed");
1388    }
1389
1390    fn write(root: &Path, path: &str, contents: &str) {
1391        let path = root.join(path);
1392        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
1393        std::fs::write(path, contents).unwrap();
1394    }
1395
1396    #[test]
1397    fn cached_churn_merges_new_commits_after_head_advances() {
1398        let repo = tempfile::tempdir().expect("create repo");
1399        let root = repo.path();
1400        git(root, &["init"]);
1401        git(root, &["config", "user.email", "churn@example.test"]);
1402        git(root, &["config", "user.name", "Churn Test"]);
1403        git(root, &["config", "commit.gpgsign", "false"]);
1404
1405        write(root, "src/a.ts", "export const a = 1;\n");
1406        git(root, &["add", "."]);
1407        git(root, &["commit", "-m", "initial"]);
1408
1409        let since = parse_since("1y").unwrap();
1410        let cache = tempfile::tempdir().expect("create cache dir");
1411        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1412        assert!(!cold_hit);
1413        let file = root.join("src/a.ts");
1414        assert_eq!(cold.files[&file].commits, 1);
1415
1416        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1417        assert!(warm_hit);
1418
1419        write(
1420            root,
1421            "src/a.ts",
1422            "export const a = 1;\nexport const b = 2;\n",
1423        );
1424        git(root, &["add", "."]);
1425        git(root, &["commit", "-m", "update a"]);
1426        let head = get_head_sha(root).unwrap();
1427
1428        let (incremental, incremental_hit) =
1429            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1430        assert!(incremental_hit);
1431        assert_eq!(incremental.files[&file].commits, 2);
1432
1433        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
1434        assert_eq!(cache.last_indexed_sha, head);
1435    }
1436}