// fallow_core/churn.rs
//! Git churn analysis for hotspot detection.
//!
//! Shells out to `git log` to collect per-file change history, then computes
//! recency-weighted churn scores and trend indicators.

6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use serde::Serialize;
11
12use crate::git_env::clear_ambient_git_env;
13
/// Number of seconds in one day, used to convert commit-age deltas to days.
const SECS_PER_DAY: f64 = 86_400.0;

/// Recency weight half-life in days. A commit from 90 days ago counts half
/// as much as today's commit; 180 days ago counts 25%.
/// Used by `build_churn_result` when computing `weighted_commits`.
const HALF_LIFE_DAYS: f64 = 90.0;
20
/// Parsed duration for the `--since` flag.
///
/// Produced by [`parse_since`] from either a relative duration (e.g. `6m`,
/// `90d`) or an ISO date (`2025-06-01`).
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`).
    pub display: String,
}
29
/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
///
/// Computed by `compute_trend`. Serialized in `snake_case` for JSON output
/// and bitcode-encoded for the on-disk churn cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// Recent half has >1.5× the commits of the older half.
    Accelerating,
    /// Churn is roughly stable between halves.
    Stable,
    /// Recent half has <0.67× the commits of the older half.
    Cooling,
}
41
42impl std::fmt::Display for ChurnTrend {
43    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
44        match self {
45            Self::Accelerating => write!(f, "accelerating"),
46            Self::Stable => write!(f, "stable"),
47            Self::Cooling => write!(f, "cooling"),
48        }
49    }
50}
51
/// Per-author commit aggregation for a single file.
///
/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
/// per-file maps small and the bitcode cache compact.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Total commits by this author touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit sum (exponential decay, half-life 90 days),
    /// rounded to two decimals by `build_churn_result` for cache stability.
    pub weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    pub first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    pub last_commit_ts: u64,
}
67
/// Per-file churn data collected from git history.
///
/// Built by `build_churn_result` from per-commit events.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// Absolute file path.
    pub path: PathBuf,
    /// Total number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimals for cache stability.
    pub weighted_commits: f64,
    /// Total lines added across all commits.
    pub lines_added: u32,
    /// Total lines deleted across all commits.
    pub lines_deleted: u32,
    /// Churn trend: accelerating, stable, or cooling.
    pub trend: ChurnTrend,
    /// Per-author contributions keyed by interned author index.
    /// Indices reference [`ChurnResult::author_pool`].
    pub authors: FxHashMap<u32, AuthorContribution>,
}
87
/// Result of churn analysis.
pub struct ChurnResult {
    /// Per-file churn data, keyed by absolute path.
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository is a shallow clone (history may be truncated,
    /// which can understate commit counts).
    pub shallow_clone: bool,
    /// Author email pool. Per-file [`AuthorContribution`] entries reference
    /// authors by their index into this vector.
    pub author_pool: Vec<String>,
}
98
99/// Parse a `--since` value into a git-compatible duration.
100///
101/// Accepts:
102/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
103/// - ISO dates: `2025-06-01`
104///
105/// # Errors
106///
107/// Returns an error if the input is not a recognized duration format or ISO date,
108/// the numeric part is invalid, or the duration is zero.
109pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
110    // Try ISO date first (YYYY-MM-DD)
111    if is_iso_date(input) {
112        return Ok(SinceDuration {
113            git_after: input.to_string(),
114            display: input.to_string(),
115        });
116    }
117
118    // Parse duration: number + unit
119    let (num_str, unit) = split_number_unit(input)?;
120    let num: u64 = num_str
121        .parse()
122        .map_err(|_| format!("invalid number in --since: {input}"))?;
123
124    if num == 0 {
125        return Err("--since duration must be greater than 0".to_string());
126    }
127
128    match unit {
129        "d" | "day" | "days" => {
130            let s = if num == 1 { "" } else { "s" };
131            Ok(SinceDuration {
132                git_after: format!("{num} day{s} ago"),
133                display: format!("{num} day{s}"),
134            })
135        }
136        "w" | "week" | "weeks" => {
137            let s = if num == 1 { "" } else { "s" };
138            Ok(SinceDuration {
139                git_after: format!("{num} week{s} ago"),
140                display: format!("{num} week{s}"),
141            })
142        }
143        "m" | "month" | "months" => {
144            let s = if num == 1 { "" } else { "s" };
145            Ok(SinceDuration {
146                git_after: format!("{num} month{s} ago"),
147                display: format!("{num} month{s}"),
148            })
149        }
150        "y" | "year" | "years" => {
151            let s = if num == 1 { "" } else { "s" };
152            Ok(SinceDuration {
153                git_after: format!("{num} year{s} ago"),
154                display: format!("{num} year{s}"),
155            })
156        }
157        _ => Err(format!(
158            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
159        )),
160    }
161}
162
/// Analyze git churn for files in the given root directory.
///
/// Runs a full (non-incremental) `git log` scan over the `since` window and
/// aggregates it into per-file churn. For the disk-cached variant see
/// [`analyze_churn_cached`].
///
/// Returns `None` if git is not available or the directory is not a git repository.
pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
    // Detect shallowness first so the result can flag possibly-truncated history.
    let shallow = is_shallow_clone(root);
    let state = analyze_churn_events(root, since, None)?;
    Some(build_churn_result(state, shallow))
}
171
172/// Check if the repository is a shallow clone.
173#[must_use]
174pub fn is_shallow_clone(root: &Path) -> bool {
175    let mut command = Command::new("git");
176    command
177        .args(["rev-parse", "--is-shallow-repository"])
178        .current_dir(root);
179    clear_ambient_git_env(&mut command);
180    command.output().is_ok_and(|o| {
181        String::from_utf8_lossy(&o.stdout)
182            .trim()
183            .eq_ignore_ascii_case("true")
184    })
185}
186
187/// Check if the directory is inside a git repository.
188#[must_use]
189pub fn is_git_repo(root: &Path) -> bool {
190    let mut command = Command::new("git");
191    command
192        .args(["rev-parse", "--git-dir"])
193        .current_dir(root)
194        .stdout(std::process::Stdio::null())
195        .stderr(std::process::Stdio::null());
196    clear_ambient_git_env(&mut command);
197    command.status().is_ok_and(|s| s.success())
198}
199
// ── Churn cache ──────────────────────────────────────────────────

/// Maximum size of a churn cache file (64 MB) accepted on load; larger
/// payloads are rejected before decoding. The incremental cache stores
/// per-commit events, so it needs more headroom than the old aggregate rows.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;

/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
/// changes so older payloads are rejected on load. Bumped to 3 when the cache
/// switched from aggregate rows to per-commit events for incremental updates.
const CHURN_CACHE_VERSION: u8 = 3;
210
/// Serializable per-commit event for the disk cache.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedCommitEvent {
    /// Commit author timestamp (epoch seconds, from the `%at` log format).
    timestamp: u64,
    /// Lines added to the file by this commit.
    lines_added: u32,
    /// Lines deleted from the file by this commit.
    lines_deleted: u32,
    /// Interned author index into the author pool, or `None` when the log
    /// header carried only a bare timestamp (legacy format).
    author_idx: Option<u32>,
}
219
/// Serializable per-file churn entry for the disk cache.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// Absolute file path, stored lossily as a UTF-8 string.
    path: String,
    /// All commit events touching this file within the analysis window.
    events: Vec<CachedCommitEvent>,
}
226
/// Cached churn data keyed by last indexed SHA and since string.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
    version: u8,
    /// HEAD SHA at the time the cache was written; used for hit/incremental
    /// detection on load.
    last_indexed_sha: String,
    /// The `--after` value the cache was built with; a different window
    /// invalidates the cache.
    git_after: String,
    /// Per-file commit events.
    files: Vec<CachedFileChurn>,
    /// Whether the repository was a shallow clone when the cache was written.
    shallow_clone: bool,
    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
    author_pool: Vec<String>,
}
239
/// Per-file commit events retained in memory while building or updating churn.
struct FileEvents {
    /// One entry per commit touching the file, in the order they were parsed
    /// (merged deltas are appended).
    events: Vec<CachedCommitEvent>,
}
244
/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
/// timestamps so a cache can merge new commits and recompute trend/recency.
struct ChurnEventState {
    /// Per-file events keyed by absolute path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Interned author emails referenced by event `author_idx` values.
    author_pool: Vec<String>,
}
251
252/// Get the full HEAD SHA for cache keying.
253fn get_head_sha(root: &Path) -> Option<String> {
254    let mut command = Command::new("git");
255    command.args(["rev-parse", "HEAD"]).current_dir(root);
256    clear_ambient_git_env(&mut command);
257    command
258        .output()
259        .ok()
260        .filter(|o| o.status.success())
261        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
262}
263
264/// Check whether `ancestor` is still reachable from `descendant`.
265fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
266    let mut command = Command::new("git");
267    command
268        .args(["merge-base", "--is-ancestor", ancestor, descendant])
269        .current_dir(root);
270    clear_ambient_git_env(&mut command);
271    command.status().is_ok_and(|s| s.success())
272}
273
274/// Try to load churn data from disk cache. Returns `None` on cache miss
275/// or version mismatch.
276fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
277    let cache_file = cache_dir.join("churn.bin");
278    let data = std::fs::read(&cache_file).ok()?;
279    if data.len() > MAX_CHURN_CACHE_SIZE {
280        return None;
281    }
282    let cache: ChurnCache = bitcode::decode(&data).ok()?;
283    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
284        return None;
285    }
286    Some(cache)
287}
288
289/// Save churn data to disk cache.
290fn save_churn_cache(
291    cache_dir: &Path,
292    last_indexed_sha: &str,
293    git_after: &str,
294    state: &ChurnEventState,
295    shallow_clone: bool,
296) {
297    let files: Vec<CachedFileChurn> = state
298        .files
299        .iter()
300        .map(|f| CachedFileChurn {
301            path: f.0.to_string_lossy().to_string(),
302            events: f.1.events.clone(),
303        })
304        .collect();
305    let cache = ChurnCache {
306        version: CHURN_CACHE_VERSION,
307        last_indexed_sha: last_indexed_sha.to_string(),
308        git_after: git_after.to_string(),
309        files,
310        shallow_clone,
311        author_pool: state.author_pool.clone(),
312    };
313    let _ = std::fs::create_dir_all(cache_dir);
314    let data = bitcode::encode(&cache);
315    // Write to temp file then rename for atomic update (avoids partial reads by concurrent processes)
316    let tmp = cache_dir.join("churn.bin.tmp");
317    if std::fs::write(&tmp, data).is_ok() {
318        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
319    }
320}
321
/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
/// since duration match. If HEAD advanced from the cached SHA, runs an
/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
///
/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
/// cache state was used.
/// Returns `None` if git analysis fails.
pub fn analyze_churn_cached(
    root: &Path,
    since: &SinceDuration,
    cache_dir: &Path,
    no_cache: bool,
) -> Option<(ChurnResult, bool)> {
    let head_sha = get_head_sha(root)?;

    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
        // Exact hit: HEAD has not moved since the cache was written.
        if cache.last_indexed_sha == head_sha {
            let shallow_clone = cache.shallow_clone;
            let state = cache.into_event_state();
            return Some((build_churn_result(state, shallow_clone), true));
        }

        // Cached SHA is still an ancestor of HEAD: scan only the new commits
        // and merge them into the cached event state.
        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
            let shallow_clone = is_shallow_clone(root);
            let range = format!("{}..HEAD", cache.last_indexed_sha);
            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
                let mut state = cache.into_event_state();
                merge_churn_states(&mut state, delta);
                // Persist the merged state under the new HEAD SHA.
                save_churn_cache(
                    cache_dir,
                    &head_sha,
                    &since.git_after,
                    &state,
                    shallow_clone,
                );
                return Some((build_churn_result(state, shallow_clone), true));
            }
        }
    }

    // Cache disabled, missing, stale, or history rewritten: full scan.
    let shallow_clone = is_shallow_clone(root);
    let state = analyze_churn_events(root, since, None)?;
    if !no_cache {
        save_churn_cache(
            cache_dir,
            &head_sha,
            &since.git_after,
            &state,
            shallow_clone,
        );
    }

    let result = build_churn_result(state, shallow_clone);
    Some((result, false))
}
377
378// ── Internal ──────────────────────────────────────────────────────
379
380impl ChurnCache {
381    fn into_event_state(self) -> ChurnEventState {
382        let files = self
383            .files
384            .into_iter()
385            .map(|entry| {
386                (
387                    PathBuf::from(entry.path),
388                    FileEvents {
389                        events: entry.events,
390                    },
391                )
392            })
393            .collect();
394        ChurnEventState {
395            files,
396            author_pool: self.author_pool,
397        }
398    }
399}
400
/// Run `git log --numstat` and return event-level churn state.
///
/// `revision_range` restricts the scan (e.g. `"<sha>..HEAD"` for incremental
/// updates); `None` scans the whole `--after` window.
///
/// Returns `None` (after logging a warning) when git cannot be spawned or
/// exits with a failure status.
fn analyze_churn_events(
    root: &Path,
    since: &SinceDuration,
    revision_range: Option<&str>,
) -> Option<ChurnEventState> {
    let mut command = Command::new("git");
    command.arg("log");
    if let Some(range) = revision_range {
        command.arg(range);
    }
    command
        .args([
            "--numstat",               // per-file added/deleted counts per commit
            "--no-merges",             // exclude merge commits
            "--no-renames",            // disable rename detection; keep raw paths
            "--use-mailmap",           // canonicalize author identities
            "--format=format:%at|%ae", // header line: author timestamp|email
            &format!("--after={}", since.git_after),
        ])
        .current_dir(root);
    clear_ambient_git_env(&mut command);

    let output = match command.output() {
        Ok(o) => o,
        Err(e) => {
            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
            return None;
        }
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    Some(parse_git_log_events(&stdout, root))
}
441
/// Merge new churn events into cached event state.
///
/// The delta was parsed with its own author pool, so every delta author
/// index must be re-interned into `base`'s pool before events are appended.
fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
    // Reverse lookup over base's pool: email -> base index.
    let mut base_author_index: FxHashMap<String, u32> = base
        .author_pool
        .iter()
        .enumerate()
        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
        .collect();

    // Build delta index -> base index mapping, interning unseen authors.
    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
        let Ok(old_idx) = u32::try_from(old_idx) else {
            continue;
        };
        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
        author_mapping.insert(old_idx, new_idx);
    }

    for (path, mut file) in delta.files {
        // Rewrite author indices in place; unmapped indices become None.
        for event in &mut file.events {
            event.author_idx = event
                .author_idx
                .and_then(|idx| author_mapping.get(&idx).copied());
        }
        // Append to an existing file's events, or adopt the whole entry.
        base.files
            .entry(path)
            .and_modify(|existing| existing.events.append(&mut file.events))
            .or_insert(file);
    }
}
472
/// Parse `git log --numstat --format=format:%at|%ae` output into events.
///
/// The stream alternates commit headers (`<ts>|<email>`, or a bare timestamp
/// in the legacy format) with numstat lines; each numstat line becomes one
/// [`CachedCommitEvent`] attributed to the most recent header.
fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
    let mut author_pool: Vec<String> = Vec::new();
    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
    // State carried from the most recent commit header line.
    let mut current_timestamp: Option<u64> = None;
    let mut current_author_idx: Option<u32> = None;

    for line in stdout.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        // Header lines have shape: "<ts>|<email>"
        if let Some((ts_str, email)) = line.split_once('|')
            && let Ok(ts) = ts_str.parse::<u64>()
        {
            current_timestamp = Some(ts);
            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
            continue;
        }

        // Backwards-compat: bare timestamp (legacy format or test fixtures).
        if let Ok(ts) = line.parse::<u64>() {
            current_timestamp = Some(ts);
            current_author_idx = None;
            continue;
        }

        // Numstat line: "10\t5\tpath/to/file"
        if let Some((added, deleted, path)) = parse_numstat_line(line) {
            let abs_path = root.join(path);
            // Fall back to "now" if no header preceded this numstat line.
            let ts = current_timestamp.unwrap_or(now_secs);
            files
                .entry(abs_path)
                .or_insert_with(|| FileEvents { events: Vec::new() })
                .events
                .push(CachedCommitEvent {
                    timestamp: ts,
                    lines_added: added,
                    lines_deleted: deleted,
                    author_idx: current_author_idx,
                });
        }
    }

    ChurnEventState { files, author_pool }
}
527
/// Convert event-level churn state into the public aggregate result.
///
/// Per file this computes commit totals, the recency-weighted commit sum
/// (half-life [`HALF_LIFE_DAYS`]), line add/delete totals, per-author
/// contributions, and the trend indicator.
#[expect(
    clippy::cast_possible_truncation,
    reason = "commit count per file is bounded by git history depth"
)]
fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    let files = state
        .files
        .into_iter()
        .map(|(path, file)| {
            let mut timestamps = Vec::with_capacity(file.events.len());
            let mut weighted_commits = 0.0;
            let mut lines_added = 0;
            let mut lines_deleted = 0;
            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();

            for event in file.events {
                timestamps.push(event.timestamp);
                // Exponential decay: a commit's weight halves every HALF_LIFE_DAYS.
                // saturating_sub guards against clock skew (future timestamps).
                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
                weighted_commits += weight;
                lines_added += event.lines_added;
                lines_deleted += event.lines_deleted;

                // Events without an author index (legacy headers) are
                // counted in totals but not in per-author stats.
                if let Some(idx) = event.author_idx {
                    authors
                        .entry(idx)
                        .and_modify(|c| {
                            c.commits += 1;
                            c.weighted_commits += weight;
                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
                        })
                        .or_insert(AuthorContribution {
                            commits: 1,
                            weighted_commits: weight,
                            first_commit_ts: event.timestamp,
                            last_commit_ts: event.timestamp,
                        });
                }
            }

            let commits = timestamps.len() as u32;
            let trend = compute_trend(&timestamps);
            // Round per-author weighted sums for cache stability.
            for c in authors.values_mut() {
                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
            }
            let churn = FileChurn {
                path: path.clone(),
                commits,
                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
                lines_added,
                lines_deleted,
                trend,
                authors,
            };
            (path, churn)
        })
        .collect();

    ChurnResult {
        files,
        shallow_clone,
        author_pool: state.author_pool,
    }
}
600
/// Parse `git log --numstat --format=format:%at|%ae` output.
///
/// Test-only convenience wrapper over [`parse_git_log_events`] followed by
/// [`build_churn_result`] (with `shallow_clone` forced to `false`).
///
/// Returns a per-file churn map plus the author email pool referenced by
/// interned indices in [`FileChurn::authors`].
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let result = build_churn_result(parse_git_log_events(stdout, root), false);
    (result.files, result.author_pool)
}
610
611/// Intern an author email into the pool, returning its stable index.
612fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
613    if let Some(&idx) = index.get(email) {
614        return idx;
615    }
616    #[expect(
617        clippy::cast_possible_truncation,
618        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
619    )]
620    let idx = pool.len() as u32;
621    let owned = email.to_string();
622    index.insert(owned.clone(), idx);
623    pool.push(owned);
624    idx
625}
626
/// Parse a single numstat line: `"10\t5\tpath/to/file.ts"`.
/// Binary files show as `"-\t-\tpath"` — skip those.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    // At most three fields; the path may itself contain tabs.
    let mut fields = line.splitn(3, '\t');
    // "-" (binary files) fails to parse as u32 and yields None.
    let added: u32 = fields.next()?.parse().ok()?;
    let deleted: u32 = fields.next()?.parse().ok()?;
    let path = fields.next()?;
    Some((added, deleted, path))
}
641
642/// Compute churn trend by splitting commits into two temporal halves.
643///
644/// Finds the midpoint between the oldest and newest commit timestamps,
645/// then compares commit counts in each half:
646/// - Recent > 1.5× older → Accelerating
647/// - Recent < 0.67× older → Cooling
648/// - Otherwise → Stable
649fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
650    if timestamps.len() < 2 {
651        return ChurnTrend::Stable;
652    }
653
654    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
655    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
656
657    if max_ts == min_ts {
658        return ChurnTrend::Stable;
659    }
660
661    let midpoint = min_ts + (max_ts - min_ts) / 2;
662    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
663    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
664
665    if older < 1.0 {
666        return ChurnTrend::Stable;
667    }
668
669    let ratio = recent / older;
670    if ratio > 1.5 {
671        ChurnTrend::Accelerating
672    } else if ratio < 0.67 {
673        ChurnTrend::Cooling
674    } else {
675        ChurnTrend::Stable
676    }
677}
678
/// Check for an exact `YYYY-MM-DD` shape: 10 ASCII characters, dashes at
/// positions 4 and 7, digits everywhere else. Calendar validity (month and
/// day ranges) is not checked; git validates the actual date.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    if bytes.len() != 10 {
        return false;
    }
    bytes.iter().enumerate().all(|(i, &b)| match i {
        4 | 7 => b == b'-',
        _ => b.is_ascii_digit(),
    })
}
687
/// Split a duration string like `"90d"` into its numeric prefix and unit
/// suffix. Errors when there is no suffix at all or no leading number.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    // Position of the first non-digit character, if any.
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(pos) => Ok(input.split_at(pos)),
    }
}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703
704    // ── parse_since ──────────────────────────────────────────────
705
706    #[test]
707    fn parse_since_months_short() {
708        let d = parse_since("6m").unwrap();
709        assert_eq!(d.git_after, "6 months ago");
710        assert_eq!(d.display, "6 months");
711    }
712
713    #[test]
714    fn parse_since_months_long() {
715        let d = parse_since("6months").unwrap();
716        assert_eq!(d.git_after, "6 months ago");
717        assert_eq!(d.display, "6 months");
718    }
719
720    #[test]
721    fn parse_since_days() {
722        let d = parse_since("90d").unwrap();
723        assert_eq!(d.git_after, "90 days ago");
724        assert_eq!(d.display, "90 days");
725    }
726
727    #[test]
728    fn parse_since_year_singular() {
729        let d = parse_since("1y").unwrap();
730        assert_eq!(d.git_after, "1 year ago");
731        assert_eq!(d.display, "1 year");
732    }
733
734    #[test]
735    fn parse_since_years_plural() {
736        let d = parse_since("2years").unwrap();
737        assert_eq!(d.git_after, "2 years ago");
738        assert_eq!(d.display, "2 years");
739    }
740
741    #[test]
742    fn parse_since_weeks() {
743        let d = parse_since("2w").unwrap();
744        assert_eq!(d.git_after, "2 weeks ago");
745        assert_eq!(d.display, "2 weeks");
746    }
747
748    #[test]
749    fn parse_since_iso_date() {
750        let d = parse_since("2025-06-01").unwrap();
751        assert_eq!(d.git_after, "2025-06-01");
752        assert_eq!(d.display, "2025-06-01");
753    }
754
755    #[test]
756    fn parse_since_month_singular() {
757        let d = parse_since("1month").unwrap();
758        assert_eq!(d.display, "1 month");
759    }
760
761    #[test]
762    fn parse_since_day_singular() {
763        let d = parse_since("1day").unwrap();
764        assert_eq!(d.display, "1 day");
765    }
766
767    #[test]
768    fn parse_since_zero_rejected() {
769        assert!(parse_since("0m").is_err());
770    }
771
772    #[test]
773    fn parse_since_no_unit_rejected() {
774        assert!(parse_since("90").is_err());
775    }
776
777    #[test]
778    fn parse_since_unknown_unit_rejected() {
779        assert!(parse_since("6x").is_err());
780    }
781
782    #[test]
783    fn parse_since_no_number_rejected() {
784        assert!(parse_since("months").is_err());
785    }
786
787    // ── parse_numstat_line ───────────────────────────────────────
788
789    #[test]
790    fn numstat_normal() {
791        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
792        assert_eq!(a, 10);
793        assert_eq!(d, 5);
794        assert_eq!(p, "src/file.ts");
795    }
796
797    #[test]
798    fn numstat_binary_skipped() {
799        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
800    }
801
802    #[test]
803    fn numstat_zero_lines() {
804        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
805        assert_eq!(a, 0);
806        assert_eq!(d, 0);
807        assert_eq!(p, "src/empty.ts");
808    }
809
810    // ── compute_trend ────────────────────────────────────────────
811
812    #[test]
813    fn trend_empty_is_stable() {
814        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
815    }
816
817    #[test]
818    fn trend_single_commit_is_stable() {
819        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
820    }
821
822    #[test]
823    fn trend_accelerating() {
824        // 2 old commits, 5 recent commits
825        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
826        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
827    }
828
829    #[test]
830    fn trend_cooling() {
831        // 5 old commits, 2 recent commits
832        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
833        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
834    }
835
836    #[test]
837    fn trend_stable_even_distribution() {
838        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
839        let timestamps = vec![100, 200, 300, 700, 800, 900];
840        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
841    }
842
843    #[test]
844    fn trend_same_timestamp_is_stable() {
845        let timestamps = vec![500, 500, 500];
846        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
847    }
848
849    // ── is_iso_date ──────────────────────────────────────────────
850
851    #[test]
852    fn iso_date_valid() {
853        assert!(is_iso_date("2025-06-01"));
854        assert!(is_iso_date("2025-12-31"));
855    }
856
857    #[test]
858    fn iso_date_with_time_rejected() {
859        // Only exact YYYY-MM-DD (10 chars) is accepted
860        assert!(!is_iso_date("2025-06-01T00:00:00"));
861    }
862
863    #[test]
864    fn iso_date_invalid() {
865        assert!(!is_iso_date("6months"));
866        assert!(!is_iso_date("2025"));
867        assert!(!is_iso_date("not-a-date"));
868        assert!(!is_iso_date("abcd-ef-gh"));
869    }
870
871    // ── Display ──────────────────────────────────────────────────
872
873    #[test]
874    fn trend_display() {
875        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
876        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
877        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
878    }
879
880    // ── parse_git_log ───────────────────────────────────────────
881
882    #[test]
883    fn parse_git_log_single_commit() {
884        let root = Path::new("/project");
885        let output = "1700000000\n10\t5\tsrc/index.ts\n";
886        let (result, _) = parse_git_log(output, root);
887        assert_eq!(result.len(), 1);
888        let churn = &result[&PathBuf::from("/project/src/index.ts")];
889        assert_eq!(churn.commits, 1);
890        assert_eq!(churn.lines_added, 10);
891        assert_eq!(churn.lines_deleted, 5);
892    }
893
894    #[test]
895    fn parse_git_log_multiple_commits_same_file() {
896        let root = Path::new("/project");
897        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
898        let (result, _) = parse_git_log(output, root);
899        assert_eq!(result.len(), 1);
900        let churn = &result[&PathBuf::from("/project/src/index.ts")];
901        assert_eq!(churn.commits, 2);
902        assert_eq!(churn.lines_added, 13);
903        assert_eq!(churn.lines_deleted, 7);
904    }
905
906    #[test]
907    fn parse_git_log_multiple_files() {
908        let root = Path::new("/project");
909        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
910        let (result, _) = parse_git_log(output, root);
911        assert_eq!(result.len(), 2);
912        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
913        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
914    }
915
916    #[test]
917    fn parse_git_log_empty_output() {
918        let root = Path::new("/project");
919        let (result, _) = parse_git_log("", root);
920        assert!(result.is_empty());
921    }
922
923    #[test]
924    fn parse_git_log_skips_binary_files() {
925        let root = Path::new("/project");
926        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
927        let (result, _) = parse_git_log(output, root);
928        assert_eq!(result.len(), 1);
929        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
930    }
931
932    #[test]
933    fn parse_git_log_weighted_commits_are_positive() {
934        let root = Path::new("/project");
935        // Use a timestamp near "now" to ensure weight doesn't decay to zero
936        let now_secs = std::time::SystemTime::now()
937            .duration_since(std::time::UNIX_EPOCH)
938            .unwrap()
939            .as_secs();
940        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
941        let (result, _) = parse_git_log(&output, root);
942        let churn = &result[&PathBuf::from("/project/src/a.ts")];
943        assert!(
944            churn.weighted_commits > 0.0,
945            "weighted_commits should be positive for recent commits"
946        );
947    }
948
949    // ── compute_trend edge cases ─────────────────────────────────
950
951    #[test]
952    fn trend_boundary_1_5x_ratio() {
953        // Exactly 1.5x ratio (3 recent : 2 old) → boundary between stable and accelerating
954        // midpoint = 100 + (1000-100)/2 = 550
955        // old: 100, 200 (2 timestamps <= 550)
956        // recent: 600, 800, 1000 (3 timestamps > 550)
957        // ratio = 3/2 = 1.5 — NOT > 1.5, so stable
958        let timestamps = vec![100, 200, 600, 800, 1000];
959        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
960    }
961
962    #[test]
963    fn trend_just_above_1_5x() {
964        // midpoint = 100 + (1000-100)/2 = 550
965        // old: 100 (1 timestamp <= 550)
966        // recent: 600, 800, 1000 (3 timestamps > 550)
967        // ratio = 3/1 = 3.0 → accelerating
968        let timestamps = vec![100, 600, 800, 1000];
969        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
970    }
971
972    #[test]
973    fn trend_boundary_0_67x_ratio() {
974        // Exactly 0.67x ratio → boundary between cooling and stable
975        // midpoint = 100 + (1000-100)/2 = 550
976        // old: 100, 200, 300 (3 timestamps <= 550)
977        // recent: 600, 1000 (2 timestamps > 550)
978        // ratio = 2/3 = 0.666... < 0.67 → cooling
979        let timestamps = vec![100, 200, 300, 600, 1000];
980        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
981    }
982
983    #[test]
984    fn trend_two_timestamps_different() {
985        // Only 2 timestamps: midpoint = 100 + (200-100)/2 = 150
986        // old: 100 (1 timestamp <= 150)
987        // recent: 200 (1 timestamp > 150)
988        // ratio = 1/1 = 1.0 → stable
989        let timestamps = vec![100, 200];
990        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
991    }
992
993    // ── parse_since additional coverage ─────────────────────────
994
995    #[test]
996    fn parse_since_week_singular() {
997        let d = parse_since("1week").unwrap();
998        assert_eq!(d.git_after, "1 week ago");
999        assert_eq!(d.display, "1 week");
1000    }
1001
1002    #[test]
1003    fn parse_since_weeks_long() {
1004        let d = parse_since("3weeks").unwrap();
1005        assert_eq!(d.git_after, "3 weeks ago");
1006        assert_eq!(d.display, "3 weeks");
1007    }
1008
1009    #[test]
1010    fn parse_since_days_long() {
1011        let d = parse_since("30days").unwrap();
1012        assert_eq!(d.git_after, "30 days ago");
1013        assert_eq!(d.display, "30 days");
1014    }
1015
1016    #[test]
1017    fn parse_since_year_long() {
1018        let d = parse_since("1year").unwrap();
1019        assert_eq!(d.git_after, "1 year ago");
1020        assert_eq!(d.display, "1 year");
1021    }
1022
1023    #[test]
1024    fn parse_since_overflow_number_rejected() {
1025        // Number too large for u64
1026        let result = parse_since("99999999999999999999d");
1027        assert!(result.is_err());
1028        let err = result.unwrap_err();
1029        assert!(err.contains("invalid number"));
1030    }
1031
1032    #[test]
1033    fn parse_since_zero_days_rejected() {
1034        assert!(parse_since("0d").is_err());
1035    }
1036
1037    #[test]
1038    fn parse_since_zero_weeks_rejected() {
1039        assert!(parse_since("0w").is_err());
1040    }
1041
1042    #[test]
1043    fn parse_since_zero_years_rejected() {
1044        assert!(parse_since("0y").is_err());
1045    }
1046
1047    // ── parse_numstat_line additional coverage ──────────────────
1048
1049    #[test]
1050    fn numstat_missing_path() {
1051        // Only two tab-separated fields, no path
1052        assert!(parse_numstat_line("10\t5").is_none());
1053    }
1054
1055    #[test]
1056    fn numstat_single_field() {
1057        assert!(parse_numstat_line("10").is_none());
1058    }
1059
1060    #[test]
1061    fn numstat_empty_string() {
1062        assert!(parse_numstat_line("").is_none());
1063    }
1064
1065    #[test]
1066    fn numstat_only_added_is_binary() {
1067        // Added is "-" but deleted is numeric
1068        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1069    }
1070
1071    #[test]
1072    fn numstat_only_deleted_is_binary() {
1073        // Added is numeric but deleted is "-"
1074        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1075    }
1076
1077    #[test]
1078    fn numstat_path_with_spaces() {
1079        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1080        assert_eq!(a, 3);
1081        assert_eq!(d, 1);
1082        assert_eq!(p, "path with spaces/file.ts");
1083    }
1084
1085    #[test]
1086    fn numstat_large_numbers() {
1087        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1088        assert_eq!(a, 9999);
1089        assert_eq!(d, 8888);
1090        assert_eq!(p, "src/big.ts");
1091    }
1092
1093    // ── is_iso_date additional coverage ─────────────────────────
1094
1095    #[test]
1096    fn iso_date_wrong_separator_positions() {
1097        // Dashes in wrong positions
1098        assert!(!is_iso_date("20-25-0601"));
1099        assert!(!is_iso_date("202506-01-"));
1100    }
1101
1102    #[test]
1103    fn iso_date_too_short() {
1104        assert!(!is_iso_date("2025-06-0"));
1105    }
1106
1107    #[test]
1108    fn iso_date_letters_in_day() {
1109        assert!(!is_iso_date("2025-06-ab"));
1110    }
1111
1112    #[test]
1113    fn iso_date_letters_in_month() {
1114        assert!(!is_iso_date("2025-ab-01"));
1115    }
1116
1117    // ── split_number_unit additional coverage ───────────────────
1118
1119    #[test]
1120    fn split_number_unit_valid() {
1121        let (num, unit) = split_number_unit("42days").unwrap();
1122        assert_eq!(num, "42");
1123        assert_eq!(unit, "days");
1124    }
1125
1126    #[test]
1127    fn split_number_unit_single_digit() {
1128        let (num, unit) = split_number_unit("1m").unwrap();
1129        assert_eq!(num, "1");
1130        assert_eq!(unit, "m");
1131    }
1132
1133    #[test]
1134    fn split_number_unit_no_digits() {
1135        let err = split_number_unit("abc").unwrap_err();
1136        assert!(err.contains("must start with a number"));
1137    }
1138
1139    #[test]
1140    fn split_number_unit_no_unit() {
1141        let err = split_number_unit("123").unwrap_err();
1142        assert!(err.contains("requires a unit suffix"));
1143    }
1144
1145    // ── parse_git_log additional coverage ───────────────────────
1146
1147    #[test]
1148    fn parse_git_log_numstat_before_timestamp_uses_now() {
1149        let root = Path::new("/project");
1150        // No timestamp line before the numstat line
1151        let output = "10\t5\tsrc/no_ts.ts\n";
1152        let (result, _) = parse_git_log(output, root);
1153        assert_eq!(result.len(), 1);
1154        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1155        assert_eq!(churn.commits, 1);
1156        assert_eq!(churn.lines_added, 10);
1157        assert_eq!(churn.lines_deleted, 5);
1158        // Without a timestamp, it falls back to now_secs, so weight should be ~1.0
1159        assert!(
1160            churn.weighted_commits > 0.9,
1161            "weight should be near 1.0 when timestamp defaults to now"
1162        );
1163    }
1164
1165    #[test]
1166    fn parse_git_log_whitespace_lines_ignored() {
1167        let root = Path::new("/project");
1168        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1169        let (result, _) = parse_git_log(output, root);
1170        assert_eq!(result.len(), 1);
1171    }
1172
    #[test]
    fn parse_git_log_trend_is_computed_per_file() {
        let root = Path::new("/project");
        // Two commits far apart for one file, recent-heavy for another
        // hot.ts timestamps span 1000..2000, so its midpoint is 1500:
        // one commit (1000) in the old half, four (1800..2000) in the
        // recent half — the trend should be computed per file, not from
        // the combined timeline.
        let output = "\
1000\n5\t1\tsrc/old.ts\n\
2000\n3\t1\tsrc/old.ts\n\
1000\n1\t0\tsrc/hot.ts\n\
1800\n1\t0\tsrc/hot.ts\n\
1900\n1\t0\tsrc/hot.ts\n\
1950\n1\t0\tsrc/hot.ts\n\
2000\n1\t0\tsrc/hot.ts\n";
        let (result, _) = parse_git_log(output, root);
        let old = &result[&PathBuf::from("/project/src/old.ts")];
        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
        assert_eq!(old.commits, 2);
        assert_eq!(hot.commits, 5);
        // hot.ts has 4 recent vs 1 old => accelerating
        assert_eq!(hot.trend, ChurnTrend::Accelerating);
    }
1193
1194    #[test]
1195    fn parse_git_log_weighted_decay_for_old_commits() {
1196        let root = Path::new("/project");
1197        let now = std::time::SystemTime::now()
1198            .duration_since(std::time::UNIX_EPOCH)
1199            .unwrap()
1200            .as_secs();
1201        // One commit from 180 days ago (two half-lives) should weigh ~0.25
1202        let old_ts = now - (180 * 86_400);
1203        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1204        let (result, _) = parse_git_log(&output, root);
1205        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1206        assert!(
1207            churn.weighted_commits < 0.5,
1208            "180-day-old commit should weigh ~0.25, got {}",
1209            churn.weighted_commits
1210        );
1211        assert!(
1212            churn.weighted_commits > 0.1,
1213            "180-day-old commit should weigh ~0.25, got {}",
1214            churn.weighted_commits
1215        );
1216    }
1217
1218    #[test]
1219    fn parse_git_log_path_stored_as_absolute() {
1220        let root = Path::new("/my/project");
1221        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1222        let (result, _) = parse_git_log(output, root);
1223        let key = PathBuf::from("/my/project/lib/utils.ts");
1224        assert!(result.contains_key(&key));
1225        assert_eq!(result[&key].path, key);
1226    }
1227
1228    #[test]
1229    fn parse_git_log_weighted_commits_rounded() {
1230        let root = Path::new("/project");
1231        let now = std::time::SystemTime::now()
1232            .duration_since(std::time::UNIX_EPOCH)
1233            .unwrap()
1234            .as_secs();
1235        // A commit right now should weigh exactly 1.00
1236        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1237        let (result, _) = parse_git_log(&output, root);
1238        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1239        // Weighted commits are rounded to 2 decimal places
1240        let decimals = format!("{:.2}", churn.weighted_commits);
1241        assert_eq!(
1242            churn.weighted_commits.to_string().len(),
1243            decimals.len().min(churn.weighted_commits.to_string().len()),
1244            "weighted_commits should be rounded to at most 2 decimal places"
1245        );
1246    }
1247
1248    // ── ChurnTrend serde ────────────────────────────────────────
1249
1250    #[test]
1251    fn trend_serde_serialization() {
1252        assert_eq!(
1253            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1254            "\"accelerating\""
1255        );
1256        assert_eq!(
1257            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1258            "\"stable\""
1259        );
1260        assert_eq!(
1261            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1262            "\"cooling\""
1263        );
1264    }
1265
1266    // ── parse_git_log: author tracking ──────────────────────────
1267
1268    #[test]
1269    fn parse_git_log_extracts_author_email() {
1270        let root = Path::new("/project");
1271        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1272        let (result, pool) = parse_git_log(output, root);
1273        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1274        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1275        assert_eq!(churn.authors.len(), 1);
1276        let alice = &churn.authors[&0];
1277        assert_eq!(alice.commits, 1);
1278        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1279        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1280    }
1281
    #[test]
    fn parse_git_log_intern_dedupes_authors() {
        let root = Path::new("/project");
        // Three commits by two distinct authors; alice appears twice and
        // must intern to a single pool entry.
        let output = "\
1700000000|alice@example.com
1\t0\ta.ts
1700100000|bob@example.com
2\t1\tb.ts
1700200000|alice@example.com
3\t2\tc.ts
";
        let (_result, pool) = parse_git_log(output, root);
        // The pool holds each unique email exactly once.
        assert_eq!(pool.len(), 2);
        assert!(pool.contains(&"alice@example.com".to_string()));
        assert!(pool.contains(&"bob@example.com".to_string()));
    }
1298
    #[test]
    fn parse_git_log_aggregates_per_author() {
        let root = Path::new("/project");
        // alice touches index.ts twice, bob once.
        let output = "\
1700000000|alice@example.com
1\t0\tsrc/index.ts
1700100000|bob@example.com
2\t0\tsrc/index.ts
1700200000|alice@example.com
1\t1\tsrc/index.ts
";
        let (result, pool) = parse_git_log(output, root);
        let churn = &result[&PathBuf::from("/project/src/index.ts")];
        // File-level totals cover all three commits across both authors.
        assert_eq!(churn.commits, 3);
        assert_eq!(churn.authors.len(), 2);

        // Resolve alice's interned pool index, then check her per-author
        // aggregation: 2 commits spanning her first and last timestamps.
        let alice_idx =
            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
        let alice = &churn.authors[&alice_idx];
        assert_eq!(alice.commits, 2);
        assert_eq!(alice.first_commit_ts, 1_700_000_000);
        assert_eq!(alice.last_commit_ts, 1_700_200_000);
    }
1323
1324    #[test]
1325    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1326        // Backwards-compat path: header has no `|email` suffix.
1327        let root = Path::new("/project");
1328        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1329        let (result, pool) = parse_git_log(output, root);
1330        assert!(pool.is_empty());
1331        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1332        assert_eq!(churn.commits, 1);
1333        assert!(churn.authors.is_empty());
1334    }
1335
1336    // ── intern_author ──────────────────────────────────────────
1337
1338    #[test]
1339    fn intern_author_returns_existing_index() {
1340        let mut pool = Vec::new();
1341        let mut index = FxHashMap::default();
1342        let i1 = intern_author("alice@x", &mut pool, &mut index);
1343        let i2 = intern_author("alice@x", &mut pool, &mut index);
1344        assert_eq!(i1, i2);
1345        assert_eq!(pool.len(), 1);
1346    }
1347
1348    #[test]
1349    fn intern_author_assigns_sequential_indices() {
1350        let mut pool = Vec::new();
1351        let mut index = FxHashMap::default();
1352        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1353        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1354        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1355        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1356    }
1357
1358    // ── incremental cache ───────────────────────────────────────
1359
1360    fn git(root: &Path, args: &[&str]) {
1361        let status = std::process::Command::new("git")
1362            .args(args)
1363            .current_dir(root)
1364            .status()
1365            .expect("run git");
1366        assert!(status.success(), "git {args:?} failed");
1367    }
1368
1369    fn write(root: &Path, path: &str, contents: &str) {
1370        let path = root.join(path);
1371        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
1372        std::fs::write(path, contents).unwrap();
1373    }
1374
    #[test]
    fn cached_churn_merges_new_commits_after_head_advances() {
        // End-to-end cache test against a real throwaway git repo:
        // cold run populates the cache, warm run hits it, and a new commit
        // is merged incrementally without a full re-scan.
        let repo = tempfile::tempdir().expect("create repo");
        let root = repo.path();
        git(root, &["init"]);
        git(root, &["config", "user.email", "churn@example.test"]);
        git(root, &["config", "user.name", "Churn Test"]);
        // Disable signing so commits succeed on machines with gpgsign set.
        git(root, &["config", "commit.gpgsign", "false"]);

        write(root, "src/a.ts", "export const a = 1;\n");
        git(root, &["add", "."]);
        git(root, &["commit", "-m", "initial"]);

        let since = parse_since("1y").unwrap();
        let cache = tempfile::tempdir().expect("create cache dir");
        // First run: empty cache directory, so this must be a miss.
        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(!cold_hit);
        let file = root.join("src/a.ts");
        assert_eq!(cold.files[&file].commits, 1);

        // Second run with no new commits: served entirely from cache.
        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(warm_hit);

        // Advance HEAD with a second commit touching the same file.
        write(
            root,
            "src/a.ts",
            "export const a = 1;\nexport const b = 2;\n",
        );
        git(root, &["add", "."]);
        git(root, &["commit", "-m", "update a"]);
        let head = get_head_sha(root).unwrap();

        // Still a cache hit, but the new commit must be merged in:
        // commit count for the file goes from 1 to 2.
        let (incremental, incremental_hit) =
            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
        assert!(incremental_hit);
        assert_eq!(incremental.files[&file].commits, 2);

        // The persisted cache should now record the advanced HEAD sha.
        // (shadowing `cache`: the TempDir stays alive until scope end)
        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
        assert_eq!(cache.last_indexed_sha, head);
    }
1415}