Skip to main content

fallow_core/
churn.rs

//! Git churn analysis for hotspot detection.
//!
//! Shells out to `git log` to collect per-file change history, then computes
//! recency-weighted churn scores and trend indicators.
5
use std::path::{Path, PathBuf};
use std::process::Command;

use rustc_hash::FxHashMap;
use serde::Serialize;

use crate::git_env::clear_ambient_git_env;
13
/// Number of seconds in one day (24 × 60 × 60).
///
/// Used to convert a commit's age from seconds into days before the recency
/// decay is applied.
const SECS_PER_DAY: f64 = 86_400.0;
16
/// Recency weight half-life in days.
///
/// Each commit is weighted by `0.5^(age_days / HALF_LIFE_DAYS)`, so a commit
/// from 90 days ago counts half as much as today's commit and one from
/// 180 days ago counts 25%.
const HALF_LIFE_DAYS: f64 = 90.0;
20
/// Parsed duration for the `--since` flag.
///
/// Produced by [`parse_since`]. The two fields carry the same information in
/// two shapes: one for git, one for humans.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`).
    pub display: String,
}
29
30/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
31#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
32#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
33#[serde(rename_all = "snake_case")]
34pub enum ChurnTrend {
35    /// Recent half has >1.5× the commits of the older half.
36    Accelerating,
37    /// Churn is roughly stable between halves.
38    Stable,
39    /// Recent half has <0.67× the commits of the older half.
40    Cooling,
41}
42
43impl std::fmt::Display for ChurnTrend {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        match self {
46            Self::Accelerating => write!(f, "accelerating"),
47            Self::Stable => write!(f, "stable"),
48            Self::Cooling => write!(f, "cooling"),
49        }
50    }
51}
52
/// Per-author commit aggregation for a single file.
///
/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
/// per-file maps small and the bitcode cache compact. The author's identity is
/// therefore not stored here; it is the key under which this value is filed in
/// [`FileChurn::authors`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct AuthorContribution {
    /// Total commits by this author touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit sum (exponential decay, half-life 90 days).
    pub weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    pub first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    pub last_commit_ts: u64,
}
68
69/// Per-file churn data collected from git history.
70#[derive(Debug, Clone)]
71pub struct FileChurn {
72    /// Absolute file path.
73    pub path: PathBuf,
74    /// Total number of commits touching this file in the analysis window.
75    pub commits: u32,
76    /// Recency-weighted commit count (exponential decay, half-life 90 days).
77    pub weighted_commits: f64,
78    /// Total lines added across all commits.
79    pub lines_added: u32,
80    /// Total lines deleted across all commits.
81    pub lines_deleted: u32,
82    /// Churn trend: accelerating, stable, or cooling.
83    pub trend: ChurnTrend,
84    /// Per-author contributions keyed by interned author index.
85    /// Indices reference [`ChurnResult::author_pool`].
86    pub authors: FxHashMap<u32, AuthorContribution>,
87}
88
89/// Result of churn analysis.
90pub struct ChurnResult {
91    /// Per-file churn data, keyed by absolute path.
92    pub files: FxHashMap<PathBuf, FileChurn>,
93    /// Whether the repository is a shallow clone.
94    pub shallow_clone: bool,
95    /// Author email pool. Per-file [`AuthorContribution`] entries reference
96    /// authors by their index into this vector.
97    pub author_pool: Vec<String>,
98}
99
100/// Parse a `--since` value into a git-compatible duration.
101///
102/// Accepts:
103/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
104/// - ISO dates: `2025-06-01`
105///
106/// # Errors
107///
108/// Returns an error if the input is not a recognized duration format or ISO date,
109/// the numeric part is invalid, or the duration is zero.
110pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
111    // Try ISO date first (YYYY-MM-DD)
112    if is_iso_date(input) {
113        return Ok(SinceDuration {
114            git_after: input.to_string(),
115            display: input.to_string(),
116        });
117    }
118
119    // Parse duration: number + unit
120    let (num_str, unit) = split_number_unit(input)?;
121    let num: u64 = num_str
122        .parse()
123        .map_err(|_| format!("invalid number in --since: {input}"))?;
124
125    if num == 0 {
126        return Err("--since duration must be greater than 0".to_string());
127    }
128
129    match unit {
130        "d" | "day" | "days" => {
131            let s = if num == 1 { "" } else { "s" };
132            Ok(SinceDuration {
133                git_after: format!("{num} day{s} ago"),
134                display: format!("{num} day{s}"),
135            })
136        }
137        "w" | "week" | "weeks" => {
138            let s = if num == 1 { "" } else { "s" };
139            Ok(SinceDuration {
140                git_after: format!("{num} week{s} ago"),
141                display: format!("{num} week{s}"),
142            })
143        }
144        "m" | "month" | "months" => {
145            let s = if num == 1 { "" } else { "s" };
146            Ok(SinceDuration {
147                git_after: format!("{num} month{s} ago"),
148                display: format!("{num} month{s}"),
149            })
150        }
151        "y" | "year" | "years" => {
152            let s = if num == 1 { "" } else { "s" };
153            Ok(SinceDuration {
154                git_after: format!("{num} year{s} ago"),
155                display: format!("{num} year{s}"),
156            })
157        }
158        _ => Err(format!(
159            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
160        )),
161    }
162}
163
164/// Analyze git churn for files in the given root directory.
165///
166/// Returns `None` if git is not available or the directory is not a git repository.
167pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
168    let shallow = is_shallow_clone(root);
169    let state = analyze_churn_events(root, since, None)?;
170    Some(build_churn_result(state, shallow))
171}
172
173/// Check if the repository is a shallow clone.
174#[must_use]
175pub fn is_shallow_clone(root: &Path) -> bool {
176    let mut command = Command::new("git");
177    command
178        .args(["rev-parse", "--is-shallow-repository"])
179        .current_dir(root);
180    clear_ambient_git_env(&mut command);
181    command.output().is_ok_and(|o| {
182        String::from_utf8_lossy(&o.stdout)
183            .trim()
184            .eq_ignore_ascii_case("true")
185    })
186}
187
188/// Check if the directory is inside a git repository.
189#[must_use]
190pub fn is_git_repo(root: &Path) -> bool {
191    let mut command = Command::new("git");
192    command
193        .args(["rev-parse", "--git-dir"])
194        .current_dir(root)
195        .stdout(std::process::Stdio::null())
196        .stderr(std::process::Stdio::null());
197    clear_ambient_git_env(&mut command);
198    command.status().is_ok_and(|s| s.success())
199}
200
// ── Churn cache ──────────────────────────────────────────────────
202
/// Maximum size of a churn cache file (64 MB). The incremental cache stores
/// per-commit events, so it needs more headroom than the old aggregate rows.
/// Larger files are treated as a cache miss rather than decoded.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;
206
/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
/// changes so older payloads are rejected on load. Bumped to 3 when the cache
/// switched from aggregate rows to per-commit events for incremental updates.
const CHURN_CACHE_VERSION: u8 = 3;
211
212/// Serializable per-commit event for the disk cache.
213#[derive(Clone, bitcode::Encode, bitcode::Decode)]
214struct CachedCommitEvent {
215    timestamp: u64,
216    lines_added: u32,
217    lines_deleted: u32,
218    author_idx: Option<u32>,
219}
220
221/// Serializable per-file churn entry for the disk cache.
222#[derive(Clone, bitcode::Encode, bitcode::Decode)]
223struct CachedFileChurn {
224    path: String,
225    events: Vec<CachedCommitEvent>,
226}
227
228/// Cached churn data keyed by last indexed SHA and since string.
229#[derive(Clone, bitcode::Encode, bitcode::Decode)]
230struct ChurnCache {
231    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
232    version: u8,
233    last_indexed_sha: String,
234    git_after: String,
235    files: Vec<CachedFileChurn>,
236    shallow_clone: bool,
237    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
238    author_pool: Vec<String>,
239}
240
241/// Per-file commit events retained in memory while building or updating churn.
242struct FileEvents {
243    events: Vec<CachedCommitEvent>,
244}
245
246/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
247/// timestamps so a cache can merge new commits and recompute trend/recency.
248struct ChurnEventState {
249    files: FxHashMap<PathBuf, FileEvents>,
250    author_pool: Vec<String>,
251}
252
253/// Get the full HEAD SHA for cache keying.
254fn get_head_sha(root: &Path) -> Option<String> {
255    let mut command = Command::new("git");
256    command.args(["rev-parse", "HEAD"]).current_dir(root);
257    clear_ambient_git_env(&mut command);
258    command
259        .output()
260        .ok()
261        .filter(|o| o.status.success())
262        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
263}
264
265/// Check whether `ancestor` is still reachable from `descendant`.
266fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
267    let mut command = Command::new("git");
268    command
269        .args(["merge-base", "--is-ancestor", ancestor, descendant])
270        .current_dir(root);
271    clear_ambient_git_env(&mut command);
272    command.status().is_ok_and(|s| s.success())
273}
274
275/// Try to load churn data from disk cache. Returns `None` on cache miss
276/// or version mismatch.
277fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
278    let cache_file = cache_dir.join("churn.bin");
279    let data = std::fs::read(&cache_file).ok()?;
280    if data.len() > MAX_CHURN_CACHE_SIZE {
281        return None;
282    }
283    let cache: ChurnCache = bitcode::decode(&data).ok()?;
284    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
285        return None;
286    }
287    Some(cache)
288}
289
290/// Save churn data to disk cache.
291fn save_churn_cache(
292    cache_dir: &Path,
293    last_indexed_sha: &str,
294    git_after: &str,
295    state: &ChurnEventState,
296    shallow_clone: bool,
297) {
298    let files: Vec<CachedFileChurn> = state
299        .files
300        .iter()
301        .map(|f| CachedFileChurn {
302            path: f.0.to_string_lossy().to_string(),
303            events: f.1.events.clone(),
304        })
305        .collect();
306    let cache = ChurnCache {
307        version: CHURN_CACHE_VERSION,
308        last_indexed_sha: last_indexed_sha.to_string(),
309        git_after: git_after.to_string(),
310        files,
311        shallow_clone,
312        author_pool: state.author_pool.clone(),
313    };
314    let _ = std::fs::create_dir_all(cache_dir);
315    let data = bitcode::encode(&cache);
316    // Write to temp file then rename for atomic update (avoids partial reads by concurrent processes)
317    let tmp = cache_dir.join("churn.bin.tmp");
318    if std::fs::write(&tmp, data).is_ok() {
319        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
320    }
321}
322
323/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
324/// since duration match. If HEAD advanced from the cached SHA, runs an
325/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
326///
327/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
328/// cache state was used.
329/// Returns `None` if git analysis fails.
330pub fn analyze_churn_cached(
331    root: &Path,
332    since: &SinceDuration,
333    cache_dir: &Path,
334    no_cache: bool,
335) -> Option<(ChurnResult, bool)> {
336    let head_sha = get_head_sha(root)?;
337
338    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
339        if cache.last_indexed_sha == head_sha {
340            let shallow_clone = cache.shallow_clone;
341            let state = cache.into_event_state();
342            return Some((build_churn_result(state, shallow_clone), true));
343        }
344
345        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
346            let shallow_clone = is_shallow_clone(root);
347            let range = format!("{}..HEAD", cache.last_indexed_sha);
348            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
349                let mut state = cache.into_event_state();
350                merge_churn_states(&mut state, delta);
351                save_churn_cache(
352                    cache_dir,
353                    &head_sha,
354                    &since.git_after,
355                    &state,
356                    shallow_clone,
357                );
358                return Some((build_churn_result(state, shallow_clone), true));
359            }
360        }
361    }
362
363    let shallow_clone = is_shallow_clone(root);
364    let state = analyze_churn_events(root, since, None)?;
365    if !no_cache {
366        save_churn_cache(
367            cache_dir,
368            &head_sha,
369            &since.git_after,
370            &state,
371            shallow_clone,
372        );
373    }
374
375    let result = build_churn_result(state, shallow_clone);
376    Some((result, false))
377}
378
// ── Internal ──────────────────────────────────────────────────────
380
381impl ChurnCache {
382    fn into_event_state(self) -> ChurnEventState {
383        let files = self
384            .files
385            .into_iter()
386            .map(|entry| {
387                (
388                    PathBuf::from(entry.path),
389                    FileEvents {
390                        events: entry.events,
391                    },
392                )
393            })
394            .collect();
395        ChurnEventState {
396            files,
397            author_pool: self.author_pool,
398        }
399    }
400}
401
402/// Run `git log --numstat` and return event-level churn state.
403fn analyze_churn_events(
404    root: &Path,
405    since: &SinceDuration,
406    revision_range: Option<&str>,
407) -> Option<ChurnEventState> {
408    let mut command = Command::new("git");
409    command.arg("log");
410    if let Some(range) = revision_range {
411        command.arg(range);
412    }
413    command
414        .args([
415            "--numstat",
416            "--no-merges",
417            "--no-renames",
418            "--use-mailmap",
419            "--format=format:%at|%ae",
420            &format!("--after={}", since.git_after),
421        ])
422        .current_dir(root);
423    clear_ambient_git_env(&mut command);
424
425    let output = match command.output() {
426        Ok(o) => o,
427        Err(e) => {
428            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
429            return None;
430        }
431    };
432
433    if !output.status.success() {
434        let stderr = String::from_utf8_lossy(&output.stderr);
435        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
436        return None;
437    }
438
439    let stdout = String::from_utf8_lossy(&output.stdout);
440    Some(parse_git_log_events(&stdout, root))
441}
442
443/// Merge new churn events into cached event state.
444fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
445    let mut base_author_index: FxHashMap<String, u32> = base
446        .author_pool
447        .iter()
448        .enumerate()
449        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
450        .collect();
451
452    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
453    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
454        let Ok(old_idx) = u32::try_from(old_idx) else {
455            continue;
456        };
457        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
458        author_mapping.insert(old_idx, new_idx);
459    }
460
461    for (path, mut file) in delta.files {
462        for event in &mut file.events {
463            event.author_idx = event
464                .author_idx
465                .and_then(|idx| author_mapping.get(&idx).copied());
466        }
467        base.files
468            .entry(path)
469            .and_modify(|existing| existing.events.append(&mut file.events))
470            .or_insert(file);
471    }
472}
473
474/// Parse `git log --numstat --format=format:%at|%ae` output into events.
475fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
476    let now_secs = std::time::SystemTime::now()
477        .duration_since(std::time::UNIX_EPOCH)
478        .unwrap_or_default()
479        .as_secs();
480
481    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
482    let mut author_pool: Vec<String> = Vec::new();
483    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
484    let mut current_timestamp: Option<u64> = None;
485    let mut current_author_idx: Option<u32> = None;
486
487    for line in stdout.lines() {
488        let line = line.trim();
489        if line.is_empty() {
490            continue;
491        }
492
493        // Header lines have shape: "<ts>|<email>"
494        if let Some((ts_str, email)) = line.split_once('|')
495            && let Ok(ts) = ts_str.parse::<u64>()
496        {
497            current_timestamp = Some(ts);
498            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
499            continue;
500        }
501
502        // Backwards-compat: bare timestamp (legacy format or test fixtures).
503        if let Ok(ts) = line.parse::<u64>() {
504            current_timestamp = Some(ts);
505            current_author_idx = None;
506            continue;
507        }
508
509        // Numstat line: "10\t5\tpath/to/file"
510        if let Some((added, deleted, path)) = parse_numstat_line(line) {
511            let abs_path = root.join(path);
512            let ts = current_timestamp.unwrap_or(now_secs);
513            files
514                .entry(abs_path)
515                .or_insert_with(|| FileEvents { events: Vec::new() })
516                .events
517                .push(CachedCommitEvent {
518                    timestamp: ts,
519                    lines_added: added,
520                    lines_deleted: deleted,
521                    author_idx: current_author_idx,
522                });
523        }
524    }
525
526    ChurnEventState { files, author_pool }
527}
528
529/// Convert event-level churn state into the public aggregate result.
530#[expect(
531    clippy::cast_possible_truncation,
532    reason = "commit count per file is bounded by git history depth"
533)]
534fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
535    let now_secs = std::time::SystemTime::now()
536        .duration_since(std::time::UNIX_EPOCH)
537        .unwrap_or_default()
538        .as_secs();
539
540    let files = state
541        .files
542        .into_iter()
543        .map(|(path, file)| {
544            let mut timestamps = Vec::with_capacity(file.events.len());
545            let mut weighted_commits = 0.0;
546            let mut lines_added = 0;
547            let mut lines_deleted = 0;
548            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
549
550            for event in file.events {
551                timestamps.push(event.timestamp);
552                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
553                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
554                weighted_commits += weight;
555                lines_added += event.lines_added;
556                lines_deleted += event.lines_deleted;
557
558                if let Some(idx) = event.author_idx {
559                    authors
560                        .entry(idx)
561                        .and_modify(|c| {
562                            c.commits += 1;
563                            c.weighted_commits += weight;
564                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
565                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
566                        })
567                        .or_insert(AuthorContribution {
568                            commits: 1,
569                            weighted_commits: weight,
570                            first_commit_ts: event.timestamp,
571                            last_commit_ts: event.timestamp,
572                        });
573                }
574            }
575
576            let commits = timestamps.len() as u32;
577            let trend = compute_trend(&timestamps);
578            // Round per-author weighted sums for cache stability.
579            for c in authors.values_mut() {
580                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
581            }
582            let churn = FileChurn {
583                path: path.clone(),
584                commits,
585                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
586                lines_added,
587                lines_deleted,
588                trend,
589                authors,
590            };
591            (path, churn)
592        })
593        .collect();
594
595    ChurnResult {
596        files,
597        shallow_clone,
598        author_pool: state.author_pool,
599    }
600}
601
/// Test-only helper: parse `git log --numstat --format=format:%at|%ae` output
/// straight into aggregated churn.
///
/// Yields the per-file churn map together with the author email pool that the
/// interned indices in [`FileChurn::authors`] point into. The result is built
/// as if the repository were not a shallow clone.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
611
612/// Intern an author email into the pool, returning its stable index.
613fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
614    if let Some(&idx) = index.get(email) {
615        return idx;
616    }
617    #[expect(
618        clippy::cast_possible_truncation,
619        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
620    )]
621    let idx = pool.len() as u32;
622    let owned = email.to_string();
623    index.insert(owned.clone(), idx);
624    pool.push(owned);
625    idx
626}
627
/// Parse a single numstat line: `"10\t5\tpath/to/file.ts"`.
///
/// Returns `(lines_added, lines_deleted, path)`. Only the first two tabs act
/// as separators, so a path that itself contains a tab is kept whole.
///
/// Returns `None` when:
/// - the line has fewer than three tab-separated fields;
/// - either count is not a number (binary files show as `"-\t-\tpath"`, so
///   they are skipped);
/// - the path is empty, which would otherwise resolve to the repository root
///   itself.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let mut fields = line.splitn(3, '\t');
    let added: u32 = fields.next()?.parse().ok()?;
    let deleted: u32 = fields.next()?.parse().ok()?;
    let path = fields.next().filter(|path| !path.is_empty())?;

    Some((added, deleted, path))
}
642
643/// Compute churn trend by splitting commits into two temporal halves.
644///
645/// Finds the midpoint between the oldest and newest commit timestamps,
646/// then compares commit counts in each half:
647/// - Recent > 1.5× older → Accelerating
648/// - Recent < 0.67× older → Cooling
649/// - Otherwise → Stable
650fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
651    if timestamps.len() < 2 {
652        return ChurnTrend::Stable;
653    }
654
655    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
656    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
657
658    if max_ts == min_ts {
659        return ChurnTrend::Stable;
660    }
661
662    let midpoint = min_ts + (max_ts - min_ts) / 2;
663    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
664    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
665
666    if older < 1.0 {
667        return ChurnTrend::Stable;
668    }
669
670    let ratio = recent / older;
671    if ratio > 1.5 {
672        ChurnTrend::Accelerating
673    } else if ratio < 0.67 {
674        ChurnTrend::Cooling
675    } else {
676        ChurnTrend::Stable
677    }
678}
679
/// Report whether `input` is a calendar date written exactly as `YYYY-MM-DD`.
///
/// Requires ten ASCII characters: four digits, `-`, two digits, `-`, two
/// digits. The month must be `01`–`12` and the day `01`–`31`, so shapes like
/// `2025-13-45` are rejected instead of being handed to git. Day-of-month is
/// not checked against the specific month (`2025-02-31` passes).
fn is_iso_date(input: &str) -> bool {
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = input.as_bytes() else {
        return false;
    };

    if ![y0, y1, y2, y3, m0, m1, d0, d1]
        .iter()
        .all(u8::is_ascii_digit)
    {
        return false;
    }

    // All bytes are ASCII digits here, so the subtraction cannot underflow
    // and the two-digit value (at most 99) fits in a u8.
    let month = (m0 - b'0') * 10 + (m1 - b'0');
    let day = (d0 - b'0') * 10 + (d1 - b'0');
    (1..=12).contains(&month) && (1..=31).contains(&day)
}
688
/// Split a duration such as `90d` into its digits and its unit (`("90", "d")`).
///
/// Whitespace around the value and between the number and the unit is
/// ignored, so `"6 months"` splits the same way as `"6months"`. The unit is
/// returned as written; deciding whether it is a known unit is the caller's
/// job.
///
/// # Errors
///
/// Returns an error when the input consists only of digits (no unit) or does
/// not begin with a digit (no number).
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    let input = input.trim();
    let pos = input.find(|c: char| !c.is_ascii_digit()).ok_or_else(|| {
        format!("--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}")
    })?;
    if pos == 0 {
        return Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        ));
    }
    // `pos` is the byte offset of a character start, so the split is safe.
    let (number, rest) = input.split_at(pos);
    Ok((number, rest.trim_start()))
}
700
701#[cfg(test)]
702mod tests {
703    use super::*;
704
705    // ── parse_since ──────────────────────────────────────────────
706
707    #[test]
708    fn parse_since_months_short() {
709        let d = parse_since("6m").unwrap();
710        assert_eq!(d.git_after, "6 months ago");
711        assert_eq!(d.display, "6 months");
712    }
713
714    #[test]
715    fn parse_since_months_long() {
716        let d = parse_since("6months").unwrap();
717        assert_eq!(d.git_after, "6 months ago");
718        assert_eq!(d.display, "6 months");
719    }
720
721    #[test]
722    fn parse_since_days() {
723        let d = parse_since("90d").unwrap();
724        assert_eq!(d.git_after, "90 days ago");
725        assert_eq!(d.display, "90 days");
726    }
727
728    #[test]
729    fn parse_since_year_singular() {
730        let d = parse_since("1y").unwrap();
731        assert_eq!(d.git_after, "1 year ago");
732        assert_eq!(d.display, "1 year");
733    }
734
735    #[test]
736    fn parse_since_years_plural() {
737        let d = parse_since("2years").unwrap();
738        assert_eq!(d.git_after, "2 years ago");
739        assert_eq!(d.display, "2 years");
740    }
741
742    #[test]
743    fn parse_since_weeks() {
744        let d = parse_since("2w").unwrap();
745        assert_eq!(d.git_after, "2 weeks ago");
746        assert_eq!(d.display, "2 weeks");
747    }
748
749    #[test]
750    fn parse_since_iso_date() {
751        let d = parse_since("2025-06-01").unwrap();
752        assert_eq!(d.git_after, "2025-06-01");
753        assert_eq!(d.display, "2025-06-01");
754    }
755
756    #[test]
757    fn parse_since_month_singular() {
758        let d = parse_since("1month").unwrap();
759        assert_eq!(d.display, "1 month");
760    }
761
762    #[test]
763    fn parse_since_day_singular() {
764        let d = parse_since("1day").unwrap();
765        assert_eq!(d.display, "1 day");
766    }
767
768    #[test]
769    fn parse_since_zero_rejected() {
770        assert!(parse_since("0m").is_err());
771    }
772
773    #[test]
774    fn parse_since_no_unit_rejected() {
775        assert!(parse_since("90").is_err());
776    }
777
778    #[test]
779    fn parse_since_unknown_unit_rejected() {
780        assert!(parse_since("6x").is_err());
781    }
782
783    #[test]
784    fn parse_since_no_number_rejected() {
785        assert!(parse_since("months").is_err());
786    }
787
788    // ── parse_numstat_line ───────────────────────────────────────
789
790    #[test]
791    fn numstat_normal() {
792        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
793        assert_eq!(a, 10);
794        assert_eq!(d, 5);
795        assert_eq!(p, "src/file.ts");
796    }
797
798    #[test]
799    fn numstat_binary_skipped() {
800        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
801    }
802
803    #[test]
804    fn numstat_zero_lines() {
805        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
806        assert_eq!(a, 0);
807        assert_eq!(d, 0);
808        assert_eq!(p, "src/empty.ts");
809    }
810
811    // ── compute_trend ────────────────────────────────────────────
812
813    #[test]
814    fn trend_empty_is_stable() {
815        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
816    }
817
818    #[test]
819    fn trend_single_commit_is_stable() {
820        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
821    }
822
823    #[test]
824    fn trend_accelerating() {
825        // 2 old commits, 5 recent commits
826        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
827        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
828    }
829
830    #[test]
831    fn trend_cooling() {
832        // 5 old commits, 2 recent commits
833        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
834        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
835    }
836
837    #[test]
838    fn trend_stable_even_distribution() {
839        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
840        let timestamps = vec![100, 200, 300, 700, 800, 900];
841        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
842    }
843
844    #[test]
845    fn trend_same_timestamp_is_stable() {
846        let timestamps = vec![500, 500, 500];
847        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
848    }
849
850    // ── is_iso_date ──────────────────────────────────────────────
851
852    #[test]
853    fn iso_date_valid() {
854        assert!(is_iso_date("2025-06-01"));
855        assert!(is_iso_date("2025-12-31"));
856    }
857
858    #[test]
859    fn iso_date_with_time_rejected() {
860        // Only exact YYYY-MM-DD (10 chars) is accepted
861        assert!(!is_iso_date("2025-06-01T00:00:00"));
862    }
863
864    #[test]
865    fn iso_date_invalid() {
866        assert!(!is_iso_date("6months"));
867        assert!(!is_iso_date("2025"));
868        assert!(!is_iso_date("not-a-date"));
869        assert!(!is_iso_date("abcd-ef-gh"));
870    }
871
872    // ── Display ──────────────────────────────────────────────────
873
874    #[test]
875    fn trend_display() {
876        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
877        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
878        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
879    }
880
881    // ── parse_git_log ───────────────────────────────────────────
882
883    #[test]
884    fn parse_git_log_single_commit() {
885        let root = Path::new("/project");
886        let output = "1700000000\n10\t5\tsrc/index.ts\n";
887        let (result, _) = parse_git_log(output, root);
888        assert_eq!(result.len(), 1);
889        let churn = &result[&PathBuf::from("/project/src/index.ts")];
890        assert_eq!(churn.commits, 1);
891        assert_eq!(churn.lines_added, 10);
892        assert_eq!(churn.lines_deleted, 5);
893    }
894
#[test]
fn parse_git_log_multiple_commits_same_file() {
    // Two commits touch the same path; their counts and line totals must
    // be accumulated into a single entry (10 + 3 added, 5 + 2 deleted).
    let log = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
    let project = Path::new("/project");
    let (files, _) = parse_git_log(log, project);
    assert_eq!(files.len(), 1);

    let key = PathBuf::from("/project/src/index.ts");
    let entry = &files[&key];
    assert_eq!(entry.commits, 2);
    assert_eq!(entry.lines_added, 13);
    assert_eq!(entry.lines_deleted, 7);
}
906
#[test]
fn parse_git_log_multiple_files() {
    // A single commit that lists two numstat lines yields two entries.
    let log = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
    let (files, _) = parse_git_log(log, Path::new("/project"));
    assert_eq!(files.len(), 2);
    for expected in ["/project/src/a.ts", "/project/src/b.ts"] {
        assert!(files.contains_key(&PathBuf::from(expected)));
    }
}
916
#[test]
fn parse_git_log_empty_output() {
    // Empty `git log` output produces no file entries.
    let project = Path::new("/project");
    let (files, _) = parse_git_log("", project);
    assert!(files.is_empty());
}
923
#[test]
fn parse_git_log_skips_binary_files() {
    // The `-\t-` numstat line (binary file) must not produce an entry;
    // only the text file is recorded.
    let log = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
    let (files, _) = parse_git_log(log, Path::new("/project"));
    assert_eq!(files.len(), 1);

    let binary = PathBuf::from("/project/image.png");
    assert!(!files.contains_key(&binary));
}
932
#[test]
fn parse_git_log_weighted_commits_are_positive() {
    use std::time::{SystemTime, UNIX_EPOCH};

    // Stamp the commit with the current time so the recency weight cannot
    // have decayed to zero.
    let now_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let log = format!("{now_secs}\n10\t5\tsrc/a.ts\n");

    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let key = PathBuf::from("/project/src/a.ts");
    let entry = &files[&key];
    assert!(
        entry.weighted_commits > 0.0,
        "weighted_commits should be positive for recent commits"
    );
}
949
950    // ── compute_trend edge cases ─────────────────────────────────
951
#[test]
fn trend_boundary_1_5x_ratio() {
    // Span 100..=1000 gives midpoint 100 + (1000 - 100) / 2 = 550.
    //   older half  (<= 550): 100, 200        -> 2 commits
    //   recent half (>  550): 600, 800, 1000  -> 3 commits
    // 3 / 2 is exactly 1.5, which is NOT strictly greater than 1.5, so the
    // expected classification is stable rather than accelerating.
    let commits = vec![100, 200, 600, 800, 1000];
    let trend = compute_trend(&commits);
    assert_eq!(trend, ChurnTrend::Stable);
}
962
#[test]
fn trend_just_above_1_5x() {
    // Midpoint is 100 + (1000 - 100) / 2 = 550.
    //   older half  (<= 550): 100             -> 1 commit
    //   recent half (>  550): 600, 800, 1000  -> 3 commits
    // 3 / 1 = 3.0 exceeds 1.5, so the trend is accelerating.
    let commits = vec![100, 600, 800, 1000];
    let trend = compute_trend(&commits);
    assert_eq!(trend, ChurnTrend::Accelerating);
}
972
#[test]
fn trend_boundary_0_67x_ratio() {
    // Midpoint is 100 + (1000 - 100) / 2 = 550.
    //   older half  (<= 550): 100, 200, 300  -> 3 commits
    //   recent half (>  550): 600, 1000      -> 2 commits
    // 2 / 3 = 0.666..., which sits just below the 0.67 threshold, so the
    // expected classification is cooling.
    let commits = vec![100, 200, 300, 600, 1000];
    let trend = compute_trend(&commits);
    assert_eq!(trend, ChurnTrend::Cooling);
}
983
#[test]
fn trend_two_timestamps_different() {
    // With only two commits the midpoint is 100 + (200 - 100) / 2 = 150:
    // one commit (100) lands in the older half and one (200) in the
    // recent half, giving a 1 / 1 = 1.0 ratio, i.e. stable.
    let commits = vec![100, 200];
    let trend = compute_trend(&commits);
    assert_eq!(trend, ChurnTrend::Stable);
}
993
994    // ── parse_since additional coverage ─────────────────────────
995
#[test]
fn parse_since_week_singular() {
    // "1week" is rendered with the singular unit in both fields.
    let parsed = parse_since("1week").unwrap();
    assert_eq!(
        (parsed.git_after.as_str(), parsed.display.as_str()),
        ("1 week ago", "1 week")
    );
}
1002
#[test]
fn parse_since_weeks_long() {
    // The long plural unit name "weeks" is accepted.
    let parsed = parse_since("3weeks").unwrap();
    assert_eq!(
        (parsed.git_after.as_str(), parsed.display.as_str()),
        ("3 weeks ago", "3 weeks")
    );
}
1009
#[test]
fn parse_since_days_long() {
    // The long plural unit name "days" is accepted.
    let parsed = parse_since("30days").unwrap();
    assert_eq!(
        (parsed.git_after.as_str(), parsed.display.as_str()),
        ("30 days ago", "30 days")
    );
}
1016
#[test]
fn parse_since_year_long() {
    // "1year" is rendered with the singular unit in both fields.
    let parsed = parse_since("1year").unwrap();
    assert_eq!(
        (parsed.git_after.as_str(), parsed.display.as_str()),
        ("1 year ago", "1 year")
    );
}
1023
#[test]
fn parse_since_overflow_number_rejected() {
    // The numeric part does not fit in a u64, so parsing must fail with
    // an "invalid number" error.
    match parse_since("99999999999999999999d") {
        Ok(parsed) => panic!("expected an error, got {parsed:?}"),
        Err(message) => assert!(message.contains("invalid number")),
    }
}
1032
#[test]
fn parse_since_zero_days_rejected() {
    // A zero-length day window is not a valid duration.
    let outcome = parse_since("0d");
    assert!(outcome.is_err());
}
1037
#[test]
fn parse_since_zero_weeks_rejected() {
    // A zero-length week window is not a valid duration.
    let outcome = parse_since("0w");
    assert!(outcome.is_err());
}
1042
#[test]
fn parse_since_zero_years_rejected() {
    // A zero-length year window is not a valid duration.
    let outcome = parse_since("0y");
    assert!(outcome.is_err());
}
1047
1048    // ── parse_numstat_line additional coverage ──────────────────
1049
#[test]
fn numstat_missing_path() {
    // Two tab-separated counts without a trailing path field are not a
    // usable numstat line.
    let parsed = parse_numstat_line("10\t5");
    assert!(parsed.is_none());
}
1055
#[test]
fn numstat_single_field() {
    // A lone number has neither a deleted count nor a path.
    let parsed = parse_numstat_line("10");
    assert!(parsed.is_none());
}
1060
#[test]
fn numstat_empty_string() {
    // Empty input is not a numstat line.
    let parsed = parse_numstat_line("");
    assert!(parsed.is_none());
}
1065
#[test]
fn numstat_only_added_is_binary() {
    // The added column is "-" while the deleted column is numeric; the
    // line must still be rejected.
    let parsed = parse_numstat_line("-\t5\tsrc/file.ts");
    assert!(parsed.is_none());
}
1071
#[test]
fn numstat_only_deleted_is_binary() {
    // The added column is numeric while the deleted column is "-"; the
    // line must still be rejected.
    let parsed = parse_numstat_line("10\t-\tsrc/file.ts");
    assert!(parsed.is_none());
}
1077
#[test]
fn numstat_path_with_spaces() {
    // Spaces inside the path field must be preserved verbatim; only tabs
    // separate the fields.
    let (added, deleted, path) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
    assert_eq!((added, deleted), (3, 1));
    assert_eq!(path, "path with spaces/file.ts");
}
1085
#[test]
fn numstat_large_numbers() {
    // Four-digit line counts are parsed without truncation.
    let (added, deleted, path) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
    assert_eq!((added, deleted), (9999, 8888));
    assert_eq!(path, "src/big.ts");
}
1093
1094    // ── is_iso_date additional coverage ─────────────────────────
1095
#[test]
fn iso_date_wrong_separator_positions() {
    // Both inputs are 10 characters long but place their dashes at the
    // wrong offsets, so they must be rejected.
    for candidate in ["20-25-0601", "202506-01-"] {
        assert!(!is_iso_date(candidate));
    }
}
1102
#[test]
fn iso_date_too_short() {
    // A single-digit day leaves the string one character short.
    let truncated = "2025-06-0";
    assert!(!is_iso_date(truncated));
}
1107
#[test]
fn iso_date_letters_in_day() {
    // Letters in the day field are rejected.
    let candidate = "2025-06-ab";
    assert!(!is_iso_date(candidate));
}
1112
#[test]
fn iso_date_letters_in_month() {
    // Letters in the month field are rejected.
    let candidate = "2025-ab-01";
    assert!(!is_iso_date(candidate));
}
1117
1118    // ── split_number_unit additional coverage ───────────────────
1119
#[test]
fn split_number_unit_valid() {
    // The leading digits and the trailing unit are returned separately.
    let (digits, suffix) = split_number_unit("42days").unwrap();
    assert_eq!(digits, "42");
    assert_eq!(suffix, "days");
}
1126
#[test]
fn split_number_unit_single_digit() {
    // The shortest valid input: one digit followed by a one-letter unit.
    let (digits, suffix) = split_number_unit("1m").unwrap();
    assert_eq!(digits, "1");
    assert_eq!(suffix, "m");
}
1133
#[test]
fn split_number_unit_no_digits() {
    // Input without leading digits is rejected with a message naming the
    // missing number.
    let message = split_number_unit("abc").unwrap_err();
    let mentions_number = message.contains("must start with a number");
    assert!(mentions_number);
}
1139
#[test]
fn split_number_unit_no_unit() {
    // Digits without a unit suffix are rejected with a message naming the
    // missing suffix.
    let message = split_number_unit("123").unwrap_err();
    let mentions_unit = message.contains("requires a unit suffix");
    assert!(mentions_unit);
}
1145
1146    // ── parse_git_log additional coverage ───────────────────────
1147
#[test]
fn parse_git_log_numstat_before_timestamp_uses_now() {
    // The numstat line is not preceded by any timestamp header.
    let log = "10\t5\tsrc/no_ts.ts\n";
    let (files, _) = parse_git_log(log, Path::new("/project"));
    assert_eq!(files.len(), 1);

    let key = PathBuf::from("/project/src/no_ts.ts");
    let entry = &files[&key];
    assert_eq!(entry.commits, 1);
    assert_eq!(entry.lines_added, 10);
    assert_eq!(entry.lines_deleted, 5);

    // With no header the commit time falls back to "now", so the recency
    // weight should be close to 1.0.
    assert!(
        entry.weighted_commits > 0.9,
        "weight should be near 1.0 when timestamp defaults to now"
    );
}
1165
#[test]
fn parse_git_log_whitespace_lines_ignored() {
    // Whitespace-only lines before, between, and after the real lines
    // must not affect the parsed result.
    let log = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
    let project = Path::new("/project");
    let (files, _) = parse_git_log(log, project);
    assert_eq!(files.len(), 1);
}
1173
#[test]
fn parse_git_log_trend_is_computed_per_file() {
    let root = Path::new("/project");
    // `old.ts` has two commits far apart; `hot.ts` has one early commit
    // followed by a burst of four late ones.
    let output = "\
1000\n5\t1\tsrc/old.ts\n\
2000\n3\t1\tsrc/old.ts\n\
1000\n1\t0\tsrc/hot.ts\n\
1800\n1\t0\tsrc/hot.ts\n\
1900\n1\t0\tsrc/hot.ts\n\
1950\n1\t0\tsrc/hot.ts\n\
2000\n1\t0\tsrc/hot.ts\n";
    let (result, _) = parse_git_log(output, root);
    let old = &result[&PathBuf::from("/project/src/old.ts")];
    let hot = &result[&PathBuf::from("/project/src/hot.ts")];
    assert_eq!(old.commits, 2);
    assert_eq!(hot.commits, 5);
    // hot.ts: midpoint 1500 => 4 recent vs 1 old => accelerating
    assert_eq!(hot.trend, ChurnTrend::Accelerating);
    // old.ts: midpoint 1500 => 1 recent vs 1 old => stable. Asserting both
    // files proves the trend is derived from each file's own history
    // rather than shared across files.
    assert_eq!(old.trend, ChurnTrend::Stable);
}
1194
#[test]
fn parse_git_log_weighted_decay_for_old_commits() {
    use std::time::{SystemTime, UNIX_EPOCH};

    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    // 180 days is two half-lives, so a single commit that old should
    // carry a weight of roughly 0.25.
    let old_ts = now - (180 * 86_400);
    let log = format!("{old_ts}\n10\t5\tsrc/old.ts\n");

    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let key = PathBuf::from("/project/src/old.ts");
    let weight = files[&key].weighted_commits;

    // Loose bounds around 0.25 keep the test independent of rounding.
    assert!(
        weight < 0.5,
        "180-day-old commit should weigh ~0.25, got {weight}"
    );
    assert!(
        weight > 0.1,
        "180-day-old commit should weigh ~0.25, got {weight}"
    );
}
1218
#[test]
fn parse_git_log_path_stored_as_absolute() {
    // The relative path from the log is resolved against the root, and
    // the absolute form is used both as the map key and as the entry's
    // `path` field.
    let log = "1700000000\n1\t0\tlib/utils.ts\n";
    let (files, _) = parse_git_log(log, Path::new("/my/project"));

    let absolute = PathBuf::from("/my/project/lib/utils.ts");
    assert!(files.contains_key(&absolute));
    assert_eq!(files[&absolute].path, absolute);
}
1228
#[test]
fn parse_git_log_weighted_commits_rounded() {
    use std::time::{SystemTime, UNIX_EPOCH};

    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    // A commit stamped "now" should weigh 1.00 after rounding.
    let log = format!("{now}\n1\t0\tsrc/a.ts\n");
    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let entry = &files[&PathBuf::from("/project/src/a.ts")];

    // A value rounded to two decimal places never needs more characters
    // in its default rendering than in its fixed two-decimal rendering.
    let rendered = entry.weighted_commits.to_string();
    let two_places = format!("{:.2}", entry.weighted_commits);
    assert!(
        rendered.len() <= two_places.len(),
        "weighted_commits should be rounded to at most 2 decimal places"
    );
}
1248
1249    // ── ChurnTrend serde ────────────────────────────────────────
1250
#[test]
fn trend_serde_serialization() {
    // Each variant serializes to its snake_case name as a JSON string.
    let expectations = [
        (ChurnTrend::Accelerating, "\"accelerating\""),
        (ChurnTrend::Stable, "\"stable\""),
        (ChurnTrend::Cooling, "\"cooling\""),
    ];
    for (trend, json) in expectations {
        assert_eq!(serde_json::to_string(&trend).unwrap(), json);
    }
}
1266
1267    // ── parse_git_log: author tracking ──────────────────────────
1268
#[test]
fn parse_git_log_extracts_author_email() {
    // The header carries `timestamp|email`; the email must land in the
    // author pool and be attributed to the touched file.
    let log = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));
    assert_eq!(pool, vec![String::from("alice@example.com")]);

    let key = PathBuf::from("/project/src/index.ts");
    let entry = &files[&key];
    assert_eq!(entry.authors.len(), 1);

    // Alice is the first interned author, so her pool index is 0.
    let alice = &entry.authors[&0];
    assert_eq!(alice.commits, 1);
    assert_eq!(alice.first_commit_ts, 1_700_000_000);
    assert_eq!(alice.last_commit_ts, 1_700_000_000);
}
1282
#[test]
fn parse_git_log_intern_dedupes_authors() {
    // Alice authors two of the three commits; the pool must still hold
    // exactly one entry per distinct email.
    let log = "1700000000|alice@example.com\n\
               1\t0\ta.ts\n\
               1700100000|bob@example.com\n\
               2\t1\tb.ts\n\
               1700200000|alice@example.com\n\
               3\t2\tc.ts\n";
    let (_files, pool) = parse_git_log(log, Path::new("/project"));
    assert_eq!(pool.len(), 2);
    for email in ["alice@example.com", "bob@example.com"] {
        assert!(pool.contains(&email.to_string()));
    }
}
1299
#[test]
fn parse_git_log_aggregates_per_author() {
    // Three commits to the same file: alice, then bob, then alice again.
    let log = "1700000000|alice@example.com\n\
               1\t0\tsrc/index.ts\n\
               1700100000|bob@example.com\n\
               2\t0\tsrc/index.ts\n\
               1700200000|alice@example.com\n\
               1\t1\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));

    let key = PathBuf::from("/project/src/index.ts");
    let entry = &files[&key];
    assert_eq!(entry.commits, 3);
    assert_eq!(entry.authors.len(), 2);

    // Look alice up through the pool instead of assuming her index.
    let alice_pos = pool
        .iter()
        .position(|email| email == "alice@example.com")
        .unwrap();
    let alice_idx = u32::try_from(alice_pos).unwrap();
    let alice = &entry.authors[&alice_idx];

    // Her two commits span the first and the last timestamps in the log.
    assert_eq!(alice.commits, 2);
    assert_eq!(alice.first_commit_ts, 1_700_000_000);
    assert_eq!(alice.last_commit_ts, 1_700_200_000);
}
1324
#[test]
fn parse_git_log_legacy_bare_timestamp_still_parses() {
    // Backwards-compatibility path: the header is a bare timestamp with
    // no `|email` suffix, so the commit is counted but no author is
    // recorded.
    let log = "1700000000\n10\t5\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));
    assert!(pool.is_empty());

    let key = PathBuf::from("/project/src/index.ts");
    let entry = &files[&key];
    assert_eq!(entry.commits, 1);
    assert!(entry.authors.is_empty());
}
1336
1337    // ── intern_author ──────────────────────────────────────────
1338
#[test]
fn intern_author_returns_existing_index() {
    // Interning the same email twice yields the same index and does not
    // grow the pool.
    let mut pool = Vec::new();
    let mut index = FxHashMap::default();
    let first = intern_author("alice@x", &mut pool, &mut index);
    let second = intern_author("alice@x", &mut pool, &mut index);
    assert_eq!(first, second);
    assert_eq!(pool.len(), 1);
}
1348
#[test]
fn intern_author_assigns_sequential_indices() {
    let mut pool = Vec::new();
    let mut index = FxHashMap::default();
    // New emails receive 0, 1, 2 in order of first appearance; a repeated
    // email maps back to its original index.
    let sequence = [("alice@x", 0), ("bob@x", 1), ("carol@x", 2), ("alice@x", 0)];
    for (email, expected) in sequence {
        assert_eq!(intern_author(email, &mut pool, &mut index), expected);
    }
}
1358
1359    // ── incremental cache ───────────────────────────────────────
1360
/// Runs `git` with `args` inside `root`, panicking if the process cannot
/// be spawned or exits with a non-success status.
fn git(root: &Path, args: &[&str]) {
    let mut command = std::process::Command::new("git");
    command.current_dir(root).args(args);
    let exit = command.status().expect("run git");
    assert!(exit.success(), "git {args:?} failed");
}
1369
/// Writes `contents` to `root/path`, creating any missing parent
/// directories first. Panics on any I/O failure.
fn write(root: &Path, path: &str, contents: &str) {
    let target = root.join(path);
    let parent = target.parent().expect("test path has parent");
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&target, contents).unwrap();
}
1375
#[test]
fn cached_churn_merges_new_commits_after_head_advances() {
    // Throwaway repository with an explicit identity and signing turned
    // off, so committing does not depend on the user's git configuration.
    let repo_dir = tempfile::tempdir().expect("create repo");
    let root = repo_dir.path();
    git(root, &["init"]);
    git(root, &["config", "user.email", "churn@example.test"]);
    git(root, &["config", "user.name", "Churn Test"]);
    git(root, &["config", "commit.gpgsign", "false"]);

    // Stage everything and commit it with the given message.
    let commit_all = |message: &str| {
        git(root, &["add", "."]);
        git(root, &["commit", "-m", message]);
    };

    write(root, "src/a.ts", "export const a = 1;\n");
    commit_all("initial");

    let since = parse_since("1y").unwrap();
    let cache_dir = tempfile::tempdir().expect("create cache dir");
    let tracked = root.join("src/a.ts");

    // First run: nothing is cached yet, so this is a miss.
    let (cold, cold_hit) = analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(!cold_hit);
    assert_eq!(cold.files[&tracked].commits, 1);

    // Second run with HEAD unchanged: served from the cache.
    let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(warm_hit);

    // Advance HEAD with a second commit to the same file.
    write(
        root,
        "src/a.ts",
        "export const a = 1;\nexport const b = 2;\n",
    );
    commit_all("update a");
    let head = get_head_sha(root).unwrap();

    // Third run: still reported as a hit, and the new commit is included
    // in the file's count.
    let (incremental, incremental_hit) =
        analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(incremental_hit);
    assert_eq!(incremental.files[&tracked].commits, 2);

    // The persisted cache must now point at the new HEAD.
    let stored = load_churn_cache(cache_dir.path(), &since.git_after).unwrap();
    assert_eq!(stored.last_indexed_sha, head);
}
1416}