Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::Serialize;
12
13use crate::git_env::clear_ambient_git_env;
14
/// Function pointer signature used by `set_spawn_hook` to intercept the
/// `git log --numstat` subprocess. Lets the CLI route long-running git
/// log calls through its `ScopedChild` registry so SIGINT / SIGTERM
/// reap the subprocess instead of leaving it running after the parent
/// exits. See `crates/cli/src/signal/` and issue #477.
///
/// The hook receives the fully configured [`Command`] and returns the
/// collected [`Output`], i.e. it has the same shape as [`Command::output`].
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;

/// Process-wide slot for the installed spawn hook. Written at most once by
/// `set_spawn_hook` and consulted by `spawn_output`; empty until a hook is set.
static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
23
24/// Install a spawn-hook that wraps the `git log` subprocess. Idempotent;
25/// subsequent calls are no-ops. Called once from the CLI's `main()` to
26/// route through the signal registry; defaults to `Command::output`
27/// when not set so the function-pointer indirection stays free for tests
28/// and embedders that don't care.
29pub fn set_spawn_hook(hook: ChurnSpawnHook) {
30    let _ = SPAWN_HOOK.set(hook);
31}
32
33fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
34    if let Some(hook) = SPAWN_HOOK.get() {
35        hook(command)
36    } else {
37        command.output()
38    }
39}
40
/// Number of seconds in one day. Used to convert a commit's age from
/// seconds to (fractional) days before applying the recency decay.
const SECS_PER_DAY: f64 = 86_400.0;

/// Recency weight half-life in days. A commit from 90 days ago counts half
/// as much as today's commit; 180 days ago counts 25%.
///
/// The weight of a commit is `0.5^(age_days / HALF_LIFE_DAYS)`.
const HALF_LIFE_DAYS: f64 = 90.0;
47
/// Parsed duration for the `--since` flag, as produced by `parse_since`.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    /// Also used verbatim as part of the churn cache key.
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`).
    pub display: String,
}
56
/// Churn trend indicator for a single file.
///
/// Computed by splitting the file's commits at the midpoint between its
/// oldest and newest commit timestamps and comparing the number of commits
/// in the recent half against the older half.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// Recent half has >1.5× the commits of the older half.
    Accelerating,
    /// Churn is roughly stable between halves. Also reported when there are
    /// fewer than two commits or all commits share one timestamp.
    Stable,
    /// Recent half has <0.67× the commits of the older half.
    Cooling,
}
69
70impl std::fmt::Display for ChurnTrend {
71    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
72        match self {
73            Self::Accelerating => write!(f, "accelerating"),
74            Self::Stable => write!(f, "stable"),
75            Self::Cooling => write!(f, "cooling"),
76        }
77    }
78}
79
/// Per-author commit aggregation for a single file.
///
/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
/// per-file maps small and the bitcode cache compact.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Total commits by this author touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit sum (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
    pub weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    pub first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    pub last_commit_ts: u64,
}
95
/// Per-file churn data collected from git history.
///
/// Built from `git log --numstat --no-merges`, so merge commits are not
/// counted, and changes git reports as binary (`-` line counts) are skipped.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// File path: the analysis root joined with the path reported by git
    /// (absolute when the root passed to the analysis is absolute).
    pub path: PathBuf,
    /// Total number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days),
    /// rounded to two decimal places.
    pub weighted_commits: f64,
    /// Total lines added across all commits.
    pub lines_added: u32,
    /// Total lines deleted across all commits.
    pub lines_deleted: u32,
    /// Churn trend: accelerating, stable, or cooling.
    pub trend: ChurnTrend,
    /// Per-author contributions keyed by interned author index.
    /// Indices reference [`ChurnResult::author_pool`]. Commits without a
    /// known author are counted in `commits` but have no entry here.
    pub authors: FxHashMap<u32, AuthorContribution>,
}
115
/// Result of churn analysis.
pub struct ChurnResult {
    /// Per-file churn data, keyed by the same path stored in
    /// [`FileChurn::path`] (analysis root joined with git's reported path).
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository is a shallow clone. A shallow clone has
    /// truncated history. NOTE(review): presumably consumers use this to
    /// warn that churn is under-counted; confirm against callers.
    pub shallow_clone: bool,
    /// Author email pool. Per-file [`AuthorContribution`] entries reference
    /// authors by their index into this vector.
    pub author_pool: Vec<String>,
}
126
127/// Parse a `--since` value into a git-compatible duration.
128///
129/// Accepts:
130/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
131/// - ISO dates: `2025-06-01`
132///
133/// # Errors
134///
135/// Returns an error if the input is not a recognized duration format or ISO date,
136/// the numeric part is invalid, or the duration is zero.
137pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
138    // Try ISO date first (YYYY-MM-DD)
139    if is_iso_date(input) {
140        return Ok(SinceDuration {
141            git_after: input.to_string(),
142            display: input.to_string(),
143        });
144    }
145
146    // Parse duration: number + unit
147    let (num_str, unit) = split_number_unit(input)?;
148    let num: u64 = num_str
149        .parse()
150        .map_err(|_| format!("invalid number in --since: {input}"))?;
151
152    if num == 0 {
153        return Err("--since duration must be greater than 0".to_string());
154    }
155
156    match unit {
157        "d" | "day" | "days" => {
158            let s = if num == 1 { "" } else { "s" };
159            Ok(SinceDuration {
160                git_after: format!("{num} day{s} ago"),
161                display: format!("{num} day{s}"),
162            })
163        }
164        "w" | "week" | "weeks" => {
165            let s = if num == 1 { "" } else { "s" };
166            Ok(SinceDuration {
167                git_after: format!("{num} week{s} ago"),
168                display: format!("{num} week{s}"),
169            })
170        }
171        "m" | "month" | "months" => {
172            let s = if num == 1 { "" } else { "s" };
173            Ok(SinceDuration {
174                git_after: format!("{num} month{s} ago"),
175                display: format!("{num} month{s}"),
176            })
177        }
178        "y" | "year" | "years" => {
179            let s = if num == 1 { "" } else { "s" };
180            Ok(SinceDuration {
181                git_after: format!("{num} year{s} ago"),
182                display: format!("{num} year{s}"),
183            })
184        }
185        _ => Err(format!(
186            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
187        )),
188    }
189}
190
191/// Analyze git churn for files in the given root directory.
192///
193/// Returns `None` if git is not available or the directory is not a git repository.
194pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
195    let shallow = is_shallow_clone(root);
196    let state = analyze_churn_events(root, since, None)?;
197    Some(build_churn_result(state, shallow))
198}
199
200/// Check if the repository is a shallow clone.
201#[must_use]
202pub fn is_shallow_clone(root: &Path) -> bool {
203    let mut command = Command::new("git");
204    command
205        .args(["rev-parse", "--is-shallow-repository"])
206        .current_dir(root);
207    clear_ambient_git_env(&mut command);
208    command.output().is_ok_and(|o| {
209        String::from_utf8_lossy(&o.stdout)
210            .trim()
211            .eq_ignore_ascii_case("true")
212    })
213}
214
215/// Check if the directory is inside a git repository.
216#[must_use]
217pub fn is_git_repo(root: &Path) -> bool {
218    let mut command = Command::new("git");
219    command
220        .args(["rev-parse", "--git-dir"])
221        .current_dir(root)
222        .stdout(std::process::Stdio::null())
223        .stderr(std::process::Stdio::null());
224    clear_ambient_git_env(&mut command);
225    command.status().is_ok_and(|s| s.success())
226}
227
228// ── Churn cache ──────────────────────────────────────────────────
229
/// Maximum size of a churn cache file (64 MB). The incremental cache stores
/// per-commit events, so it needs more headroom than the old aggregate rows.
/// Cache files larger than this are ignored on load.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;

/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
/// changes so older payloads are rejected on load. Bumped to 3 when the cache
/// switched from aggregate rows to per-commit events for incremental updates.
const CHURN_CACHE_VERSION: u8 = 3;
238
/// Serializable per-commit event for the disk cache.
///
/// One event is recorded per (commit, file) numstat line.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedCommitEvent {
    /// Commit timestamp in epoch seconds, taken from the `%at` header field.
    timestamp: u64,
    /// Lines added to the file by this commit.
    lines_added: u32,
    /// Lines deleted from the file by this commit.
    lines_deleted: u32,
    /// Index into the author pool, or `None` when the commit header carried
    /// no author (bare-timestamp header).
    author_idx: Option<u32>,
}
247
/// Serializable per-file churn entry for the disk cache.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// File path stored as a string (lossy conversion of the in-memory `PathBuf`).
    path: String,
    /// Every commit event recorded for this file.
    events: Vec<CachedCommitEvent>,
}
254
/// Cached churn data keyed by last indexed SHA and since string.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
    version: u8,
    /// HEAD commit SHA at the time the cache was written.
    last_indexed_sha: String,
    /// The `--after` value the events were collected with; a cache is only
    /// reused when this string matches the current request exactly.
    git_after: String,
    /// Per-file event lists.
    files: Vec<CachedFileChurn>,
    /// Shallow-clone flag recorded when the cache was written.
    shallow_clone: bool,
    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
    author_pool: Vec<String>,
}
267
/// Per-file commit events retained in memory while building or updating churn.
struct FileEvents {
    /// Events in the order they were parsed or merged; not sorted by time.
    events: Vec<CachedCommitEvent>,
}
272
/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
/// timestamps so a cache can merge new commits and recompute trend/recency.
struct ChurnEventState {
    /// Commit events grouped by file path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Author emails; events refer to entries here by index.
    author_pool: Vec<String>,
}
279
280/// Get the full HEAD SHA for cache keying.
281fn get_head_sha(root: &Path) -> Option<String> {
282    let mut command = Command::new("git");
283    command.args(["rev-parse", "HEAD"]).current_dir(root);
284    clear_ambient_git_env(&mut command);
285    command
286        .output()
287        .ok()
288        .filter(|o| o.status.success())
289        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
290}
291
292/// Check whether `ancestor` is still reachable from `descendant`.
293fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
294    let mut command = Command::new("git");
295    command
296        .args(["merge-base", "--is-ancestor", ancestor, descendant])
297        .current_dir(root);
298    clear_ambient_git_env(&mut command);
299    command.status().is_ok_and(|s| s.success())
300}
301
302/// Try to load churn data from disk cache. Returns `None` on cache miss
303/// or version mismatch.
304fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
305    let cache_file = cache_dir.join("churn.bin");
306    let data = std::fs::read(&cache_file).ok()?;
307    if data.len() > MAX_CHURN_CACHE_SIZE {
308        return None;
309    }
310    let cache: ChurnCache = bitcode::decode(&data).ok()?;
311    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
312        return None;
313    }
314    Some(cache)
315}
316
317/// Save churn data to disk cache.
318fn save_churn_cache(
319    cache_dir: &Path,
320    last_indexed_sha: &str,
321    git_after: &str,
322    state: &ChurnEventState,
323    shallow_clone: bool,
324) {
325    let files: Vec<CachedFileChurn> = state
326        .files
327        .iter()
328        .map(|f| CachedFileChurn {
329            path: f.0.to_string_lossy().to_string(),
330            events: f.1.events.clone(),
331        })
332        .collect();
333    let cache = ChurnCache {
334        version: CHURN_CACHE_VERSION,
335        last_indexed_sha: last_indexed_sha.to_string(),
336        git_after: git_after.to_string(),
337        files,
338        shallow_clone,
339        author_pool: state.author_pool.clone(),
340    };
341    let _ = std::fs::create_dir_all(cache_dir);
342    let data = bitcode::encode(&cache);
343    // Write to temp file then rename for atomic update (avoids partial reads by concurrent processes)
344    let tmp = cache_dir.join("churn.bin.tmp");
345    if std::fs::write(&tmp, data).is_ok() {
346        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
347    }
348}
349
350/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
351/// since duration match. If HEAD advanced from the cached SHA, runs an
352/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
353///
354/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
355/// cache state was used.
356/// Returns `None` if git analysis fails.
357pub fn analyze_churn_cached(
358    root: &Path,
359    since: &SinceDuration,
360    cache_dir: &Path,
361    no_cache: bool,
362) -> Option<(ChurnResult, bool)> {
363    let head_sha = get_head_sha(root)?;
364
365    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
366        if cache.last_indexed_sha == head_sha {
367            let shallow_clone = cache.shallow_clone;
368            let state = cache.into_event_state();
369            return Some((build_churn_result(state, shallow_clone), true));
370        }
371
372        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
373            let shallow_clone = is_shallow_clone(root);
374            let range = format!("{}..HEAD", cache.last_indexed_sha);
375            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
376                let mut state = cache.into_event_state();
377                merge_churn_states(&mut state, delta);
378                save_churn_cache(
379                    cache_dir,
380                    &head_sha,
381                    &since.git_after,
382                    &state,
383                    shallow_clone,
384                );
385                return Some((build_churn_result(state, shallow_clone), true));
386            }
387        }
388    }
389
390    let shallow_clone = is_shallow_clone(root);
391    let state = analyze_churn_events(root, since, None)?;
392    if !no_cache {
393        save_churn_cache(
394            cache_dir,
395            &head_sha,
396            &since.git_after,
397            &state,
398            shallow_clone,
399        );
400    }
401
402    let result = build_churn_result(state, shallow_clone);
403    Some((result, false))
404}
405
406// ── Internal ──────────────────────────────────────────────────────
407
408impl ChurnCache {
409    fn into_event_state(self) -> ChurnEventState {
410        let files = self
411            .files
412            .into_iter()
413            .map(|entry| {
414                (
415                    PathBuf::from(entry.path),
416                    FileEvents {
417                        events: entry.events,
418                    },
419                )
420            })
421            .collect();
422        ChurnEventState {
423            files,
424            author_pool: self.author_pool,
425        }
426    }
427}
428
429/// Run `git log --numstat` and return event-level churn state.
430fn analyze_churn_events(
431    root: &Path,
432    since: &SinceDuration,
433    revision_range: Option<&str>,
434) -> Option<ChurnEventState> {
435    let mut command = Command::new("git");
436    command.arg("log");
437    if let Some(range) = revision_range {
438        command.arg(range);
439    }
440    command
441        .args([
442            "--numstat",
443            "--no-merges",
444            "--no-renames",
445            "--use-mailmap",
446            "--format=format:%at|%ae",
447            &format!("--after={}", since.git_after),
448        ])
449        .current_dir(root);
450    clear_ambient_git_env(&mut command);
451
452    let output = match spawn_output(&mut command) {
453        Ok(o) => o,
454        Err(e) => {
455            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
456            return None;
457        }
458    };
459
460    if !output.status.success() {
461        let stderr = String::from_utf8_lossy(&output.stderr);
462        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
463        return None;
464    }
465
466    let stdout = String::from_utf8_lossy(&output.stdout);
467    Some(parse_git_log_events(&stdout, root))
468}
469
470/// Merge new churn events into cached event state.
471fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
472    let mut base_author_index: FxHashMap<String, u32> = base
473        .author_pool
474        .iter()
475        .enumerate()
476        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
477        .collect();
478
479    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
480    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
481        let Ok(old_idx) = u32::try_from(old_idx) else {
482            continue;
483        };
484        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
485        author_mapping.insert(old_idx, new_idx);
486    }
487
488    for (path, mut file) in delta.files {
489        for event in &mut file.events {
490            event.author_idx = event
491                .author_idx
492                .and_then(|idx| author_mapping.get(&idx).copied());
493        }
494        base.files
495            .entry(path)
496            .and_modify(|existing| existing.events.append(&mut file.events))
497            .or_insert(file);
498    }
499}
500
501/// Parse `git log --numstat --format=format:%at|%ae` output into events.
502fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
503    let now_secs = std::time::SystemTime::now()
504        .duration_since(std::time::UNIX_EPOCH)
505        .unwrap_or_default()
506        .as_secs();
507
508    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
509    let mut author_pool: Vec<String> = Vec::new();
510    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
511    let mut current_timestamp: Option<u64> = None;
512    let mut current_author_idx: Option<u32> = None;
513
514    for line in stdout.lines() {
515        let line = line.trim();
516        if line.is_empty() {
517            continue;
518        }
519
520        // Header lines have shape: "<ts>|<email>"
521        if let Some((ts_str, email)) = line.split_once('|')
522            && let Ok(ts) = ts_str.parse::<u64>()
523        {
524            current_timestamp = Some(ts);
525            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
526            continue;
527        }
528
529        // Backwards-compat: bare timestamp (legacy format or test fixtures).
530        if let Ok(ts) = line.parse::<u64>() {
531            current_timestamp = Some(ts);
532            current_author_idx = None;
533            continue;
534        }
535
536        // Numstat line: "10\t5\tpath/to/file"
537        if let Some((added, deleted, path)) = parse_numstat_line(line) {
538            let abs_path = root.join(path);
539            let ts = current_timestamp.unwrap_or(now_secs);
540            files
541                .entry(abs_path)
542                .or_insert_with(|| FileEvents { events: Vec::new() })
543                .events
544                .push(CachedCommitEvent {
545                    timestamp: ts,
546                    lines_added: added,
547                    lines_deleted: deleted,
548                    author_idx: current_author_idx,
549                });
550        }
551    }
552
553    ChurnEventState { files, author_pool }
554}
555
556/// Convert event-level churn state into the public aggregate result.
557#[expect(
558    clippy::cast_possible_truncation,
559    reason = "commit count per file is bounded by git history depth"
560)]
561fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
562    let now_secs = std::time::SystemTime::now()
563        .duration_since(std::time::UNIX_EPOCH)
564        .unwrap_or_default()
565        .as_secs();
566
567    let files = state
568        .files
569        .into_iter()
570        .map(|(path, file)| {
571            let mut timestamps = Vec::with_capacity(file.events.len());
572            let mut weighted_commits = 0.0;
573            let mut lines_added = 0;
574            let mut lines_deleted = 0;
575            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
576
577            for event in file.events {
578                timestamps.push(event.timestamp);
579                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
580                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
581                weighted_commits += weight;
582                lines_added += event.lines_added;
583                lines_deleted += event.lines_deleted;
584
585                if let Some(idx) = event.author_idx {
586                    authors
587                        .entry(idx)
588                        .and_modify(|c| {
589                            c.commits += 1;
590                            c.weighted_commits += weight;
591                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
592                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
593                        })
594                        .or_insert(AuthorContribution {
595                            commits: 1,
596                            weighted_commits: weight,
597                            first_commit_ts: event.timestamp,
598                            last_commit_ts: event.timestamp,
599                        });
600                }
601            }
602
603            let commits = timestamps.len() as u32;
604            let trend = compute_trend(&timestamps);
605            // Round per-author weighted sums for cache stability.
606            for c in authors.values_mut() {
607                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
608            }
609            let churn = FileChurn {
610                path: path.clone(),
611                commits,
612                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
613                lines_added,
614                lines_deleted,
615                trend,
616                authors,
617            };
618            (path, churn)
619        })
620        .collect();
621
622    ChurnResult {
623        files,
624        shallow_clone,
625        author_pool: state.author_pool,
626    }
627}
628
/// Test helper: parse `git log --numstat --format=format:%at|%ae` output and
/// aggregate it in one step.
///
/// Yields the per-file churn map together with the author email pool that
/// the interned indices in [`FileChurn::authors`] point into. The result is
/// built with the shallow-clone flag set to `false`.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
638
639/// Intern an author email into the pool, returning its stable index.
640fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
641    if let Some(&idx) = index.get(email) {
642        return idx;
643    }
644    #[expect(
645        clippy::cast_possible_truncation,
646        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
647    )]
648    let idx = pool.len() as u32;
649    let owned = email.to_string();
650    index.insert(owned.clone(), idx);
651    pool.push(owned);
652    idx
653}
654
/// Split one numstat line of the form `"10\t5\tpath/to/file.ts"` into
/// `(added, deleted, path)`.
///
/// Only the first two tabs act as separators, so the path keeps any tabs it
/// contains. Returns `None` when fewer than three fields are present or when
/// either count is not a `u32` — which is how binary entries
/// (`"-\t-\tpath"`) get skipped.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added_field, remainder) = line.split_once('\t')?;
    let (deleted_field, path) = remainder.split_once('\t')?;

    // Binary files carry "-" in both count columns and fail to parse here.
    let added = added_field.parse::<u32>().ok()?;
    let deleted = deleted_field.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
669
670/// Compute churn trend by splitting commits into two temporal halves.
671///
672/// Finds the midpoint between the oldest and newest commit timestamps,
673/// then compares commit counts in each half:
674/// - Recent > 1.5× older → Accelerating
675/// - Recent < 0.67× older → Cooling
676/// - Otherwise → Stable
677fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
678    if timestamps.len() < 2 {
679        return ChurnTrend::Stable;
680    }
681
682    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
683    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
684
685    if max_ts == min_ts {
686        return ChurnTrend::Stable;
687    }
688
689    let midpoint = min_ts + (max_ts - min_ts) / 2;
690    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
691    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
692
693    if older < 1.0 {
694        return ChurnTrend::Stable;
695    }
696
697    let ratio = recent / older;
698    if ratio > 1.5 {
699        ChurnTrend::Accelerating
700    } else if ratio < 0.67 {
701        ChurnTrend::Cooling
702    } else {
703        ChurnTrend::Stable
704    }
705}
706
/// Check whether `input` is a calendar date written exactly as `YYYY-MM-DD`.
///
/// Requires ten bytes: four ASCII digits, `-`, two digits, `-`, two digits.
/// The month must be in `01..=12` and the day in `01..=31`, so strings such
/// as `2025-13-45` are rejected here instead of being handed to git as an
/// `--after` value. Days are not checked against the length of the specific
/// month.
fn is_iso_date(input: &str) -> bool {
    // Matching on the byte slice checks length and separators in one step
    // and avoids string slicing altogether.
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = input.as_bytes() else {
        return false;
    };
    if ![y0, y1, y2, y3, m0, m1, d0, d1]
        .iter()
        .all(u8::is_ascii_digit)
    {
        return false;
    }

    let month = (m0 - b'0') * 10 + (m1 - b'0');
    let day = (d0 - b'0') * 10 + (d1 - b'0');
    (1..=12).contains(&month) && (1..=31).contains(&day)
}
715
/// Split a duration such as `"90days"` into its leading ASCII digits
/// (`"90"`) and the remainder (`"days"`).
///
/// # Errors
///
/// Fails when the input consists solely of digits (or is empty), because no
/// unit follows, and when the input does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(unit_start) => Ok(input.split_at(unit_start)),
    }
}
727
728#[cfg(test)]
729mod tests {
730    use super::*;
731
732    // ── parse_since ──────────────────────────────────────────────
733
734    #[test]
735    fn parse_since_months_short() {
736        let d = parse_since("6m").unwrap();
737        assert_eq!(d.git_after, "6 months ago");
738        assert_eq!(d.display, "6 months");
739    }
740
741    #[test]
742    fn parse_since_months_long() {
743        let d = parse_since("6months").unwrap();
744        assert_eq!(d.git_after, "6 months ago");
745        assert_eq!(d.display, "6 months");
746    }
747
748    #[test]
749    fn parse_since_days() {
750        let d = parse_since("90d").unwrap();
751        assert_eq!(d.git_after, "90 days ago");
752        assert_eq!(d.display, "90 days");
753    }
754
755    #[test]
756    fn parse_since_year_singular() {
757        let d = parse_since("1y").unwrap();
758        assert_eq!(d.git_after, "1 year ago");
759        assert_eq!(d.display, "1 year");
760    }
761
762    #[test]
763    fn parse_since_years_plural() {
764        let d = parse_since("2years").unwrap();
765        assert_eq!(d.git_after, "2 years ago");
766        assert_eq!(d.display, "2 years");
767    }
768
769    #[test]
770    fn parse_since_weeks() {
771        let d = parse_since("2w").unwrap();
772        assert_eq!(d.git_after, "2 weeks ago");
773        assert_eq!(d.display, "2 weeks");
774    }
775
776    #[test]
777    fn parse_since_iso_date() {
778        let d = parse_since("2025-06-01").unwrap();
779        assert_eq!(d.git_after, "2025-06-01");
780        assert_eq!(d.display, "2025-06-01");
781    }
782
783    #[test]
784    fn parse_since_month_singular() {
785        let d = parse_since("1month").unwrap();
786        assert_eq!(d.display, "1 month");
787    }
788
789    #[test]
790    fn parse_since_day_singular() {
791        let d = parse_since("1day").unwrap();
792        assert_eq!(d.display, "1 day");
793    }
794
795    #[test]
796    fn parse_since_zero_rejected() {
797        assert!(parse_since("0m").is_err());
798    }
799
800    #[test]
801    fn parse_since_no_unit_rejected() {
802        assert!(parse_since("90").is_err());
803    }
804
805    #[test]
806    fn parse_since_unknown_unit_rejected() {
807        assert!(parse_since("6x").is_err());
808    }
809
810    #[test]
811    fn parse_since_no_number_rejected() {
812        assert!(parse_since("months").is_err());
813    }
814
815    // ── parse_numstat_line ───────────────────────────────────────
816
817    #[test]
818    fn numstat_normal() {
819        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
820        assert_eq!(a, 10);
821        assert_eq!(d, 5);
822        assert_eq!(p, "src/file.ts");
823    }
824
825    #[test]
826    fn numstat_binary_skipped() {
827        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
828    }
829
830    #[test]
831    fn numstat_zero_lines() {
832        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
833        assert_eq!(a, 0);
834        assert_eq!(d, 0);
835        assert_eq!(p, "src/empty.ts");
836    }
837
838    // ── compute_trend ────────────────────────────────────────────
839
840    #[test]
841    fn trend_empty_is_stable() {
842        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
843    }
844
845    #[test]
846    fn trend_single_commit_is_stable() {
847        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
848    }
849
850    #[test]
851    fn trend_accelerating() {
852        // 2 old commits, 5 recent commits
853        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
854        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
855    }
856
857    #[test]
858    fn trend_cooling() {
859        // 5 old commits, 2 recent commits
860        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
861        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
862    }
863
864    #[test]
865    fn trend_stable_even_distribution() {
866        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
867        let timestamps = vec![100, 200, 300, 700, 800, 900];
868        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
869    }
870
871    #[test]
872    fn trend_same_timestamp_is_stable() {
873        let timestamps = vec![500, 500, 500];
874        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
875    }
876
877    // ── is_iso_date ──────────────────────────────────────────────
878
879    #[test]
880    fn iso_date_valid() {
881        assert!(is_iso_date("2025-06-01"));
882        assert!(is_iso_date("2025-12-31"));
883    }
884
885    #[test]
886    fn iso_date_with_time_rejected() {
887        // Only exact YYYY-MM-DD (10 chars) is accepted
888        assert!(!is_iso_date("2025-06-01T00:00:00"));
889    }
890
891    #[test]
892    fn iso_date_invalid() {
893        assert!(!is_iso_date("6months"));
894        assert!(!is_iso_date("2025"));
895        assert!(!is_iso_date("not-a-date"));
896        assert!(!is_iso_date("abcd-ef-gh"));
897    }
898
899    // ── Display ──────────────────────────────────────────────────
900
901    #[test]
902    fn trend_display() {
903        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
904        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
905        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
906    }
907
908    // ── parse_git_log ───────────────────────────────────────────
909
910    #[test]
911    fn parse_git_log_single_commit() {
912        let root = Path::new("/project");
913        let output = "1700000000\n10\t5\tsrc/index.ts\n";
914        let (result, _) = parse_git_log(output, root);
915        assert_eq!(result.len(), 1);
916        let churn = &result[&PathBuf::from("/project/src/index.ts")];
917        assert_eq!(churn.commits, 1);
918        assert_eq!(churn.lines_added, 10);
919        assert_eq!(churn.lines_deleted, 5);
920    }
921
922    #[test]
923    fn parse_git_log_multiple_commits_same_file() {
924        let root = Path::new("/project");
925        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
926        let (result, _) = parse_git_log(output, root);
927        assert_eq!(result.len(), 1);
928        let churn = &result[&PathBuf::from("/project/src/index.ts")];
929        assert_eq!(churn.commits, 2);
930        assert_eq!(churn.lines_added, 13);
931        assert_eq!(churn.lines_deleted, 7);
932    }
933
934    #[test]
935    fn parse_git_log_multiple_files() {
936        let root = Path::new("/project");
937        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
938        let (result, _) = parse_git_log(output, root);
939        assert_eq!(result.len(), 2);
940        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
941        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
942    }
943
944    #[test]
945    fn parse_git_log_empty_output() {
946        let root = Path::new("/project");
947        let (result, _) = parse_git_log("", root);
948        assert!(result.is_empty());
949    }
950
951    #[test]
952    fn parse_git_log_skips_binary_files() {
953        let root = Path::new("/project");
954        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
955        let (result, _) = parse_git_log(output, root);
956        assert_eq!(result.len(), 1);
957        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
958    }
959
960    #[test]
961    fn parse_git_log_weighted_commits_are_positive() {
962        let root = Path::new("/project");
963        // Use a timestamp near "now" to ensure weight doesn't decay to zero
964        let now_secs = std::time::SystemTime::now()
965            .duration_since(std::time::UNIX_EPOCH)
966            .unwrap()
967            .as_secs();
968        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
969        let (result, _) = parse_git_log(&output, root);
970        let churn = &result[&PathBuf::from("/project/src/a.ts")];
971        assert!(
972            churn.weighted_commits > 0.0,
973            "weighted_commits should be positive for recent commits"
974        );
975    }
976
977    // ── compute_trend edge cases ─────────────────────────────────
978
979    #[test]
980    fn trend_boundary_1_5x_ratio() {
981        // Exactly 1.5x ratio (3 recent : 2 old) → boundary between stable and accelerating
982        // midpoint = 100 + (1000-100)/2 = 550
983        // old: 100, 200 (2 timestamps <= 550)
984        // recent: 600, 800, 1000 (3 timestamps > 550)
985        // ratio = 3/2 = 1.5 — NOT > 1.5, so stable
986        let timestamps = vec![100, 200, 600, 800, 1000];
987        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
988    }
989
990    #[test]
991    fn trend_just_above_1_5x() {
992        // midpoint = 100 + (1000-100)/2 = 550
993        // old: 100 (1 timestamp <= 550)
994        // recent: 600, 800, 1000 (3 timestamps > 550)
995        // ratio = 3/1 = 3.0 → accelerating
996        let timestamps = vec![100, 600, 800, 1000];
997        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
998    }
999
1000    #[test]
1001    fn trend_boundary_0_67x_ratio() {
1002        // Exactly 0.67x ratio → boundary between cooling and stable
1003        // midpoint = 100 + (1000-100)/2 = 550
1004        // old: 100, 200, 300 (3 timestamps <= 550)
1005        // recent: 600, 1000 (2 timestamps > 550)
1006        // ratio = 2/3 = 0.666... < 0.67 → cooling
1007        let timestamps = vec![100, 200, 300, 600, 1000];
1008        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
1009    }
1010
1011    #[test]
1012    fn trend_two_timestamps_different() {
1013        // Only 2 timestamps: midpoint = 100 + (200-100)/2 = 150
1014        // old: 100 (1 timestamp <= 150)
1015        // recent: 200 (1 timestamp > 150)
1016        // ratio = 1/1 = 1.0 → stable
1017        let timestamps = vec![100, 200];
1018        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
1019    }
1020
1021    // ── parse_since additional coverage ─────────────────────────
1022
1023    #[test]
1024    fn parse_since_week_singular() {
1025        let d = parse_since("1week").unwrap();
1026        assert_eq!(d.git_after, "1 week ago");
1027        assert_eq!(d.display, "1 week");
1028    }
1029
1030    #[test]
1031    fn parse_since_weeks_long() {
1032        let d = parse_since("3weeks").unwrap();
1033        assert_eq!(d.git_after, "3 weeks ago");
1034        assert_eq!(d.display, "3 weeks");
1035    }
1036
1037    #[test]
1038    fn parse_since_days_long() {
1039        let d = parse_since("30days").unwrap();
1040        assert_eq!(d.git_after, "30 days ago");
1041        assert_eq!(d.display, "30 days");
1042    }
1043
1044    #[test]
1045    fn parse_since_year_long() {
1046        let d = parse_since("1year").unwrap();
1047        assert_eq!(d.git_after, "1 year ago");
1048        assert_eq!(d.display, "1 year");
1049    }
1050
1051    #[test]
1052    fn parse_since_overflow_number_rejected() {
1053        // Number too large for u64
1054        let result = parse_since("99999999999999999999d");
1055        assert!(result.is_err());
1056        let err = result.unwrap_err();
1057        assert!(err.contains("invalid number"));
1058    }
1059
1060    #[test]
1061    fn parse_since_zero_days_rejected() {
1062        assert!(parse_since("0d").is_err());
1063    }
1064
1065    #[test]
1066    fn parse_since_zero_weeks_rejected() {
1067        assert!(parse_since("0w").is_err());
1068    }
1069
1070    #[test]
1071    fn parse_since_zero_years_rejected() {
1072        assert!(parse_since("0y").is_err());
1073    }
1074
1075    // ── parse_numstat_line additional coverage ──────────────────
1076
1077    #[test]
1078    fn numstat_missing_path() {
1079        // Only two tab-separated fields, no path
1080        assert!(parse_numstat_line("10\t5").is_none());
1081    }
1082
1083    #[test]
1084    fn numstat_single_field() {
1085        assert!(parse_numstat_line("10").is_none());
1086    }
1087
1088    #[test]
1089    fn numstat_empty_string() {
1090        assert!(parse_numstat_line("").is_none());
1091    }
1092
1093    #[test]
1094    fn numstat_only_added_is_binary() {
1095        // Added is "-" but deleted is numeric
1096        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1097    }
1098
1099    #[test]
1100    fn numstat_only_deleted_is_binary() {
1101        // Added is numeric but deleted is "-"
1102        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1103    }
1104
1105    #[test]
1106    fn numstat_path_with_spaces() {
1107        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1108        assert_eq!(a, 3);
1109        assert_eq!(d, 1);
1110        assert_eq!(p, "path with spaces/file.ts");
1111    }
1112
1113    #[test]
1114    fn numstat_large_numbers() {
1115        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1116        assert_eq!(a, 9999);
1117        assert_eq!(d, 8888);
1118        assert_eq!(p, "src/big.ts");
1119    }
1120
1121    // ── is_iso_date additional coverage ─────────────────────────
1122
1123    #[test]
1124    fn iso_date_wrong_separator_positions() {
1125        // Dashes in wrong positions
1126        assert!(!is_iso_date("20-25-0601"));
1127        assert!(!is_iso_date("202506-01-"));
1128    }
1129
1130    #[test]
1131    fn iso_date_too_short() {
1132        assert!(!is_iso_date("2025-06-0"));
1133    }
1134
1135    #[test]
1136    fn iso_date_letters_in_day() {
1137        assert!(!is_iso_date("2025-06-ab"));
1138    }
1139
1140    #[test]
1141    fn iso_date_letters_in_month() {
1142        assert!(!is_iso_date("2025-ab-01"));
1143    }
1144
1145    // ── split_number_unit additional coverage ───────────────────
1146
1147    #[test]
1148    fn split_number_unit_valid() {
1149        let (num, unit) = split_number_unit("42days").unwrap();
1150        assert_eq!(num, "42");
1151        assert_eq!(unit, "days");
1152    }
1153
1154    #[test]
1155    fn split_number_unit_single_digit() {
1156        let (num, unit) = split_number_unit("1m").unwrap();
1157        assert_eq!(num, "1");
1158        assert_eq!(unit, "m");
1159    }
1160
1161    #[test]
1162    fn split_number_unit_no_digits() {
1163        let err = split_number_unit("abc").unwrap_err();
1164        assert!(err.contains("must start with a number"));
1165    }
1166
1167    #[test]
1168    fn split_number_unit_no_unit() {
1169        let err = split_number_unit("123").unwrap_err();
1170        assert!(err.contains("requires a unit suffix"));
1171    }
1172
1173    // ── parse_git_log additional coverage ───────────────────────
1174
1175    #[test]
1176    fn parse_git_log_numstat_before_timestamp_uses_now() {
1177        let root = Path::new("/project");
1178        // No timestamp line before the numstat line
1179        let output = "10\t5\tsrc/no_ts.ts\n";
1180        let (result, _) = parse_git_log(output, root);
1181        assert_eq!(result.len(), 1);
1182        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1183        assert_eq!(churn.commits, 1);
1184        assert_eq!(churn.lines_added, 10);
1185        assert_eq!(churn.lines_deleted, 5);
1186        // Without a timestamp, it falls back to now_secs, so weight should be ~1.0
1187        assert!(
1188            churn.weighted_commits > 0.9,
1189            "weight should be near 1.0 when timestamp defaults to now"
1190        );
1191    }
1192
1193    #[test]
1194    fn parse_git_log_whitespace_lines_ignored() {
1195        let root = Path::new("/project");
1196        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1197        let (result, _) = parse_git_log(output, root);
1198        assert_eq!(result.len(), 1);
1199    }
1200
1201    #[test]
1202    fn parse_git_log_trend_is_computed_per_file() {
1203        let root = Path::new("/project");
1204        // Two commits far apart for one file, recent-heavy for another
1205        let output = "\
12061000\n5\t1\tsrc/old.ts\n\
12072000\n3\t1\tsrc/old.ts\n\
12081000\n1\t0\tsrc/hot.ts\n\
12091800\n1\t0\tsrc/hot.ts\n\
12101900\n1\t0\tsrc/hot.ts\n\
12111950\n1\t0\tsrc/hot.ts\n\
12122000\n1\t0\tsrc/hot.ts\n";
1213        let (result, _) = parse_git_log(output, root);
1214        let old = &result[&PathBuf::from("/project/src/old.ts")];
1215        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
1216        assert_eq!(old.commits, 2);
1217        assert_eq!(hot.commits, 5);
1218        // hot.ts has 4 recent vs 1 old => accelerating
1219        assert_eq!(hot.trend, ChurnTrend::Accelerating);
1220    }
1221
1222    #[test]
1223    fn parse_git_log_weighted_decay_for_old_commits() {
1224        let root = Path::new("/project");
1225        let now = std::time::SystemTime::now()
1226            .duration_since(std::time::UNIX_EPOCH)
1227            .unwrap()
1228            .as_secs();
1229        // One commit from 180 days ago (two half-lives) should weigh ~0.25
1230        let old_ts = now - (180 * 86_400);
1231        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1232        let (result, _) = parse_git_log(&output, root);
1233        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1234        assert!(
1235            churn.weighted_commits < 0.5,
1236            "180-day-old commit should weigh ~0.25, got {}",
1237            churn.weighted_commits
1238        );
1239        assert!(
1240            churn.weighted_commits > 0.1,
1241            "180-day-old commit should weigh ~0.25, got {}",
1242            churn.weighted_commits
1243        );
1244    }
1245
1246    #[test]
1247    fn parse_git_log_path_stored_as_absolute() {
1248        let root = Path::new("/my/project");
1249        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1250        let (result, _) = parse_git_log(output, root);
1251        let key = PathBuf::from("/my/project/lib/utils.ts");
1252        assert!(result.contains_key(&key));
1253        assert_eq!(result[&key].path, key);
1254    }
1255
1256    #[test]
1257    fn parse_git_log_weighted_commits_rounded() {
1258        let root = Path::new("/project");
1259        let now = std::time::SystemTime::now()
1260            .duration_since(std::time::UNIX_EPOCH)
1261            .unwrap()
1262            .as_secs();
1263        // A commit right now should weigh exactly 1.00
1264        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1265        let (result, _) = parse_git_log(&output, root);
1266        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1267        // Weighted commits are rounded to 2 decimal places
1268        let decimals = format!("{:.2}", churn.weighted_commits);
1269        assert_eq!(
1270            churn.weighted_commits.to_string().len(),
1271            decimals.len().min(churn.weighted_commits.to_string().len()),
1272            "weighted_commits should be rounded to at most 2 decimal places"
1273        );
1274    }
1275
1276    // ── ChurnTrend serde ────────────────────────────────────────
1277
1278    #[test]
1279    fn trend_serde_serialization() {
1280        assert_eq!(
1281            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1282            "\"accelerating\""
1283        );
1284        assert_eq!(
1285            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1286            "\"stable\""
1287        );
1288        assert_eq!(
1289            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1290            "\"cooling\""
1291        );
1292    }
1293
1294    // ── parse_git_log: author tracking ──────────────────────────
1295
1296    #[test]
1297    fn parse_git_log_extracts_author_email() {
1298        let root = Path::new("/project");
1299        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1300        let (result, pool) = parse_git_log(output, root);
1301        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1302        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1303        assert_eq!(churn.authors.len(), 1);
1304        let alice = &churn.authors[&0];
1305        assert_eq!(alice.commits, 1);
1306        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1307        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1308    }
1309
1310    #[test]
1311    fn parse_git_log_intern_dedupes_authors() {
1312        let root = Path::new("/project");
1313        let output = "\
13141700000000|alice@example.com
13151\t0\ta.ts
13161700100000|bob@example.com
13172\t1\tb.ts
13181700200000|alice@example.com
13193\t2\tc.ts
1320";
1321        let (_result, pool) = parse_git_log(output, root);
1322        assert_eq!(pool.len(), 2);
1323        assert!(pool.contains(&"alice@example.com".to_string()));
1324        assert!(pool.contains(&"bob@example.com".to_string()));
1325    }
1326
1327    #[test]
1328    fn parse_git_log_aggregates_per_author() {
1329        let root = Path::new("/project");
1330        // alice touches index.ts twice, bob once.
1331        let output = "\
13321700000000|alice@example.com
13331\t0\tsrc/index.ts
13341700100000|bob@example.com
13352\t0\tsrc/index.ts
13361700200000|alice@example.com
13371\t1\tsrc/index.ts
1338";
1339        let (result, pool) = parse_git_log(output, root);
1340        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1341        assert_eq!(churn.commits, 3);
1342        assert_eq!(churn.authors.len(), 2);
1343
1344        let alice_idx =
1345            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
1346        let alice = &churn.authors[&alice_idx];
1347        assert_eq!(alice.commits, 2);
1348        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1349        assert_eq!(alice.last_commit_ts, 1_700_200_000);
1350    }
1351
1352    #[test]
1353    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1354        // Backwards-compat path: header has no `|email` suffix.
1355        let root = Path::new("/project");
1356        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1357        let (result, pool) = parse_git_log(output, root);
1358        assert!(pool.is_empty());
1359        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1360        assert_eq!(churn.commits, 1);
1361        assert!(churn.authors.is_empty());
1362    }
1363
1364    // ── intern_author ──────────────────────────────────────────
1365
1366    #[test]
1367    fn intern_author_returns_existing_index() {
1368        let mut pool = Vec::new();
1369        let mut index = FxHashMap::default();
1370        let i1 = intern_author("alice@x", &mut pool, &mut index);
1371        let i2 = intern_author("alice@x", &mut pool, &mut index);
1372        assert_eq!(i1, i2);
1373        assert_eq!(pool.len(), 1);
1374    }
1375
1376    #[test]
1377    fn intern_author_assigns_sequential_indices() {
1378        let mut pool = Vec::new();
1379        let mut index = FxHashMap::default();
1380        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1381        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1382        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1383        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1384    }
1385
1386    // ── incremental cache ───────────────────────────────────────
1387
1388    fn git(root: &Path, args: &[&str]) {
1389        let status = std::process::Command::new("git")
1390            .args(args)
1391            .current_dir(root)
1392            .status()
1393            .expect("run git");
1394        assert!(status.success(), "git {args:?} failed");
1395    }
1396
1397    fn write(root: &Path, path: &str, contents: &str) {
1398        let path = root.join(path);
1399        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
1400        std::fs::write(path, contents).unwrap();
1401    }
1402
1403    #[test]
1404    fn cached_churn_merges_new_commits_after_head_advances() {
1405        let repo = tempfile::tempdir().expect("create repo");
1406        let root = repo.path();
1407        git(root, &["init"]);
1408        git(root, &["config", "user.email", "churn@example.test"]);
1409        git(root, &["config", "user.name", "Churn Test"]);
1410        git(root, &["config", "commit.gpgsign", "false"]);
1411
1412        write(root, "src/a.ts", "export const a = 1;\n");
1413        git(root, &["add", "."]);
1414        git(root, &["commit", "-m", "initial"]);
1415
1416        let since = parse_since("1y").unwrap();
1417        let cache = tempfile::tempdir().expect("create cache dir");
1418        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1419        assert!(!cold_hit);
1420        let file = root.join("src/a.ts");
1421        assert_eq!(cold.files[&file].commits, 1);
1422
1423        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1424        assert!(warm_hit);
1425
1426        write(
1427            root,
1428            "src/a.ts",
1429            "export const a = 1;\nexport const b = 2;\n",
1430        );
1431        git(root, &["add", "."]);
1432        git(root, &["commit", "-m", "update a"]);
1433        let head = get_head_sha(root).unwrap();
1434
1435        let (incremental, incremental_hit) =
1436            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1437        assert!(incremental_hit);
1438        assert_eq!(incremental.files[&file].commits, 2);
1439
1440        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
1441        assert_eq!(cache.last_indexed_sha, head);
1442    }
1443}