Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::Serialize;
12
/// Signature of the callback accepted by [`set_spawn_hook`].
///
/// The hook is handed the prepared `git log --numstat` [`Command`] and is
/// expected to run it and hand back the collected [`Output`]. The CLI uses
/// this to launch long-running git log calls through its `ScopedChild`
/// registry, so that SIGINT / SIGTERM reap the subprocess instead of leaving
/// it running after the parent exits. See `crates/cli/src/signal/` and
/// issue #477.
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;
19
20static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
21
22/// Install a spawn-hook that wraps the `git log` subprocess. Idempotent;
23/// subsequent calls are no-ops. Called once from the CLI's `main()` to
24/// route through the signal registry; defaults to `Command::output`
25/// when not set so the function-pointer indirection stays free for tests
26/// and embedders that don't care.
27pub fn set_spawn_hook(hook: ChurnSpawnHook) {
28    let _ = SPAWN_HOOK.set(hook);
29}
30
31fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
32    if let Some(hook) = SPAWN_HOOK.get() {
33        hook(command)
34    } else {
35        command.output()
36    }
37}
38
/// Length of one day in seconds (24 h × 60 min × 60 s = 86 400).
const SECS_PER_DAY: f64 = 24.0 * 60.0 * 60.0;
41
/// Half-life, in days, of the exponential recency weight applied to commits.
///
/// A commit that is 90 days old contributes half as much as one made today;
/// at 180 days the contribution drops to a quarter.
const HALF_LIFE_DAYS: f64 = 90.0;
45
/// A validated `--since` value, ready to be handed to git and shown to users.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Argument for `git log --after`, either a relative phrase such as
    /// `"6 months ago"` or a literal date such as `"2025-06-01"`.
    pub git_after: String,
    /// The same window phrased for humans (e.g., `"6 months"`).
    pub display: String,
}
54
55/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
56#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
57#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
58#[serde(rename_all = "snake_case")]
59pub enum ChurnTrend {
60    /// Recent half has >1.5× the commits of the older half.
61    Accelerating,
62    /// Churn is roughly stable between halves.
63    Stable,
64    /// Recent half has <0.67× the commits of the older half.
65    Cooling,
66}
67
68impl std::fmt::Display for ChurnTrend {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        match self {
71            Self::Accelerating => write!(f, "accelerating"),
72            Self::Stable => write!(f, "stable"),
73            Self::Cooling => write!(f, "cooling"),
74        }
75    }
76}
77
/// One author's share of the commits touching a single file.
///
/// The author is not stored here: entries are keyed by an index into
/// [`ChurnResult::author_pool`], which keeps the per-file maps small and the
/// bitcode cache compact.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Number of commits by this author that touched the file within the
    /// analysis window.
    pub commits: u32,
    /// Sum of the recency weights of those commits (exponential decay with a
    /// 90-day half-life).
    pub weighted_commits: f64,
    /// Timestamp of the author's earliest such commit, in epoch seconds.
    pub first_commit_ts: u64,
    /// Timestamp of the author's latest such commit, in epoch seconds.
    pub last_commit_ts: u64,
}
93
94/// Per-file churn data collected from git history.
95#[derive(Debug, Clone)]
96pub struct FileChurn {
97    /// Absolute file path.
98    pub path: PathBuf,
99    /// Total number of commits touching this file in the analysis window.
100    pub commits: u32,
101    /// Recency-weighted commit count (exponential decay, half-life 90 days).
102    pub weighted_commits: f64,
103    /// Total lines added across all commits.
104    pub lines_added: u32,
105    /// Total lines deleted across all commits.
106    pub lines_deleted: u32,
107    /// Churn trend: accelerating, stable, or cooling.
108    pub trend: ChurnTrend,
109    /// Per-author contributions keyed by interned author index.
110    /// Indices reference [`ChurnResult::author_pool`].
111    pub authors: FxHashMap<u32, AuthorContribution>,
112}
113
114/// Result of churn analysis.
115pub struct ChurnResult {
116    /// Per-file churn data, keyed by absolute path.
117    pub files: FxHashMap<PathBuf, FileChurn>,
118    /// Whether the repository is a shallow clone.
119    pub shallow_clone: bool,
120    /// Author email pool. Per-file [`AuthorContribution`] entries reference
121    /// authors by their index into this vector.
122    pub author_pool: Vec<String>,
123}
124
125/// Parse a `--since` value into a git-compatible duration.
126///
127/// Accepts:
128/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
129/// - ISO dates: `2025-06-01`
130///
131/// # Errors
132///
133/// Returns an error if the input is not a recognized duration format or ISO date,
134/// the numeric part is invalid, or the duration is zero.
135pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
136    if is_iso_date(input) {
137        return Ok(SinceDuration {
138            git_after: input.to_string(),
139            display: input.to_string(),
140        });
141    }
142
143    let (num_str, unit) = split_number_unit(input)?;
144    let num: u64 = num_str
145        .parse()
146        .map_err(|_| format!("invalid number in --since: {input}"))?;
147
148    if num == 0 {
149        return Err("--since duration must be greater than 0".to_string());
150    }
151
152    match unit {
153        "d" | "day" | "days" => {
154            let s = if num == 1 { "" } else { "s" };
155            Ok(SinceDuration {
156                git_after: format!("{num} day{s} ago"),
157                display: format!("{num} day{s}"),
158            })
159        }
160        "w" | "week" | "weeks" => {
161            let s = if num == 1 { "" } else { "s" };
162            Ok(SinceDuration {
163                git_after: format!("{num} week{s} ago"),
164                display: format!("{num} week{s}"),
165            })
166        }
167        "m" | "month" | "months" => {
168            let s = if num == 1 { "" } else { "s" };
169            Ok(SinceDuration {
170                git_after: format!("{num} month{s} ago"),
171                display: format!("{num} month{s}"),
172            })
173        }
174        "y" | "year" | "years" => {
175            let s = if num == 1 { "" } else { "s" };
176            Ok(SinceDuration {
177                git_after: format!("{num} year{s} ago"),
178                display: format!("{num} year{s}"),
179            })
180        }
181        _ => Err(format!(
182            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
183        )),
184    }
185}
186
187/// Analyze git churn for files in the given root directory.
188///
189/// Returns `None` if git is not available or the directory is not a git repository.
190pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
191    let shallow = is_shallow_clone(root);
192    let state = analyze_churn_events(root, since, None)?;
193    Some(build_churn_result(state, shallow))
194}
195
196/// Check if the repository is a shallow clone.
197#[must_use]
198pub fn is_shallow_clone(root: &Path) -> bool {
199    let mut command = crate::spawn::git();
200    command
201        .args(["rev-parse", "--is-shallow-repository"])
202        .current_dir(root);
203    command.output().is_ok_and(|o| {
204        String::from_utf8_lossy(&o.stdout)
205            .trim()
206            .eq_ignore_ascii_case("true")
207    })
208}
209
210/// Check if the directory is inside a git repository.
211#[must_use]
212pub fn is_git_repo(root: &Path) -> bool {
213    let mut command = crate::spawn::git();
214    command
215        .args(["rev-parse", "--git-dir"])
216        .current_dir(root)
217        .stdout(std::process::Stdio::null())
218        .stderr(std::process::Stdio::null());
219    command.status().is_ok_and(|s| s.success())
220}
221
/// Upper bound on the size of a churn cache file: 64 MiB (`64 << 20` bytes).
/// The incremental cache keeps one event per commit and file, so it needs
/// more headroom than the old aggregate rows did.
const MAX_CHURN_CACHE_SIZE: usize = 64 << 20;
225
/// Version tag written into every cache file and checked on load.
///
/// Increase it whenever the on-disk shape of [`ChurnCache`] changes, so that
/// payloads written by older builds are rejected. Version 3 marks the move
/// from aggregate rows to per-commit events, which enabled incremental
/// updates.
const CHURN_CACHE_VERSION: u8 = 3;
230
231/// Serializable per-commit event for the disk cache.
232#[derive(Clone, bitcode::Encode, bitcode::Decode)]
233struct CachedCommitEvent {
234    timestamp: u64,
235    lines_added: u32,
236    lines_deleted: u32,
237    author_idx: Option<u32>,
238}
239
240/// Serializable per-file churn entry for the disk cache.
241#[derive(Clone, bitcode::Encode, bitcode::Decode)]
242struct CachedFileChurn {
243    path: String,
244    events: Vec<CachedCommitEvent>,
245}
246
247/// Cached churn data keyed by last indexed SHA and since string.
248#[derive(Clone, bitcode::Encode, bitcode::Decode)]
249struct ChurnCache {
250    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
251    version: u8,
252    last_indexed_sha: String,
253    git_after: String,
254    files: Vec<CachedFileChurn>,
255    shallow_clone: bool,
256    /// Author email pool referenced by [`CachedCommitEvent::author_idx`].
257    author_pool: Vec<String>,
258}
259
260/// Per-file commit events retained in memory while building or updating churn.
261struct FileEvents {
262    events: Vec<CachedCommitEvent>,
263}
264
265/// Event-level churn state. Unlike [`ChurnResult`], this preserves commit
266/// timestamps so a cache can merge new commits and recompute trend/recency.
267struct ChurnEventState {
268    files: FxHashMap<PathBuf, FileEvents>,
269    author_pool: Vec<String>,
270}
271
272/// Get the full HEAD SHA for cache keying.
273fn get_head_sha(root: &Path) -> Option<String> {
274    let mut command = crate::spawn::git();
275    command.args(["rev-parse", "HEAD"]).current_dir(root);
276    command
277        .output()
278        .ok()
279        .filter(|o| o.status.success())
280        .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
281}
282
283/// Check whether `ancestor` is still reachable from `descendant`.
284fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
285    let mut command = crate::spawn::git();
286    command
287        .args(["merge-base", "--is-ancestor", ancestor, descendant])
288        .current_dir(root);
289    command.status().is_ok_and(|s| s.success())
290}
291
292/// Try to load churn data from disk cache. Returns `None` on cache miss
293/// or version mismatch.
294fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
295    let cache_file = cache_dir.join("churn.bin");
296    let data = std::fs::read(&cache_file).ok()?;
297    if data.len() > MAX_CHURN_CACHE_SIZE {
298        return None;
299    }
300    let cache: ChurnCache = bitcode::decode(&data).ok()?;
301    if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
302        return None;
303    }
304    Some(cache)
305}
306
307/// Save churn data to disk cache.
308fn save_churn_cache(
309    cache_dir: &Path,
310    last_indexed_sha: &str,
311    git_after: &str,
312    state: &ChurnEventState,
313    shallow_clone: bool,
314) {
315    let files: Vec<CachedFileChurn> = state
316        .files
317        .iter()
318        .map(|f| CachedFileChurn {
319            path: f.0.to_string_lossy().to_string(),
320            events: f.1.events.clone(),
321        })
322        .collect();
323    let cache = ChurnCache {
324        version: CHURN_CACHE_VERSION,
325        last_indexed_sha: last_indexed_sha.to_string(),
326        git_after: git_after.to_string(),
327        files,
328        shallow_clone,
329        author_pool: state.author_pool.clone(),
330    };
331    let _ = std::fs::create_dir_all(cache_dir);
332    let data = bitcode::encode(&cache);
333    let tmp = cache_dir.join("churn.bin.tmp");
334    if std::fs::write(&tmp, data).is_ok() {
335        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
336    }
337}
338
339/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
340/// since duration match. If HEAD advanced from the cached SHA, runs an
341/// incremental `git log <cached>..HEAD --numstat` scan and merges it.
342///
343/// Returns `(ChurnResult, bool)` where the bool indicates whether reusable
344/// cache state was used.
345/// Returns `None` if git analysis fails.
346pub fn analyze_churn_cached(
347    root: &Path,
348    since: &SinceDuration,
349    cache_dir: &Path,
350    no_cache: bool,
351) -> Option<(ChurnResult, bool)> {
352    let head_sha = get_head_sha(root)?;
353
354    if !no_cache && let Some(cache) = load_churn_cache(cache_dir, &since.git_after) {
355        if cache.last_indexed_sha == head_sha {
356            let shallow_clone = cache.shallow_clone;
357            let state = cache.into_event_state();
358            return Some((build_churn_result(state, shallow_clone), true));
359        }
360
361        if is_ancestor(root, &cache.last_indexed_sha, &head_sha) {
362            let shallow_clone = is_shallow_clone(root);
363            let range = format!("{}..HEAD", cache.last_indexed_sha);
364            if let Some(delta) = analyze_churn_events(root, since, Some(&range)) {
365                let mut state = cache.into_event_state();
366                merge_churn_states(&mut state, delta);
367                save_churn_cache(
368                    cache_dir,
369                    &head_sha,
370                    &since.git_after,
371                    &state,
372                    shallow_clone,
373                );
374                return Some((build_churn_result(state, shallow_clone), true));
375            }
376        }
377    }
378
379    let shallow_clone = is_shallow_clone(root);
380    let state = analyze_churn_events(root, since, None)?;
381    if !no_cache {
382        save_churn_cache(
383            cache_dir,
384            &head_sha,
385            &since.git_after,
386            &state,
387            shallow_clone,
388        );
389    }
390
391    let result = build_churn_result(state, shallow_clone);
392    Some((result, false))
393}
394
395impl ChurnCache {
396    fn into_event_state(self) -> ChurnEventState {
397        let files = self
398            .files
399            .into_iter()
400            .map(|entry| {
401                (
402                    PathBuf::from(entry.path),
403                    FileEvents {
404                        events: entry.events,
405                    },
406                )
407            })
408            .collect();
409        ChurnEventState {
410            files,
411            author_pool: self.author_pool,
412        }
413    }
414}
415
416/// Run `git log --numstat` and return event-level churn state.
417fn analyze_churn_events(
418    root: &Path,
419    since: &SinceDuration,
420    revision_range: Option<&str>,
421) -> Option<ChurnEventState> {
422    let mut command = crate::spawn::git();
423    command.arg("log");
424    if let Some(range) = revision_range {
425        command.arg(range);
426    }
427    command
428        .args([
429            "--numstat",
430            "--no-merges",
431            "--no-renames",
432            "--use-mailmap",
433            "--format=format:%at|%ae",
434            &format!("--after={}", since.git_after),
435        ])
436        .current_dir(root);
437
438    let output = match spawn_output(&mut command) {
439        Ok(o) => o,
440        Err(e) => {
441            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
442            return None;
443        }
444    };
445
446    if !output.status.success() {
447        let stderr = String::from_utf8_lossy(&output.stderr);
448        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
449        return None;
450    }
451
452    let stdout = String::from_utf8_lossy(&output.stdout);
453    Some(parse_git_log_events(&stdout, root))
454}
455
456/// Merge new churn events into cached event state.
457fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
458    let mut base_author_index: FxHashMap<String, u32> = base
459        .author_pool
460        .iter()
461        .enumerate()
462        .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
463        .collect();
464
465    let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
466    for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
467        let Ok(old_idx) = u32::try_from(old_idx) else {
468            continue;
469        };
470        let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
471        author_mapping.insert(old_idx, new_idx);
472    }
473
474    for (path, mut file) in delta.files {
475        for event in &mut file.events {
476            event.author_idx = event
477                .author_idx
478                .and_then(|idx| author_mapping.get(&idx).copied());
479        }
480        base.files
481            .entry(path)
482            .and_modify(|existing| existing.events.append(&mut file.events))
483            .or_insert(file);
484    }
485}
486
487/// Parse `git log --numstat --format=format:%at|%ae` output into events.
488fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
489    let now_secs = std::time::SystemTime::now()
490        .duration_since(std::time::UNIX_EPOCH)
491        .unwrap_or_default()
492        .as_secs();
493
494    let mut files: FxHashMap<PathBuf, FileEvents> = FxHashMap::default();
495    let mut author_pool: Vec<String> = Vec::new();
496    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
497    let mut current_timestamp: Option<u64> = None;
498    let mut current_author_idx: Option<u32> = None;
499
500    for line in stdout.lines() {
501        let line = line.trim();
502        if line.is_empty() {
503            continue;
504        }
505
506        if let Some((ts_str, email)) = line.split_once('|')
507            && let Ok(ts) = ts_str.parse::<u64>()
508        {
509            current_timestamp = Some(ts);
510            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
511            continue;
512        }
513
514        if let Ok(ts) = line.parse::<u64>() {
515            current_timestamp = Some(ts);
516            current_author_idx = None;
517            continue;
518        }
519
520        if let Some((added, deleted, path)) = parse_numstat_line(line) {
521            let abs_path = root.join(path);
522            let ts = current_timestamp.unwrap_or(now_secs);
523            files
524                .entry(abs_path)
525                .or_insert_with(|| FileEvents { events: Vec::new() })
526                .events
527                .push(CachedCommitEvent {
528                    timestamp: ts,
529                    lines_added: added,
530                    lines_deleted: deleted,
531                    author_idx: current_author_idx,
532                });
533        }
534    }
535
536    ChurnEventState { files, author_pool }
537}
538
539/// Convert event-level churn state into the public aggregate result.
540#[expect(
541    clippy::cast_possible_truncation,
542    reason = "commit count per file is bounded by git history depth"
543)]
544fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
545    let now_secs = std::time::SystemTime::now()
546        .duration_since(std::time::UNIX_EPOCH)
547        .unwrap_or_default()
548        .as_secs();
549
550    let files = state
551        .files
552        .into_iter()
553        .map(|(path, file)| {
554            let mut timestamps = Vec::with_capacity(file.events.len());
555            let mut weighted_commits = 0.0;
556            let mut lines_added = 0;
557            let mut lines_deleted = 0;
558            let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
559
560            for event in file.events {
561                timestamps.push(event.timestamp);
562                let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
563                let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
564                weighted_commits += weight;
565                lines_added += event.lines_added;
566                lines_deleted += event.lines_deleted;
567
568                if let Some(idx) = event.author_idx {
569                    authors
570                        .entry(idx)
571                        .and_modify(|c| {
572                            c.commits += 1;
573                            c.weighted_commits += weight;
574                            c.first_commit_ts = c.first_commit_ts.min(event.timestamp);
575                            c.last_commit_ts = c.last_commit_ts.max(event.timestamp);
576                        })
577                        .or_insert(AuthorContribution {
578                            commits: 1,
579                            weighted_commits: weight,
580                            first_commit_ts: event.timestamp,
581                            last_commit_ts: event.timestamp,
582                        });
583                }
584            }
585
586            let commits = timestamps.len() as u32;
587            let trend = compute_trend(&timestamps);
588            for c in authors.values_mut() {
589                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
590            }
591            let churn = FileChurn {
592                path: path.clone(),
593                commits,
594                weighted_commits: (weighted_commits * 100.0).round() / 100.0,
595                lines_added,
596                lines_deleted,
597                trend,
598                authors,
599            };
600            (path, churn)
601        })
602        .collect();
603
604    ChurnResult {
605        files,
606        shallow_clone,
607        author_pool: state.author_pool,
608    }
609}
610
/// Test helper: parse `git log --numstat --format=format:%at|%ae` output all
/// the way to aggregated churn.
///
/// Returns the per-file churn map together with the author email pool that
/// the interned indices in [`FileChurn::authors`] refer to. The shallow-clone
/// flag is fixed to `false` and discarded.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(events, false);
    (files, author_pool)
}
620
621/// Intern an author email into the pool, returning its stable index.
622fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
623    if let Some(&idx) = index.get(email) {
624        return idx;
625    }
626    #[expect(
627        clippy::cast_possible_truncation,
628        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
629    )]
630    let idx = pool.len() as u32;
631    let owned = email.to_string();
632    index.insert(owned.clone(), idx);
633    pool.push(owned);
634    idx
635}
636
/// Split one numstat row of the form `"10\t5\tpath/to/file.ts"` into
/// `(added, deleted, path)`.
///
/// Returns `None` for anything that is not two tab-separated `u32` counts
/// followed by a path. That includes binary files, which git reports as
/// `"-\t-\tpath"`. Tabs after the second separator belong to the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added_field, rest) = line.split_once('\t')?;
    let (deleted_field, path) = rest.split_once('\t')?;

    let added = added_field.parse::<u32>().ok()?;
    let deleted = deleted_field.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
650
651/// Compute churn trend by splitting commits into two temporal halves.
652///
653/// Finds the midpoint between the oldest and newest commit timestamps,
654/// then compares commit counts in each half:
655/// - Recent > 1.5× older → Accelerating
656/// - Recent < 0.67× older → Cooling
657/// - Otherwise → Stable
658fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
659    if timestamps.len() < 2 {
660        return ChurnTrend::Stable;
661    }
662
663    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
664    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
665
666    if max_ts == min_ts {
667        return ChurnTrend::Stable;
668    }
669
670    let midpoint = min_ts + (max_ts - min_ts) / 2;
671    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
672    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
673
674    if older < 1.0 {
675        return ChurnTrend::Stable;
676    }
677
678    let ratio = recent / older;
679    if ratio > 1.5 {
680        ChurnTrend::Accelerating
681    } else if ratio < 0.67 {
682        ChurnTrend::Cooling
683    } else {
684        ChurnTrend::Stable
685    }
686}
687
/// Report whether `input` is a calendar date written as `YYYY-MM-DD`.
///
/// The input must be exactly ten bytes: four ASCII digits, `-`, two digits,
/// `-`, two digits. In addition the month must lie in `01..=12` and the day
/// in `01..=31`, so that obviously impossible dates such as `2025-13-45` are
/// not forwarded to `git log --after`. Month-specific day limits (e.g.
/// February 30th) are not checked.
fn is_iso_date(input: &str) -> bool {
    let &[y0, y1, y2, y3, b'-', m0, m1, b'-', d0, d1] = input.as_bytes() else {
        return false;
    };

    // Value of a two-digit field, or `None` if either byte is not a digit.
    let two_digit = |hi: u8, lo: u8| -> Option<u8> {
        (hi.is_ascii_digit() && lo.is_ascii_digit()).then(|| (hi - b'0') * 10 + (lo - b'0'))
    };

    [y0, y1, y2, y3].iter().all(u8::is_ascii_digit)
        && two_digit(m0, m1).is_some_and(|month| (1..=12).contains(&month))
        && two_digit(d0, d1).is_some_and(|day| (1..=31).contains(&day))
}
696
/// Split a duration such as `"90days"` into its leading digits and the
/// remainder: `("90", "days")`.
///
/// # Errors
///
/// Returns an error if the input consists of digits only (or is empty), since
/// the unit is then missing, or if it does not begin with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    let digit_count = input.bytes().take_while(u8::is_ascii_digit).count();

    if digit_count == input.len() {
        return Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        ));
    }
    if digit_count == 0 {
        return Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        ));
    }

    // The prefix is pure ASCII, so `digit_count` is a valid char boundary.
    Ok(input.split_at(digit_count))
}
708
709#[cfg(test)]
710mod tests {
711    use super::*;
712
713    #[test]
714    fn parse_since_months_short() {
715        let d = parse_since("6m").unwrap();
716        assert_eq!(d.git_after, "6 months ago");
717        assert_eq!(d.display, "6 months");
718    }
719
720    #[test]
721    fn parse_since_months_long() {
722        let d = parse_since("6months").unwrap();
723        assert_eq!(d.git_after, "6 months ago");
724        assert_eq!(d.display, "6 months");
725    }
726
727    #[test]
728    fn parse_since_days() {
729        let d = parse_since("90d").unwrap();
730        assert_eq!(d.git_after, "90 days ago");
731        assert_eq!(d.display, "90 days");
732    }
733
734    #[test]
735    fn parse_since_year_singular() {
736        let d = parse_since("1y").unwrap();
737        assert_eq!(d.git_after, "1 year ago");
738        assert_eq!(d.display, "1 year");
739    }
740
741    #[test]
742    fn parse_since_years_plural() {
743        let d = parse_since("2years").unwrap();
744        assert_eq!(d.git_after, "2 years ago");
745        assert_eq!(d.display, "2 years");
746    }
747
748    #[test]
749    fn parse_since_weeks() {
750        let d = parse_since("2w").unwrap();
751        assert_eq!(d.git_after, "2 weeks ago");
752        assert_eq!(d.display, "2 weeks");
753    }
754
755    #[test]
756    fn parse_since_iso_date() {
757        let d = parse_since("2025-06-01").unwrap();
758        assert_eq!(d.git_after, "2025-06-01");
759        assert_eq!(d.display, "2025-06-01");
760    }
761
762    #[test]
763    fn parse_since_month_singular() {
764        let d = parse_since("1month").unwrap();
765        assert_eq!(d.display, "1 month");
766    }
767
768    #[test]
769    fn parse_since_day_singular() {
770        let d = parse_since("1day").unwrap();
771        assert_eq!(d.display, "1 day");
772    }
773
774    #[test]
775    fn parse_since_zero_rejected() {
776        assert!(parse_since("0m").is_err());
777    }
778
779    #[test]
780    fn parse_since_no_unit_rejected() {
781        assert!(parse_since("90").is_err());
782    }
783
784    #[test]
785    fn parse_since_unknown_unit_rejected() {
786        assert!(parse_since("6x").is_err());
787    }
788
789    #[test]
790    fn parse_since_no_number_rejected() {
791        assert!(parse_since("months").is_err());
792    }
793
794    #[test]
795    fn numstat_normal() {
796        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
797        assert_eq!(a, 10);
798        assert_eq!(d, 5);
799        assert_eq!(p, "src/file.ts");
800    }
801
802    #[test]
803    fn numstat_binary_skipped() {
804        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
805    }
806
807    #[test]
808    fn numstat_zero_lines() {
809        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
810        assert_eq!(a, 0);
811        assert_eq!(d, 0);
812        assert_eq!(p, "src/empty.ts");
813    }
814
815    #[test]
816    fn trend_empty_is_stable() {
817        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
818    }
819
820    #[test]
821    fn trend_single_commit_is_stable() {
822        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
823    }
824
825    #[test]
826    fn trend_accelerating() {
827        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
828        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
829    }
830
831    #[test]
832    fn trend_cooling() {
833        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
834        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
835    }
836
837    #[test]
838    fn trend_stable_even_distribution() {
839        let timestamps = vec![100, 200, 300, 700, 800, 900];
840        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
841    }
842
843    #[test]
844    fn trend_same_timestamp_is_stable() {
845        let timestamps = vec![500, 500, 500];
846        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
847    }
848
849    #[test]
850    fn iso_date_valid() {
851        assert!(is_iso_date("2025-06-01"));
852        assert!(is_iso_date("2025-12-31"));
853    }
854
855    #[test]
856    fn iso_date_with_time_rejected() {
857        assert!(!is_iso_date("2025-06-01T00:00:00"));
858    }
859
860    #[test]
861    fn iso_date_invalid() {
862        assert!(!is_iso_date("6months"));
863        assert!(!is_iso_date("2025"));
864        assert!(!is_iso_date("not-a-date"));
865        assert!(!is_iso_date("abcd-ef-gh"));
866    }
867
868    #[test]
869    fn trend_display() {
870        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
871        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
872        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
873    }
874
875    #[test]
876    fn parse_git_log_single_commit() {
877        let root = Path::new("/project");
878        let output = "1700000000\n10\t5\tsrc/index.ts\n";
879        let (result, _) = parse_git_log(output, root);
880        assert_eq!(result.len(), 1);
881        let churn = &result[&PathBuf::from("/project/src/index.ts")];
882        assert_eq!(churn.commits, 1);
883        assert_eq!(churn.lines_added, 10);
884        assert_eq!(churn.lines_deleted, 5);
885    }
886
887    #[test]
888    fn parse_git_log_multiple_commits_same_file() {
889        let root = Path::new("/project");
890        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
891        let (result, _) = parse_git_log(output, root);
892        assert_eq!(result.len(), 1);
893        let churn = &result[&PathBuf::from("/project/src/index.ts")];
894        assert_eq!(churn.commits, 2);
895        assert_eq!(churn.lines_added, 13);
896        assert_eq!(churn.lines_deleted, 7);
897    }
898
899    #[test]
900    fn parse_git_log_multiple_files() {
901        let root = Path::new("/project");
902        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
903        let (result, _) = parse_git_log(output, root);
904        assert_eq!(result.len(), 2);
905        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
906        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
907    }
908
909    #[test]
910    fn parse_git_log_empty_output() {
911        let root = Path::new("/project");
912        let (result, _) = parse_git_log("", root);
913        assert!(result.is_empty());
914    }
915
916    #[test]
917    fn parse_git_log_skips_binary_files() {
918        let root = Path::new("/project");
919        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
920        let (result, _) = parse_git_log(output, root);
921        assert_eq!(result.len(), 1);
922        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
923    }
924
925    #[test]
926    fn parse_git_log_weighted_commits_are_positive() {
927        let root = Path::new("/project");
928        let now_secs = std::time::SystemTime::now()
929            .duration_since(std::time::UNIX_EPOCH)
930            .unwrap()
931            .as_secs();
932        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
933        let (result, _) = parse_git_log(&output, root);
934        let churn = &result[&PathBuf::from("/project/src/a.ts")];
935        assert!(
936            churn.weighted_commits > 0.0,
937            "weighted_commits should be positive for recent commits"
938        );
939    }
940
941    #[test]
942    fn trend_boundary_1_5x_ratio() {
943        let timestamps = vec![100, 200, 600, 800, 1000];
944        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
945    }
946
947    #[test]
948    fn trend_just_above_1_5x() {
949        let timestamps = vec![100, 600, 800, 1000];
950        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
951    }
952
953    #[test]
954    fn trend_boundary_0_67x_ratio() {
955        let timestamps = vec![100, 200, 300, 600, 1000];
956        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
957    }
958
959    #[test]
960    fn trend_two_timestamps_different() {
961        let timestamps = vec![100, 200];
962        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
963    }
964
965    #[test]
966    fn parse_since_week_singular() {
967        let d = parse_since("1week").unwrap();
968        assert_eq!(d.git_after, "1 week ago");
969        assert_eq!(d.display, "1 week");
970    }
971
972    #[test]
973    fn parse_since_weeks_long() {
974        let d = parse_since("3weeks").unwrap();
975        assert_eq!(d.git_after, "3 weeks ago");
976        assert_eq!(d.display, "3 weeks");
977    }
978
979    #[test]
980    fn parse_since_days_long() {
981        let d = parse_since("30days").unwrap();
982        assert_eq!(d.git_after, "30 days ago");
983        assert_eq!(d.display, "30 days");
984    }
985
986    #[test]
987    fn parse_since_year_long() {
988        let d = parse_since("1year").unwrap();
989        assert_eq!(d.git_after, "1 year ago");
990        assert_eq!(d.display, "1 year");
991    }
992
993    #[test]
994    fn parse_since_overflow_number_rejected() {
995        let result = parse_since("99999999999999999999d");
996        assert!(result.is_err());
997        let err = result.unwrap_err();
998        assert!(err.contains("invalid number"));
999    }
1000
1001    #[test]
1002    fn parse_since_zero_days_rejected() {
1003        assert!(parse_since("0d").is_err());
1004    }
1005
1006    #[test]
1007    fn parse_since_zero_weeks_rejected() {
1008        assert!(parse_since("0w").is_err());
1009    }
1010
1011    #[test]
1012    fn parse_since_zero_years_rejected() {
1013        assert!(parse_since("0y").is_err());
1014    }
1015
1016    #[test]
1017    fn numstat_missing_path() {
1018        assert!(parse_numstat_line("10\t5").is_none());
1019    }
1020
1021    #[test]
1022    fn numstat_single_field() {
1023        assert!(parse_numstat_line("10").is_none());
1024    }
1025
1026    #[test]
1027    fn numstat_empty_string() {
1028        assert!(parse_numstat_line("").is_none());
1029    }
1030
1031    #[test]
1032    fn numstat_only_added_is_binary() {
1033        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1034    }
1035
1036    #[test]
1037    fn numstat_only_deleted_is_binary() {
1038        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1039    }
1040
1041    #[test]
1042    fn numstat_path_with_spaces() {
1043        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1044        assert_eq!(a, 3);
1045        assert_eq!(d, 1);
1046        assert_eq!(p, "path with spaces/file.ts");
1047    }
1048
1049    #[test]
1050    fn numstat_large_numbers() {
1051        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1052        assert_eq!(a, 9999);
1053        assert_eq!(d, 8888);
1054        assert_eq!(p, "src/big.ts");
1055    }
1056
1057    #[test]
1058    fn iso_date_wrong_separator_positions() {
1059        assert!(!is_iso_date("20-25-0601"));
1060        assert!(!is_iso_date("202506-01-"));
1061    }
1062
1063    #[test]
1064    fn iso_date_too_short() {
1065        assert!(!is_iso_date("2025-06-0"));
1066    }
1067
1068    #[test]
1069    fn iso_date_letters_in_day() {
1070        assert!(!is_iso_date("2025-06-ab"));
1071    }
1072
1073    #[test]
1074    fn iso_date_letters_in_month() {
1075        assert!(!is_iso_date("2025-ab-01"));
1076    }
1077
1078    #[test]
1079    fn split_number_unit_valid() {
1080        let (num, unit) = split_number_unit("42days").unwrap();
1081        assert_eq!(num, "42");
1082        assert_eq!(unit, "days");
1083    }
1084
1085    #[test]
1086    fn split_number_unit_single_digit() {
1087        let (num, unit) = split_number_unit("1m").unwrap();
1088        assert_eq!(num, "1");
1089        assert_eq!(unit, "m");
1090    }
1091
1092    #[test]
1093    fn split_number_unit_no_digits() {
1094        let err = split_number_unit("abc").unwrap_err();
1095        assert!(err.contains("must start with a number"));
1096    }
1097
1098    #[test]
1099    fn split_number_unit_no_unit() {
1100        let err = split_number_unit("123").unwrap_err();
1101        assert!(err.contains("requires a unit suffix"));
1102    }
1103
1104    #[test]
1105    fn parse_git_log_numstat_before_timestamp_uses_now() {
1106        let root = Path::new("/project");
1107        let output = "10\t5\tsrc/no_ts.ts\n";
1108        let (result, _) = parse_git_log(output, root);
1109        assert_eq!(result.len(), 1);
1110        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1111        assert_eq!(churn.commits, 1);
1112        assert_eq!(churn.lines_added, 10);
1113        assert_eq!(churn.lines_deleted, 5);
1114        assert!(
1115            churn.weighted_commits > 0.9,
1116            "weight should be near 1.0 when timestamp defaults to now"
1117        );
1118    }
1119
1120    #[test]
1121    fn parse_git_log_whitespace_lines_ignored() {
1122        let root = Path::new("/project");
1123        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1124        let (result, _) = parse_git_log(output, root);
1125        assert_eq!(result.len(), 1);
1126    }
1127
1128    #[test]
1129    fn parse_git_log_trend_is_computed_per_file() {
1130        let root = Path::new("/project");
1131        let output = "\
11321000\n5\t1\tsrc/old.ts\n\
11332000\n3\t1\tsrc/old.ts\n\
11341000\n1\t0\tsrc/hot.ts\n\
11351800\n1\t0\tsrc/hot.ts\n\
11361900\n1\t0\tsrc/hot.ts\n\
11371950\n1\t0\tsrc/hot.ts\n\
11382000\n1\t0\tsrc/hot.ts\n";
1139        let (result, _) = parse_git_log(output, root);
1140        let old = &result[&PathBuf::from("/project/src/old.ts")];
1141        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
1142        assert_eq!(old.commits, 2);
1143        assert_eq!(hot.commits, 5);
1144        assert_eq!(hot.trend, ChurnTrend::Accelerating);
1145    }
1146
1147    #[test]
1148    fn parse_git_log_weighted_decay_for_old_commits() {
1149        let root = Path::new("/project");
1150        let now = std::time::SystemTime::now()
1151            .duration_since(std::time::UNIX_EPOCH)
1152            .unwrap()
1153            .as_secs();
1154        let old_ts = now - (180 * 86_400);
1155        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1156        let (result, _) = parse_git_log(&output, root);
1157        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1158        assert!(
1159            churn.weighted_commits < 0.5,
1160            "180-day-old commit should weigh ~0.25, got {}",
1161            churn.weighted_commits
1162        );
1163        assert!(
1164            churn.weighted_commits > 0.1,
1165            "180-day-old commit should weigh ~0.25, got {}",
1166            churn.weighted_commits
1167        );
1168    }
1169
1170    #[test]
1171    fn parse_git_log_path_stored_as_absolute() {
1172        let root = Path::new("/my/project");
1173        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1174        let (result, _) = parse_git_log(output, root);
1175        let key = PathBuf::from("/my/project/lib/utils.ts");
1176        assert!(result.contains_key(&key));
1177        assert_eq!(result[&key].path, key);
1178    }
1179
1180    #[test]
1181    fn parse_git_log_weighted_commits_rounded() {
1182        let root = Path::new("/project");
1183        let now = std::time::SystemTime::now()
1184            .duration_since(std::time::UNIX_EPOCH)
1185            .unwrap()
1186            .as_secs();
1187        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1188        let (result, _) = parse_git_log(&output, root);
1189        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1190        let decimals = format!("{:.2}", churn.weighted_commits);
1191        assert_eq!(
1192            churn.weighted_commits.to_string().len(),
1193            decimals.len().min(churn.weighted_commits.to_string().len()),
1194            "weighted_commits should be rounded to at most 2 decimal places"
1195        );
1196    }
1197
1198    #[test]
1199    fn trend_serde_serialization() {
1200        assert_eq!(
1201            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1202            "\"accelerating\""
1203        );
1204        assert_eq!(
1205            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1206            "\"stable\""
1207        );
1208        assert_eq!(
1209            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1210            "\"cooling\""
1211        );
1212    }
1213
1214    #[test]
1215    fn parse_git_log_extracts_author_email() {
1216        let root = Path::new("/project");
1217        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1218        let (result, pool) = parse_git_log(output, root);
1219        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1220        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1221        assert_eq!(churn.authors.len(), 1);
1222        let alice = &churn.authors[&0];
1223        assert_eq!(alice.commits, 1);
1224        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1225        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1226    }
1227
1228    #[test]
1229    fn parse_git_log_intern_dedupes_authors() {
1230        let root = Path::new("/project");
1231        let output = "\
12321700000000|alice@example.com
12331\t0\ta.ts
12341700100000|bob@example.com
12352\t1\tb.ts
12361700200000|alice@example.com
12373\t2\tc.ts
1238";
1239        let (_result, pool) = parse_git_log(output, root);
1240        assert_eq!(pool.len(), 2);
1241        assert!(pool.contains(&"alice@example.com".to_string()));
1242        assert!(pool.contains(&"bob@example.com".to_string()));
1243    }
1244
1245    #[test]
1246    fn parse_git_log_aggregates_per_author() {
1247        let root = Path::new("/project");
1248        let output = "\
12491700000000|alice@example.com
12501\t0\tsrc/index.ts
12511700100000|bob@example.com
12522\t0\tsrc/index.ts
12531700200000|alice@example.com
12541\t1\tsrc/index.ts
1255";
1256        let (result, pool) = parse_git_log(output, root);
1257        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1258        assert_eq!(churn.commits, 3);
1259        assert_eq!(churn.authors.len(), 2);
1260
1261        let alice_idx =
1262            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
1263        let alice = &churn.authors[&alice_idx];
1264        assert_eq!(alice.commits, 2);
1265        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1266        assert_eq!(alice.last_commit_ts, 1_700_200_000);
1267    }
1268
1269    #[test]
1270    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1271        let root = Path::new("/project");
1272        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1273        let (result, pool) = parse_git_log(output, root);
1274        assert!(pool.is_empty());
1275        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1276        assert_eq!(churn.commits, 1);
1277        assert!(churn.authors.is_empty());
1278    }
1279
1280    #[test]
1281    fn intern_author_returns_existing_index() {
1282        let mut pool = Vec::new();
1283        let mut index = FxHashMap::default();
1284        let i1 = intern_author("alice@x", &mut pool, &mut index);
1285        let i2 = intern_author("alice@x", &mut pool, &mut index);
1286        assert_eq!(i1, i2);
1287        assert_eq!(pool.len(), 1);
1288    }
1289
1290    #[test]
1291    fn intern_author_assigns_sequential_indices() {
1292        let mut pool = Vec::new();
1293        let mut index = FxHashMap::default();
1294        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1295        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1296        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1297        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1298    }
1299
1300    fn git(root: &Path, args: &[&str]) {
1301        let status = std::process::Command::new("git")
1302            .args(args)
1303            .current_dir(root)
1304            .status()
1305            .expect("run git");
1306        assert!(status.success(), "git {args:?} failed");
1307    }
1308
1309    fn write(root: &Path, path: &str, contents: &str) {
1310        let path = root.join(path);
1311        std::fs::create_dir_all(path.parent().expect("test path has parent")).unwrap();
1312        std::fs::write(path, contents).unwrap();
1313    }
1314
1315    #[test]
1316    fn cached_churn_merges_new_commits_after_head_advances() {
1317        let repo = tempfile::tempdir().expect("create repo");
1318        let root = repo.path();
1319        git(root, &["init"]);
1320        git(root, &["config", "user.email", "churn@example.test"]);
1321        git(root, &["config", "user.name", "Churn Test"]);
1322        git(root, &["config", "commit.gpgsign", "false"]);
1323
1324        write(root, "src/a.ts", "export const a = 1;\n");
1325        git(root, &["add", "."]);
1326        git(root, &["commit", "-m", "initial"]);
1327
1328        let since = parse_since("1y").unwrap();
1329        let cache = tempfile::tempdir().expect("create cache dir");
1330        let (cold, cold_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1331        assert!(!cold_hit);
1332        let file = root.join("src/a.ts");
1333        assert_eq!(cold.files[&file].commits, 1);
1334
1335        let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1336        assert!(warm_hit);
1337
1338        write(
1339            root,
1340            "src/a.ts",
1341            "export const a = 1;\nexport const b = 2;\n",
1342        );
1343        git(root, &["add", "."]);
1344        git(root, &["commit", "-m", "update a"]);
1345        let head = get_head_sha(root).unwrap();
1346
1347        let (incremental, incremental_hit) =
1348            analyze_churn_cached(root, &since, cache.path(), false).unwrap();
1349        assert!(incremental_hit);
1350        assert_eq!(incremental.files[&file].commits, 2);
1351
1352        let cache = load_churn_cache(cache.path(), &since.git_after).unwrap();
1353        assert_eq!(cache.last_indexed_sha, head);
1354    }
1355}