Skip to main content

fallow_core/
churn.rs

1//! Git churn analysis for hotspot detection.
2//!
3//! Shells out to `git log` to collect per-file change history, then computes
4//! recency-weighted churn scores and trend indicators.
5
6use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::Command;
9
10use serde::Serialize;
11
/// Number of seconds in one day; converts commit-age deltas to fractional days.
const SECS_PER_DAY: f64 = 86_400.0;

/// Recency weight half-life in days. A commit from 90 days ago counts half
/// as much as today's commit; 180 days ago counts 25%.
const HALF_LIFE_DAYS: f64 = 90.0;
18
19/// Parsed duration for the `--since` flag.
/// Parsed duration for the `--since` flag, produced by [`parse_since`].
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value to pass to `git log --after` (e.g., `"6 months ago"` or `"2025-06-01"`).
    pub git_after: String,
    /// Human-readable display string (e.g., `"6 months"`).
    pub display: String,
}
27
/// Churn trend indicator based on comparing recent vs older halves of the analysis period.
///
/// Serialized as `snake_case` strings in JSON output and bitcode-encoded in
/// the on-disk churn cache.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// Recent half has >1.5× the commits of the older half.
    Accelerating,
    /// Churn is roughly stable between halves.
    Stable,
    /// Recent half has <0.67× the commits of the older half.
    Cooling,
}
39
40impl std::fmt::Display for ChurnTrend {
41    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42        match self {
43            Self::Accelerating => write!(f, "accelerating"),
44            Self::Stable => write!(f, "stable"),
45            Self::Cooling => write!(f, "cooling"),
46        }
47    }
48}
49
/// Per-author commit aggregation for a single file.
///
/// Authors are interned via [`ChurnResult::author_pool`] indices to keep
/// per-file maps small and the bitcode cache compact. All fields are plain
/// numbers, so the struct is `Copy`.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Total commits by this author touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit sum (exponential decay, half-life 90 days).
    pub weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    pub first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    pub last_commit_ts: u64,
}
65
/// Per-file churn data collected from git history.
///
/// Aggregates every commit touching the file within the analysis window,
/// as parsed from `git log --numstat` output.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// Absolute file path.
    pub path: PathBuf,
    /// Total number of commits touching this file in the analysis window.
    pub commits: u32,
    /// Recency-weighted commit count (exponential decay, half-life 90 days).
    pub weighted_commits: f64,
    /// Total lines added across all commits.
    pub lines_added: u32,
    /// Total lines deleted across all commits.
    pub lines_deleted: u32,
    /// Churn trend: accelerating, stable, or cooling.
    pub trend: ChurnTrend,
    /// Per-author contributions keyed by interned author index.
    /// Indices reference [`ChurnResult::author_pool`].
    pub authors: FxHashMap<u32, AuthorContribution>,
}
85
/// Result of churn analysis, returned by [`analyze_churn`] and
/// [`analyze_churn_cached`].
pub struct ChurnResult {
    /// Per-file churn data, keyed by absolute path.
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository is a shallow clone (history may be truncated,
    /// which skews commit counts and trends).
    pub shallow_clone: bool,
    /// Author email pool. Per-file [`AuthorContribution`] entries reference
    /// authors by their index into this vector.
    pub author_pool: Vec<String>,
}
96
97/// Parse a `--since` value into a git-compatible duration.
98///
99/// Accepts:
100/// - Durations: `6m`, `6months`, `90d`, `90days`, `1y`, `1year`, `2w`, `2weeks`
101/// - ISO dates: `2025-06-01`
102///
103/// # Errors
104///
105/// Returns an error if the input is not a recognized duration format or ISO date,
106/// the numeric part is invalid, or the duration is zero.
107pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
108    // Try ISO date first (YYYY-MM-DD)
109    if is_iso_date(input) {
110        return Ok(SinceDuration {
111            git_after: input.to_string(),
112            display: input.to_string(),
113        });
114    }
115
116    // Parse duration: number + unit
117    let (num_str, unit) = split_number_unit(input)?;
118    let num: u64 = num_str
119        .parse()
120        .map_err(|_| format!("invalid number in --since: {input}"))?;
121
122    if num == 0 {
123        return Err("--since duration must be greater than 0".to_string());
124    }
125
126    match unit {
127        "d" | "day" | "days" => {
128            let s = if num == 1 { "" } else { "s" };
129            Ok(SinceDuration {
130                git_after: format!("{num} day{s} ago"),
131                display: format!("{num} day{s}"),
132            })
133        }
134        "w" | "week" | "weeks" => {
135            let s = if num == 1 { "" } else { "s" };
136            Ok(SinceDuration {
137                git_after: format!("{num} week{s} ago"),
138                display: format!("{num} week{s}"),
139            })
140        }
141        "m" | "month" | "months" => {
142            let s = if num == 1 { "" } else { "s" };
143            Ok(SinceDuration {
144                git_after: format!("{num} month{s} ago"),
145                display: format!("{num} month{s}"),
146            })
147        }
148        "y" | "year" | "years" => {
149            let s = if num == 1 { "" } else { "s" };
150            Ok(SinceDuration {
151                git_after: format!("{num} year{s} ago"),
152                display: format!("{num} year{s}"),
153            })
154        }
155        _ => Err(format!(
156            "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
157        )),
158    }
159}
160
161/// Analyze git churn for files in the given root directory.
162///
163/// Returns `None` if git is not available or the directory is not a git repository.
164pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
165    let shallow = is_shallow_clone(root);
166
167    let output = Command::new("git")
168        .args([
169            "log",
170            "--numstat",
171            "--no-merges",
172            "--no-renames",
173            "--use-mailmap",
174            "--format=format:%at|%ae",
175            &format!("--after={}", since.git_after),
176        ])
177        .current_dir(root)
178        .output();
179
180    let output = match output {
181        Ok(o) => o,
182        Err(e) => {
183            tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
184            return None;
185        }
186    };
187
188    if !output.status.success() {
189        let stderr = String::from_utf8_lossy(&output.stderr);
190        tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
191        return None;
192    }
193
194    let stdout = String::from_utf8_lossy(&output.stdout);
195    let (files, author_pool) = parse_git_log(&stdout, root);
196
197    Some(ChurnResult {
198        files,
199        shallow_clone: shallow,
200        author_pool,
201    })
202}
203
/// Check if the repository is a shallow clone.
///
/// Runs `git rev-parse --is-shallow-repository`; any failure to launch git
/// is treated as "not shallow".
#[must_use]
pub fn is_shallow_clone(root: &Path) -> bool {
    let result = Command::new("git")
        .args(["rev-parse", "--is-shallow-repository"])
        .current_dir(root)
        .output();
    match result {
        Ok(out) => String::from_utf8_lossy(&out.stdout)
            .trim()
            .eq_ignore_ascii_case("true"),
        Err(_) => false,
    }
}
217
/// Check if the directory is inside a git repository.
///
/// Uses `git rev-parse --git-dir` with both streams silenced; only the
/// exit status matters.
#[must_use]
pub fn is_git_repo(root: &Path) -> bool {
    use std::process::Stdio;

    let status = Command::new("git")
        .args(["rev-parse", "--git-dir"])
        .current_dir(root)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status();
    matches!(status, Ok(s) if s.success())
}
229
230// ── Churn cache ──────────────────────────────────────────────────
231
/// Maximum size of a churn cache file (16 MB). Larger files are treated as
/// a cache miss instead of being decoded.
const MAX_CHURN_CACHE_SIZE: usize = 16 * 1024 * 1024;

/// Cache schema version. Bump when the on-disk shape of [`ChurnCache`]
/// changes so older payloads are rejected on load. Bumped to 2 in v2.37.0
/// when per-author contributions and the author pool were added.
const CHURN_CACHE_VERSION: u8 = 2;
239
/// Serializable per-author contribution entry for the disk cache.
#[derive(bitcode::Encode, bitcode::Decode)]
struct CachedAuthorContribution {
    /// Index into [`ChurnCache::author_pool`].
    author_idx: u32,
    /// Total commits by this author touching the file.
    commits: u32,
    /// Recency-weighted commit sum.
    weighted_commits: f64,
    /// Earliest commit timestamp by this author (epoch seconds).
    first_commit_ts: u64,
    /// Latest commit timestamp by this author (epoch seconds).
    last_commit_ts: u64,
}
249
/// Serializable per-file churn entry for the disk cache.
#[derive(bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// File path, stored as a (lossy) UTF-8 string.
    path: String,
    /// Total commits touching the file in the analysis window.
    commits: u32,
    /// Recency-weighted commit count.
    weighted_commits: f64,
    /// Total lines added across all commits.
    lines_added: u32,
    /// Total lines deleted across all commits.
    lines_deleted: u32,
    /// Churn trend at analysis time.
    trend: ChurnTrend,
    /// Per-author entries; indices resolve via [`ChurnCache::author_pool`].
    authors: Vec<CachedAuthorContribution>,
}
261
/// Cached churn data keyed by HEAD SHA and since string.
#[derive(bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Schema version; must equal [`CHURN_CACHE_VERSION`] to be accepted.
    version: u8,
    /// Full HEAD commit SHA at analysis time; any new commit invalidates the cache.
    head_sha: String,
    /// The `git log --after` value the data was computed for.
    git_after: String,
    /// Per-file entries (unordered).
    files: Vec<CachedFileChurn>,
    /// Whether the repository was a shallow clone when analyzed.
    shallow_clone: bool,
    /// Author email pool referenced by [`CachedAuthorContribution::author_idx`].
    author_pool: Vec<String>,
}
274
/// Get the full HEAD SHA for cache keying.
///
/// Returns `None` when git cannot be run or exits unsuccessfully
/// (e.g., not a repo, or a repo with no commits).
fn get_head_sha(root: &Path) -> Option<String> {
    let out = Command::new("git")
        .args(["rev-parse", "HEAD"])
        .current_dir(root)
        .output()
        .ok()?;
    if !out.status.success() {
        return None;
    }
    Some(String::from_utf8_lossy(&out.stdout).trim().to_string())
}
285
286/// Try to load churn data from disk cache. Returns `None` on cache miss
287/// or version mismatch.
288fn load_churn_cache(cache_dir: &Path, head_sha: &str, git_after: &str) -> Option<ChurnResult> {
289    let cache_file = cache_dir.join("churn.bin");
290    let data = std::fs::read(&cache_file).ok()?;
291    if data.len() > MAX_CHURN_CACHE_SIZE {
292        return None;
293    }
294    let cache: ChurnCache = bitcode::decode(&data).ok()?;
295    if cache.version != CHURN_CACHE_VERSION
296        || cache.head_sha != head_sha
297        || cache.git_after != git_after
298    {
299        return None;
300    }
301    let mut files = FxHashMap::default();
302    for entry in cache.files {
303        let path = PathBuf::from(&entry.path);
304        let authors = entry
305            .authors
306            .into_iter()
307            .map(|a| {
308                (
309                    a.author_idx,
310                    AuthorContribution {
311                        commits: a.commits,
312                        weighted_commits: a.weighted_commits,
313                        first_commit_ts: a.first_commit_ts,
314                        last_commit_ts: a.last_commit_ts,
315                    },
316                )
317            })
318            .collect();
319        files.insert(
320            path.clone(),
321            FileChurn {
322                path,
323                commits: entry.commits,
324                weighted_commits: entry.weighted_commits,
325                lines_added: entry.lines_added,
326                lines_deleted: entry.lines_deleted,
327                trend: entry.trend,
328                authors,
329            },
330        );
331    }
332    Some(ChurnResult {
333        files,
334        shallow_clone: cache.shallow_clone,
335        author_pool: cache.author_pool,
336    })
337}
338
339/// Save churn data to disk cache.
340fn save_churn_cache(cache_dir: &Path, head_sha: &str, git_after: &str, result: &ChurnResult) {
341    let files: Vec<CachedFileChurn> = result
342        .files
343        .values()
344        .map(|f| CachedFileChurn {
345            path: f.path.to_string_lossy().to_string(),
346            commits: f.commits,
347            weighted_commits: f.weighted_commits,
348            lines_added: f.lines_added,
349            lines_deleted: f.lines_deleted,
350            trend: f.trend,
351            authors: f
352                .authors
353                .iter()
354                .map(|(&idx, c)| CachedAuthorContribution {
355                    author_idx: idx,
356                    commits: c.commits,
357                    weighted_commits: c.weighted_commits,
358                    first_commit_ts: c.first_commit_ts,
359                    last_commit_ts: c.last_commit_ts,
360                })
361                .collect(),
362        })
363        .collect();
364    let cache = ChurnCache {
365        version: CHURN_CACHE_VERSION,
366        head_sha: head_sha.to_string(),
367        git_after: git_after.to_string(),
368        files,
369        shallow_clone: result.shallow_clone,
370        author_pool: result.author_pool.clone(),
371    };
372    let _ = std::fs::create_dir_all(cache_dir);
373    let data = bitcode::encode(&cache);
374    // Write to temp file then rename for atomic update (avoids partial reads by concurrent processes)
375    let tmp = cache_dir.join("churn.bin.tmp");
376    if std::fs::write(&tmp, data).is_ok() {
377        let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
378    }
379}
380
381/// Analyze churn with disk caching. Uses cached result when HEAD SHA and
382/// since duration match. On cache miss, runs `git log` and saves the result.
383///
384/// Returns `(ChurnResult, bool)` where the bool indicates whether the cache was hit.
385/// Returns `None` if git analysis fails.
386pub fn analyze_churn_cached(
387    root: &Path,
388    since: &SinceDuration,
389    cache_dir: &Path,
390    no_cache: bool,
391) -> Option<(ChurnResult, bool)> {
392    let head_sha = get_head_sha(root)?;
393
394    if !no_cache && let Some(cached) = load_churn_cache(cache_dir, &head_sha, &since.git_after) {
395        return Some((cached, true));
396    }
397
398    let result = analyze_churn(root, since)?;
399
400    if !no_cache {
401        save_churn_cache(cache_dir, &head_sha, &since.git_after, &result);
402    }
403
404    Some((result, false))
405}
406
407// ── Internal ──────────────────────────────────────────────────────
408
/// Intermediate per-file accumulator during git log parsing.
///
/// Folded into a final [`FileChurn`] (with trend computed and weights
/// rounded) once the whole log has been consumed.
struct FileAccum {
    /// Commit timestamps (epoch seconds) for trend computation.
    commit_timestamps: Vec<u64>,
    /// Recency-weighted commit sum.
    weighted_commits: f64,
    /// Total lines added across accumulated commits.
    lines_added: u32,
    /// Total lines deleted across accumulated commits.
    lines_deleted: u32,
    /// Per-author contributions keyed by interned author index.
    authors: FxHashMap<u32, AuthorContribution>,
}
420
/// Parse `git log --numstat --format=format:%at|%ae` output.
///
/// Returns a per-file churn map plus the author email pool referenced by
/// interned indices in [`FileChurn::authors`].
///
/// The log stream alternates commit header lines (`<epoch>|<email>`) with
/// that commit's numstat rows; `current_timestamp` / `current_author_idx`
/// carry the most recent header's state onto each numstat row.
#[expect(
    clippy::cast_possible_truncation,
    reason = "commit count per file is bounded by git history depth"
)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    // Reference point for recency weighting; clock errors fall back to epoch 0.
    let now_secs = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();

    let mut accum: FxHashMap<PathBuf, FileAccum> = FxHashMap::default();
    let mut author_pool: Vec<String> = Vec::new();
    let mut author_index: FxHashMap<String, u32> = FxHashMap::default();
    // State from the last seen commit header; applies to following numstat rows.
    let mut current_timestamp: Option<u64> = None;
    let mut current_author_idx: Option<u32> = None;

    for line in stdout.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        // Header lines have shape: "<ts>|<email>"
        if let Some((ts_str, email)) = line.split_once('|')
            && let Ok(ts) = ts_str.parse::<u64>()
        {
            current_timestamp = Some(ts);
            current_author_idx = Some(intern_author(email, &mut author_pool, &mut author_index));
            continue;
        }

        // Backwards-compat: bare timestamp (legacy format or test fixtures).
        // No author is attributed for these commits.
        if let Ok(ts) = line.parse::<u64>() {
            current_timestamp = Some(ts);
            current_author_idx = None;
            continue;
        }

        // Numstat line: "10\t5\tpath/to/file"
        if let Some((added, deleted, path)) = parse_numstat_line(line) {
            let abs_path = root.join(path);
            // Missing header (shouldn't normally happen) counts the commit as "now".
            let ts = current_timestamp.unwrap_or(now_secs);
            let age_days = (now_secs.saturating_sub(ts)) as f64 / SECS_PER_DAY;
            // Exponential decay: the weight halves every HALF_LIFE_DAYS.
            let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);

            let entry = accum.entry(abs_path).or_insert_with(|| FileAccum {
                commit_timestamps: Vec::new(),
                weighted_commits: 0.0,
                lines_added: 0,
                lines_deleted: 0,
                authors: FxHashMap::default(),
            });
            entry.commit_timestamps.push(ts);
            entry.weighted_commits += weight;
            entry.lines_added += added;
            entry.lines_deleted += deleted;

            // Attribute the change to the current header's author, if known.
            if let Some(idx) = current_author_idx {
                entry
                    .authors
                    .entry(idx)
                    .and_modify(|c| {
                        c.commits += 1;
                        c.weighted_commits += weight;
                        c.first_commit_ts = c.first_commit_ts.min(ts);
                        c.last_commit_ts = c.last_commit_ts.max(ts);
                    })
                    .or_insert(AuthorContribution {
                        commits: 1,
                        weighted_commits: weight,
                        first_commit_ts: ts,
                        last_commit_ts: ts,
                    });
            }
        }
    }

    // Finalize: compute trends and round weighted sums to 2 decimals.
    let files = accum
        .into_iter()
        .map(|(path, acc)| {
            let commits = acc.commit_timestamps.len() as u32;
            let trend = compute_trend(&acc.commit_timestamps);
            let mut authors = acc.authors;
            // Round per-author weighted sums for cache stability.
            for c in authors.values_mut() {
                c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
            }
            let churn = FileChurn {
                path: path.clone(),
                commits,
                weighted_commits: (acc.weighted_commits * 100.0).round() / 100.0,
                lines_added: acc.lines_added,
                lines_deleted: acc.lines_deleted,
                trend,
                authors,
            };
            (path, churn)
        })
        .collect();

    (files, author_pool)
}
527
528/// Intern an author email into the pool, returning its stable index.
529fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
530    if let Some(&idx) = index.get(email) {
531        return idx;
532    }
533    #[expect(
534        clippy::cast_possible_truncation,
535        reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
536    )]
537    let idx = pool.len() as u32;
538    let owned = email.to_string();
539    index.insert(owned.clone(), idx);
540    pool.push(owned);
541    idx
542}
543
/// Parse a single numstat line: `"10\t5\tpath/to/file.ts"`.
/// Binary files show as `"-\t-\tpath"` — skip those.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    // Peel off the first two tab-separated fields; everything after the
    // second tab is the path (which may itself contain tabs).
    let (added_str, rest) = line.split_once('\t')?;
    let (deleted_str, path) = rest.split_once('\t')?;

    // "-" (binary files) fails the numeric parse and yields None.
    let added = added_str.parse::<u32>().ok()?;
    let deleted = deleted_str.parse::<u32>().ok()?;

    Some((added, deleted, path))
}
558
559/// Compute churn trend by splitting commits into two temporal halves.
560///
561/// Finds the midpoint between the oldest and newest commit timestamps,
562/// then compares commit counts in each half:
563/// - Recent > 1.5× older → Accelerating
564/// - Recent < 0.67× older → Cooling
565/// - Otherwise → Stable
566fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
567    if timestamps.len() < 2 {
568        return ChurnTrend::Stable;
569    }
570
571    let min_ts = timestamps.iter().copied().min().unwrap_or(0);
572    let max_ts = timestamps.iter().copied().max().unwrap_or(0);
573
574    if max_ts == min_ts {
575        return ChurnTrend::Stable;
576    }
577
578    let midpoint = min_ts + (max_ts - min_ts) / 2;
579    let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
580    let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
581
582    if older < 1.0 {
583        return ChurnTrend::Stable;
584    }
585
586    let ratio = recent / older;
587    if ratio > 1.5 {
588        ChurnTrend::Accelerating
589    } else if ratio < 0.67 {
590        ChurnTrend::Cooling
591    } else {
592        ChurnTrend::Stable
593    }
594}
595
/// Check for an exact `YYYY-MM-DD` shape: 10 ASCII bytes, digits everywhere
/// except dashes at positions 4 and 7. No calendar validation is performed.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    if bytes.len() != 10 {
        return false;
    }
    bytes.iter().enumerate().all(|(i, &b)| match i {
        4 | 7 => b == b'-',
        _ => b.is_ascii_digit(),
    })
}
604
/// Split an input like `"90d"` into its leading ASCII-digit run and the
/// remaining unit suffix, e.g. `("90", "d")`.
///
/// # Errors
///
/// Errors when the input is all digits (no unit) or does not start with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        // All digits: no unit suffix present.
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        // First char already non-digit: no leading number.
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(pos) => Ok(input.split_at(pos)),
    }
}
616
617#[cfg(test)]
618mod tests {
619    use super::*;
620
621    // ── parse_since ──────────────────────────────────────────────
622
623    #[test]
624    fn parse_since_months_short() {
625        let d = parse_since("6m").unwrap();
626        assert_eq!(d.git_after, "6 months ago");
627        assert_eq!(d.display, "6 months");
628    }
629
630    #[test]
631    fn parse_since_months_long() {
632        let d = parse_since("6months").unwrap();
633        assert_eq!(d.git_after, "6 months ago");
634        assert_eq!(d.display, "6 months");
635    }
636
637    #[test]
638    fn parse_since_days() {
639        let d = parse_since("90d").unwrap();
640        assert_eq!(d.git_after, "90 days ago");
641        assert_eq!(d.display, "90 days");
642    }
643
644    #[test]
645    fn parse_since_year_singular() {
646        let d = parse_since("1y").unwrap();
647        assert_eq!(d.git_after, "1 year ago");
648        assert_eq!(d.display, "1 year");
649    }
650
651    #[test]
652    fn parse_since_years_plural() {
653        let d = parse_since("2years").unwrap();
654        assert_eq!(d.git_after, "2 years ago");
655        assert_eq!(d.display, "2 years");
656    }
657
658    #[test]
659    fn parse_since_weeks() {
660        let d = parse_since("2w").unwrap();
661        assert_eq!(d.git_after, "2 weeks ago");
662        assert_eq!(d.display, "2 weeks");
663    }
664
665    #[test]
666    fn parse_since_iso_date() {
667        let d = parse_since("2025-06-01").unwrap();
668        assert_eq!(d.git_after, "2025-06-01");
669        assert_eq!(d.display, "2025-06-01");
670    }
671
672    #[test]
673    fn parse_since_month_singular() {
674        let d = parse_since("1month").unwrap();
675        assert_eq!(d.display, "1 month");
676    }
677
678    #[test]
679    fn parse_since_day_singular() {
680        let d = parse_since("1day").unwrap();
681        assert_eq!(d.display, "1 day");
682    }
683
684    #[test]
685    fn parse_since_zero_rejected() {
686        assert!(parse_since("0m").is_err());
687    }
688
689    #[test]
690    fn parse_since_no_unit_rejected() {
691        assert!(parse_since("90").is_err());
692    }
693
694    #[test]
695    fn parse_since_unknown_unit_rejected() {
696        assert!(parse_since("6x").is_err());
697    }
698
699    #[test]
700    fn parse_since_no_number_rejected() {
701        assert!(parse_since("months").is_err());
702    }
703
704    // ── parse_numstat_line ───────────────────────────────────────
705
706    #[test]
707    fn numstat_normal() {
708        let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
709        assert_eq!(a, 10);
710        assert_eq!(d, 5);
711        assert_eq!(p, "src/file.ts");
712    }
713
714    #[test]
715    fn numstat_binary_skipped() {
716        assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
717    }
718
719    #[test]
720    fn numstat_zero_lines() {
721        let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
722        assert_eq!(a, 0);
723        assert_eq!(d, 0);
724        assert_eq!(p, "src/empty.ts");
725    }
726
727    // ── compute_trend ────────────────────────────────────────────
728
729    #[test]
730    fn trend_empty_is_stable() {
731        assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
732    }
733
734    #[test]
735    fn trend_single_commit_is_stable() {
736        assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
737    }
738
739    #[test]
740    fn trend_accelerating() {
741        // 2 old commits, 5 recent commits
742        let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
743        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
744    }
745
746    #[test]
747    fn trend_cooling() {
748        // 5 old commits, 2 recent commits
749        let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
750        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
751    }
752
753    #[test]
754    fn trend_stable_even_distribution() {
755        // 3 old commits, 3 recent commits → ratio = 1.0 → stable
756        let timestamps = vec![100, 200, 300, 700, 800, 900];
757        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
758    }
759
760    #[test]
761    fn trend_same_timestamp_is_stable() {
762        let timestamps = vec![500, 500, 500];
763        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
764    }
765
766    // ── is_iso_date ──────────────────────────────────────────────
767
768    #[test]
769    fn iso_date_valid() {
770        assert!(is_iso_date("2025-06-01"));
771        assert!(is_iso_date("2025-12-31"));
772    }
773
774    #[test]
775    fn iso_date_with_time_rejected() {
776        // Only exact YYYY-MM-DD (10 chars) is accepted
777        assert!(!is_iso_date("2025-06-01T00:00:00"));
778    }
779
780    #[test]
781    fn iso_date_invalid() {
782        assert!(!is_iso_date("6months"));
783        assert!(!is_iso_date("2025"));
784        assert!(!is_iso_date("not-a-date"));
785        assert!(!is_iso_date("abcd-ef-gh"));
786    }
787
788    // ── Display ──────────────────────────────────────────────────
789
790    #[test]
791    fn trend_display() {
792        assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
793        assert_eq!(ChurnTrend::Stable.to_string(), "stable");
794        assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
795    }
796
797    // ── parse_git_log ───────────────────────────────────────────
798
799    #[test]
800    fn parse_git_log_single_commit() {
801        let root = Path::new("/project");
802        let output = "1700000000\n10\t5\tsrc/index.ts\n";
803        let (result, _) = parse_git_log(output, root);
804        assert_eq!(result.len(), 1);
805        let churn = &result[&PathBuf::from("/project/src/index.ts")];
806        assert_eq!(churn.commits, 1);
807        assert_eq!(churn.lines_added, 10);
808        assert_eq!(churn.lines_deleted, 5);
809    }
810
811    #[test]
812    fn parse_git_log_multiple_commits_same_file() {
813        let root = Path::new("/project");
814        let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
815        let (result, _) = parse_git_log(output, root);
816        assert_eq!(result.len(), 1);
817        let churn = &result[&PathBuf::from("/project/src/index.ts")];
818        assert_eq!(churn.commits, 2);
819        assert_eq!(churn.lines_added, 13);
820        assert_eq!(churn.lines_deleted, 7);
821    }
822
823    #[test]
824    fn parse_git_log_multiple_files() {
825        let root = Path::new("/project");
826        let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
827        let (result, _) = parse_git_log(output, root);
828        assert_eq!(result.len(), 2);
829        assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
830        assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
831    }
832
833    #[test]
834    fn parse_git_log_empty_output() {
835        let root = Path::new("/project");
836        let (result, _) = parse_git_log("", root);
837        assert!(result.is_empty());
838    }
839
840    #[test]
841    fn parse_git_log_skips_binary_files() {
842        let root = Path::new("/project");
843        let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
844        let (result, _) = parse_git_log(output, root);
845        assert_eq!(result.len(), 1);
846        assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
847    }
848
849    #[test]
850    fn parse_git_log_weighted_commits_are_positive() {
851        let root = Path::new("/project");
852        // Use a timestamp near "now" to ensure weight doesn't decay to zero
853        let now_secs = std::time::SystemTime::now()
854            .duration_since(std::time::UNIX_EPOCH)
855            .unwrap()
856            .as_secs();
857        let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
858        let (result, _) = parse_git_log(&output, root);
859        let churn = &result[&PathBuf::from("/project/src/a.ts")];
860        assert!(
861            churn.weighted_commits > 0.0,
862            "weighted_commits should be positive for recent commits"
863        );
864    }
865
866    // ── compute_trend edge cases ─────────────────────────────────
867
868    #[test]
869    fn trend_boundary_1_5x_ratio() {
870        // Exactly 1.5x ratio (3 recent : 2 old) → boundary between stable and accelerating
871        // midpoint = 100 + (1000-100)/2 = 550
872        // old: 100, 200 (2 timestamps <= 550)
873        // recent: 600, 800, 1000 (3 timestamps > 550)
874        // ratio = 3/2 = 1.5 — NOT > 1.5, so stable
875        let timestamps = vec![100, 200, 600, 800, 1000];
876        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
877    }
878
879    #[test]
880    fn trend_just_above_1_5x() {
881        // midpoint = 100 + (1000-100)/2 = 550
882        // old: 100 (1 timestamp <= 550)
883        // recent: 600, 800, 1000 (3 timestamps > 550)
884        // ratio = 3/1 = 3.0 → accelerating
885        let timestamps = vec![100, 600, 800, 1000];
886        assert_eq!(compute_trend(&timestamps), ChurnTrend::Accelerating);
887    }
888
889    #[test]
890    fn trend_boundary_0_67x_ratio() {
891        // Exactly 0.67x ratio → boundary between cooling and stable
892        // midpoint = 100 + (1000-100)/2 = 550
893        // old: 100, 200, 300 (3 timestamps <= 550)
894        // recent: 600, 1000 (2 timestamps > 550)
895        // ratio = 2/3 = 0.666... < 0.67 → cooling
896        let timestamps = vec![100, 200, 300, 600, 1000];
897        assert_eq!(compute_trend(&timestamps), ChurnTrend::Cooling);
898    }
899
900    #[test]
901    fn trend_two_timestamps_different() {
902        // Only 2 timestamps: midpoint = 100 + (200-100)/2 = 150
903        // old: 100 (1 timestamp <= 150)
904        // recent: 200 (1 timestamp > 150)
905        // ratio = 1/1 = 1.0 → stable
906        let timestamps = vec![100, 200];
907        assert_eq!(compute_trend(&timestamps), ChurnTrend::Stable);
908    }
909
910    // ── parse_since additional coverage ─────────────────────────
911
912    #[test]
913    fn parse_since_week_singular() {
914        let d = parse_since("1week").unwrap();
915        assert_eq!(d.git_after, "1 week ago");
916        assert_eq!(d.display, "1 week");
917    }
918
919    #[test]
920    fn parse_since_weeks_long() {
921        let d = parse_since("3weeks").unwrap();
922        assert_eq!(d.git_after, "3 weeks ago");
923        assert_eq!(d.display, "3 weeks");
924    }
925
926    #[test]
927    fn parse_since_days_long() {
928        let d = parse_since("30days").unwrap();
929        assert_eq!(d.git_after, "30 days ago");
930        assert_eq!(d.display, "30 days");
931    }
932
933    #[test]
934    fn parse_since_year_long() {
935        let d = parse_since("1year").unwrap();
936        assert_eq!(d.git_after, "1 year ago");
937        assert_eq!(d.display, "1 year");
938    }
939
940    #[test]
941    fn parse_since_overflow_number_rejected() {
942        // Number too large for u64
943        let result = parse_since("99999999999999999999d");
944        assert!(result.is_err());
945        let err = result.unwrap_err();
946        assert!(err.contains("invalid number"));
947    }
948
949    #[test]
950    fn parse_since_zero_days_rejected() {
951        assert!(parse_since("0d").is_err());
952    }
953
954    #[test]
955    fn parse_since_zero_weeks_rejected() {
956        assert!(parse_since("0w").is_err());
957    }
958
959    #[test]
960    fn parse_since_zero_years_rejected() {
961        assert!(parse_since("0y").is_err());
962    }
963
964    // ── parse_numstat_line additional coverage ──────────────────
965
966    #[test]
967    fn numstat_missing_path() {
968        // Only two tab-separated fields, no path
969        assert!(parse_numstat_line("10\t5").is_none());
970    }
971
972    #[test]
973    fn numstat_single_field() {
974        assert!(parse_numstat_line("10").is_none());
975    }
976
977    #[test]
978    fn numstat_empty_string() {
979        assert!(parse_numstat_line("").is_none());
980    }
981
982    #[test]
983    fn numstat_only_added_is_binary() {
984        // Added is "-" but deleted is numeric
985        assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
986    }
987
988    #[test]
989    fn numstat_only_deleted_is_binary() {
990        // Added is numeric but deleted is "-"
991        assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
992    }
993
994    #[test]
995    fn numstat_path_with_spaces() {
996        let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
997        assert_eq!(a, 3);
998        assert_eq!(d, 1);
999        assert_eq!(p, "path with spaces/file.ts");
1000    }
1001
1002    #[test]
1003    fn numstat_large_numbers() {
1004        let (a, d, p) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
1005        assert_eq!(a, 9999);
1006        assert_eq!(d, 8888);
1007        assert_eq!(p, "src/big.ts");
1008    }
1009
1010    // ── is_iso_date additional coverage ─────────────────────────
1011
1012    #[test]
1013    fn iso_date_wrong_separator_positions() {
1014        // Dashes in wrong positions
1015        assert!(!is_iso_date("20-25-0601"));
1016        assert!(!is_iso_date("202506-01-"));
1017    }
1018
1019    #[test]
1020    fn iso_date_too_short() {
1021        assert!(!is_iso_date("2025-06-0"));
1022    }
1023
1024    #[test]
1025    fn iso_date_letters_in_day() {
1026        assert!(!is_iso_date("2025-06-ab"));
1027    }
1028
1029    #[test]
1030    fn iso_date_letters_in_month() {
1031        assert!(!is_iso_date("2025-ab-01"));
1032    }
1033
1034    // ── split_number_unit additional coverage ───────────────────
1035
1036    #[test]
1037    fn split_number_unit_valid() {
1038        let (num, unit) = split_number_unit("42days").unwrap();
1039        assert_eq!(num, "42");
1040        assert_eq!(unit, "days");
1041    }
1042
1043    #[test]
1044    fn split_number_unit_single_digit() {
1045        let (num, unit) = split_number_unit("1m").unwrap();
1046        assert_eq!(num, "1");
1047        assert_eq!(unit, "m");
1048    }
1049
1050    #[test]
1051    fn split_number_unit_no_digits() {
1052        let err = split_number_unit("abc").unwrap_err();
1053        assert!(err.contains("must start with a number"));
1054    }
1055
1056    #[test]
1057    fn split_number_unit_no_unit() {
1058        let err = split_number_unit("123").unwrap_err();
1059        assert!(err.contains("requires a unit suffix"));
1060    }
1061
1062    // ── parse_git_log additional coverage ───────────────────────
1063
1064    #[test]
1065    fn parse_git_log_numstat_before_timestamp_uses_now() {
1066        let root = Path::new("/project");
1067        // No timestamp line before the numstat line
1068        let output = "10\t5\tsrc/no_ts.ts\n";
1069        let (result, _) = parse_git_log(output, root);
1070        assert_eq!(result.len(), 1);
1071        let churn = &result[&PathBuf::from("/project/src/no_ts.ts")];
1072        assert_eq!(churn.commits, 1);
1073        assert_eq!(churn.lines_added, 10);
1074        assert_eq!(churn.lines_deleted, 5);
1075        // Without a timestamp, it falls back to now_secs, so weight should be ~1.0
1076        assert!(
1077            churn.weighted_commits > 0.9,
1078            "weight should be near 1.0 when timestamp defaults to now"
1079        );
1080    }
1081
1082    #[test]
1083    fn parse_git_log_whitespace_lines_ignored() {
1084        let root = Path::new("/project");
1085        let output = "  \n1700000000\n  \n10\t5\tsrc/a.ts\n  \n";
1086        let (result, _) = parse_git_log(output, root);
1087        assert_eq!(result.len(), 1);
1088    }
1089
1090    #[test]
1091    fn parse_git_log_trend_is_computed_per_file() {
1092        let root = Path::new("/project");
1093        // Two commits far apart for one file, recent-heavy for another
1094        let output = "\
10951000\n5\t1\tsrc/old.ts\n\
10962000\n3\t1\tsrc/old.ts\n\
10971000\n1\t0\tsrc/hot.ts\n\
10981800\n1\t0\tsrc/hot.ts\n\
10991900\n1\t0\tsrc/hot.ts\n\
11001950\n1\t0\tsrc/hot.ts\n\
11012000\n1\t0\tsrc/hot.ts\n";
1102        let (result, _) = parse_git_log(output, root);
1103        let old = &result[&PathBuf::from("/project/src/old.ts")];
1104        let hot = &result[&PathBuf::from("/project/src/hot.ts")];
1105        assert_eq!(old.commits, 2);
1106        assert_eq!(hot.commits, 5);
1107        // hot.ts has 4 recent vs 1 old => accelerating
1108        assert_eq!(hot.trend, ChurnTrend::Accelerating);
1109    }
1110
1111    #[test]
1112    fn parse_git_log_weighted_decay_for_old_commits() {
1113        let root = Path::new("/project");
1114        let now = std::time::SystemTime::now()
1115            .duration_since(std::time::UNIX_EPOCH)
1116            .unwrap()
1117            .as_secs();
1118        // One commit from 180 days ago (two half-lives) should weigh ~0.25
1119        let old_ts = now - (180 * 86_400);
1120        let output = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
1121        let (result, _) = parse_git_log(&output, root);
1122        let churn = &result[&PathBuf::from("/project/src/old.ts")];
1123        assert!(
1124            churn.weighted_commits < 0.5,
1125            "180-day-old commit should weigh ~0.25, got {}",
1126            churn.weighted_commits
1127        );
1128        assert!(
1129            churn.weighted_commits > 0.1,
1130            "180-day-old commit should weigh ~0.25, got {}",
1131            churn.weighted_commits
1132        );
1133    }
1134
1135    #[test]
1136    fn parse_git_log_path_stored_as_absolute() {
1137        let root = Path::new("/my/project");
1138        let output = "1700000000\n1\t0\tlib/utils.ts\n";
1139        let (result, _) = parse_git_log(output, root);
1140        let key = PathBuf::from("/my/project/lib/utils.ts");
1141        assert!(result.contains_key(&key));
1142        assert_eq!(result[&key].path, key);
1143    }
1144
1145    #[test]
1146    fn parse_git_log_weighted_commits_rounded() {
1147        let root = Path::new("/project");
1148        let now = std::time::SystemTime::now()
1149            .duration_since(std::time::UNIX_EPOCH)
1150            .unwrap()
1151            .as_secs();
1152        // A commit right now should weigh exactly 1.00
1153        let output = format!("{now}\n1\t0\tsrc/a.ts\n");
1154        let (result, _) = parse_git_log(&output, root);
1155        let churn = &result[&PathBuf::from("/project/src/a.ts")];
1156        // Weighted commits are rounded to 2 decimal places
1157        let decimals = format!("{:.2}", churn.weighted_commits);
1158        assert_eq!(
1159            churn.weighted_commits.to_string().len(),
1160            decimals.len().min(churn.weighted_commits.to_string().len()),
1161            "weighted_commits should be rounded to at most 2 decimal places"
1162        );
1163    }
1164
1165    // ── ChurnTrend serde ────────────────────────────────────────
1166
1167    #[test]
1168    fn trend_serde_serialization() {
1169        assert_eq!(
1170            serde_json::to_string(&ChurnTrend::Accelerating).unwrap(),
1171            "\"accelerating\""
1172        );
1173        assert_eq!(
1174            serde_json::to_string(&ChurnTrend::Stable).unwrap(),
1175            "\"stable\""
1176        );
1177        assert_eq!(
1178            serde_json::to_string(&ChurnTrend::Cooling).unwrap(),
1179            "\"cooling\""
1180        );
1181    }
1182
1183    // ── parse_git_log: author tracking ──────────────────────────
1184
1185    #[test]
1186    fn parse_git_log_extracts_author_email() {
1187        let root = Path::new("/project");
1188        let output = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
1189        let (result, pool) = parse_git_log(output, root);
1190        assert_eq!(pool, vec!["alice@example.com".to_string()]);
1191        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1192        assert_eq!(churn.authors.len(), 1);
1193        let alice = &churn.authors[&0];
1194        assert_eq!(alice.commits, 1);
1195        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1196        assert_eq!(alice.last_commit_ts, 1_700_000_000);
1197    }
1198
1199    #[test]
1200    fn parse_git_log_intern_dedupes_authors() {
1201        let root = Path::new("/project");
1202        let output = "\
12031700000000|alice@example.com
12041\t0\ta.ts
12051700100000|bob@example.com
12062\t1\tb.ts
12071700200000|alice@example.com
12083\t2\tc.ts
1209";
1210        let (_result, pool) = parse_git_log(output, root);
1211        assert_eq!(pool.len(), 2);
1212        assert!(pool.contains(&"alice@example.com".to_string()));
1213        assert!(pool.contains(&"bob@example.com".to_string()));
1214    }
1215
1216    #[test]
1217    fn parse_git_log_aggregates_per_author() {
1218        let root = Path::new("/project");
1219        // alice touches index.ts twice, bob once.
1220        let output = "\
12211700000000|alice@example.com
12221\t0\tsrc/index.ts
12231700100000|bob@example.com
12242\t0\tsrc/index.ts
12251700200000|alice@example.com
12261\t1\tsrc/index.ts
1227";
1228        let (result, pool) = parse_git_log(output, root);
1229        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1230        assert_eq!(churn.commits, 3);
1231        assert_eq!(churn.authors.len(), 2);
1232
1233        let alice_idx =
1234            u32::try_from(pool.iter().position(|a| a == "alice@example.com").unwrap()).unwrap();
1235        let alice = &churn.authors[&alice_idx];
1236        assert_eq!(alice.commits, 2);
1237        assert_eq!(alice.first_commit_ts, 1_700_000_000);
1238        assert_eq!(alice.last_commit_ts, 1_700_200_000);
1239    }
1240
1241    #[test]
1242    fn parse_git_log_legacy_bare_timestamp_still_parses() {
1243        // Backwards-compat path: header has no `|email` suffix.
1244        let root = Path::new("/project");
1245        let output = "1700000000\n10\t5\tsrc/index.ts\n";
1246        let (result, pool) = parse_git_log(output, root);
1247        assert!(pool.is_empty());
1248        let churn = &result[&PathBuf::from("/project/src/index.ts")];
1249        assert_eq!(churn.commits, 1);
1250        assert!(churn.authors.is_empty());
1251    }
1252
1253    // ── intern_author ──────────────────────────────────────────
1254
1255    #[test]
1256    fn intern_author_returns_existing_index() {
1257        let mut pool = Vec::new();
1258        let mut index = FxHashMap::default();
1259        let i1 = intern_author("alice@x", &mut pool, &mut index);
1260        let i2 = intern_author("alice@x", &mut pool, &mut index);
1261        assert_eq!(i1, i2);
1262        assert_eq!(pool.len(), 1);
1263    }
1264
1265    #[test]
1266    fn intern_author_assigns_sequential_indices() {
1267        let mut pool = Vec::new();
1268        let mut index = FxHashMap::default();
1269        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1270        assert_eq!(intern_author("bob@x", &mut pool, &mut index), 1);
1271        assert_eq!(intern_author("carol@x", &mut pool, &mut index), 2);
1272        assert_eq!(intern_author("alice@x", &mut pool, &mut index), 0);
1273    }
1274}