1use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::{Deserialize, Serialize};
12
/// Signature of a replacement process runner: it receives the fully configured
/// [`Command`] and returns the captured [`Output`], or the I/O error that
/// prevented the command from running.
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;

/// Process-wide override consulted by `spawn_output`. Being a `OnceLock`, it
/// can be populated at most once for the lifetime of the process.
static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
21
22pub fn set_spawn_hook(hook: ChurnSpawnHook) {
28 let _ = SPAWN_HOOK.set(hook);
29}
30
31fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
32 if let Some(hook) = SPAWN_HOOK.get() {
33 hook(command)
34 } else {
35 command.output()
36 }
37}
38
/// Number of seconds in one day; used to convert a commit's age to days.
const SECS_PER_DAY: f64 = 86_400.0;

/// Half-life, in days, of the exponential decay applied to commit weights:
/// a commit this old contributes half as much as one made right now.
const HALF_LIFE_DAYS: f64 = 90.0;

/// Schema identifier an imported churn file must declare to be accepted.
const CHURN_FILE_SCHEMA: &str = "fallow-churn/v1";

/// Upper bound on the number of events accepted from an imported churn file.
const MAX_CHURN_EVENTS: usize = 5_000_000;

/// How far past the current time (365 days, expressed in seconds) an imported
/// event timestamp may lie before the import is rejected.
const MAX_FUTURE_TIMESTAMP_SECS: u64 = 365 * 24 * 60 * 60;
64
/// A parsed `--since` value, as produced by `parse_since`.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value handed to `git log --after=…` (e.g. `"6 months ago"` or an ISO date).
    pub git_after: String,
    /// Human-readable form of the same window (e.g. `"6 months"`).
    pub display: String,
}
73
/// Direction of a file's change activity, as classified by `compute_trend`.
///
/// Serialized in `snake_case` (for example `"accelerating"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, bitcode::Encode, bitcode::Decode)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[serde(rename_all = "snake_case")]
pub enum ChurnTrend {
    /// The newer half of the observed time span holds more than 1.5x as many
    /// commits as the older half.
    Accelerating,
    /// Neither threshold is crossed, or there is too little data to tell.
    Stable,
    /// The newer half holds fewer than 0.67x as many commits as the older half.
    Cooling,
}
86
87impl std::fmt::Display for ChurnTrend {
88 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89 match self {
90 Self::Accelerating => write!(f, "accelerating"),
91 Self::Stable => write!(f, "stable"),
92 Self::Cooling => write!(f, "cooling"),
93 }
94 }
95}
96
/// One author's share of the changes made to a single file.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Number of events attributed to this author.
    pub commits: u32,
    /// Sum of the recency-decayed weights of those events, rounded to two decimals.
    pub weighted_commits: f64,
    /// Smallest event timestamp seen for this author (unix seconds).
    pub first_commit_ts: u64,
    /// Largest event timestamp seen for this author (unix seconds).
    pub last_commit_ts: u64,
}
112
/// Aggregated change statistics for one file.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// Path of the file (the analysis root joined with the repository-relative path).
    pub path: PathBuf,
    /// Number of events (commits touching this file) in the analysed window.
    pub commits: u32,
    /// Sum of the recency-decayed event weights, rounded to two decimals.
    pub weighted_commits: f64,
    /// Total lines added across all events.
    pub lines_added: u32,
    /// Total lines deleted across all events.
    pub lines_deleted: u32,
    /// Whether activity is speeding up, steady, or slowing down.
    pub trend: ChurnTrend,
    /// Per-author contributions, keyed by the author's index in
    /// `ChurnResult::author_pool`.
    pub authors: FxHashMap<u32, AuthorContribution>,
}
132
/// Outcome of a churn analysis.
#[derive(Debug)]
pub struct ChurnResult {
    /// Per-file statistics, keyed by file path.
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository was detected as a shallow clone (always `false`
    /// for results built from an imported churn file).
    pub shallow_clone: bool,
    /// Distinct author e-mails; `FileChurn::authors` keys index into this list.
    pub author_pool: Vec<String>,
}
144
145pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
156 if is_iso_date(input) {
157 return Ok(SinceDuration {
158 git_after: input.to_string(),
159 display: input.to_string(),
160 });
161 }
162
163 let (num_str, unit) = split_number_unit(input)?;
164 let num: u64 = num_str
165 .parse()
166 .map_err(|_| format!("invalid number in --since: {input}"))?;
167
168 if num == 0 {
169 return Err("--since duration must be greater than 0".to_string());
170 }
171
172 match unit {
173 "d" | "day" | "days" => {
174 let s = if num == 1 { "" } else { "s" };
175 Ok(SinceDuration {
176 git_after: format!("{num} day{s} ago"),
177 display: format!("{num} day{s}"),
178 })
179 }
180 "w" | "week" | "weeks" => {
181 let s = if num == 1 { "" } else { "s" };
182 Ok(SinceDuration {
183 git_after: format!("{num} week{s} ago"),
184 display: format!("{num} week{s}"),
185 })
186 }
187 "m" | "month" | "months" => {
188 let s = if num == 1 { "" } else { "s" };
189 Ok(SinceDuration {
190 git_after: format!("{num} month{s} ago"),
191 display: format!("{num} month{s}"),
192 })
193 }
194 "y" | "year" | "years" => {
195 let s = if num == 1 { "" } else { "s" };
196 Ok(SinceDuration {
197 git_after: format!("{num} year{s} ago"),
198 display: format!("{num} year{s}"),
199 })
200 }
201 _ => Err(format!(
202 "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
203 )),
204 }
205}
206
207pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
211 let shallow = is_shallow_clone(root);
212 let state = analyze_churn_events(root, since, None)?;
213 Some(build_churn_result(state, shallow))
214}
215
/// Top-level shape of an imported churn file (JSON).
#[derive(Debug, Deserialize)]
struct ChurnFileDoc {
    /// Schema identifier; must equal `CHURN_FILE_SCHEMA` for the file to be accepted.
    schema: String,
    /// Change events; an absent field deserializes as an empty list.
    #[serde(default)]
    events: Vec<ChurnFileEvent>,
}
227
/// A single change event in an imported churn file.
#[derive(Debug, Deserialize)]
struct ChurnFileEvent {
    /// Path of the changed file; backslashes are converted to `/` and the value
    /// is trimmed before being joined onto the analysis root.
    path: String,
    /// Time of the change in unix seconds (not milliseconds).
    timestamp: u64,
    /// Author identifier; absent, empty or whitespace-only values leave the
    /// event unattributed.
    #[serde(default)]
    author: Option<String>,
    /// Lines added by this change.
    added: u32,
    /// Lines deleted by this change.
    deleted: u32,
}
247
248pub fn analyze_churn_from_file(path: &Path, root: &Path) -> Result<ChurnResult, String> {
261 let raw = std::fs::read_to_string(path)
262 .map_err(|e| format!("failed to read churn file {}: {e}", path.display()))?;
263 let doc: ChurnFileDoc = serde_json::from_str(&raw)
264 .map_err(|e| format!("failed to parse churn file {}: {e}", path.display()))?;
265 if doc.schema != CHURN_FILE_SCHEMA {
266 return Err(format!(
267 "churn file {} declares schema \"{}\", expected \"{CHURN_FILE_SCHEMA}\"",
268 path.display(),
269 doc.schema
270 ));
271 }
272 if doc.events.len() > MAX_CHURN_EVENTS {
273 return Err(format!(
274 "churn file {} has {} events, exceeding the {MAX_CHURN_EVENTS} limit",
275 path.display(),
276 doc.events.len()
277 ));
278 }
279
280 let state = churn_event_state_from_doc(&doc, path, root)?;
281 Ok(build_churn_result(state, false))
282}
283
284fn churn_event_state_from_doc(
289 doc: &ChurnFileDoc,
290 path: &Path,
291 root: &Path,
292) -> Result<ChurnEventState, String> {
293 let mut builder = ChurnFileImportBuilder::new(path, root, churn_file_future_limit());
294
295 for event in &doc.events {
296 builder.push_event(event)?;
297 }
298
299 Ok(builder.finish())
300}
301
302fn churn_file_future_limit() -> u64 {
303 let now_secs = std::time::SystemTime::now()
304 .duration_since(std::time::UNIX_EPOCH)
305 .unwrap_or_default()
306 .as_secs();
307 now_secs.saturating_add(MAX_FUTURE_TIMESTAMP_SECS)
308}
309
/// Accumulates validated churn-file events into per-file event lists.
struct ChurnFileImportBuilder<'a> {
    /// Location of the churn file; used in error messages only.
    path: &'a Path,
    /// Directory every event path is joined onto.
    root: &'a Path,
    /// Largest accepted event timestamp (unix seconds).
    future_limit: u64,
    /// Events collected so far, keyed by `root`-joined path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Distinct author strings in first-seen order.
    author_pool: Vec<String>,
    /// Reverse lookup from author string to its position in `author_pool`.
    author_index: FxHashMap<String, u32>,
}
318
319impl<'a> ChurnFileImportBuilder<'a> {
320 fn new(path: &'a Path, root: &'a Path, future_limit: u64) -> Self {
321 Self {
322 path,
323 root,
324 future_limit,
325 files: FxHashMap::default(),
326 author_pool: Vec::new(),
327 author_index: FxHashMap::default(),
328 }
329 }
330
331 fn push_event(&mut self, event: &ChurnFileEvent) -> Result<(), String> {
332 let rel = normalize_churn_event_path(self.path, &event.path)?;
333 validate_churn_event_timestamp(self.path, event.timestamp, self.future_limit, &rel)?;
334
335 let abs_path = self.root.join(&rel);
336 let author_idx = self.intern_author(event.author.as_deref());
337 self.files
338 .entry(abs_path)
339 .or_insert_with(|| FileEvents { events: Vec::new() })
340 .events
341 .push(CachedCommitEvent {
342 timestamp: event.timestamp,
343 lines_added: event.added,
344 lines_deleted: event.deleted,
345 author_idx,
346 });
347 Ok(())
348 }
349
350 fn intern_author(&mut self, author: Option<&str>) -> Option<u32> {
351 author
352 .map(str::trim)
353 .filter(|email| !email.is_empty())
354 .map(|email| intern_author(email, &mut self.author_pool, &mut self.author_index))
355 }
356
357 fn finish(self) -> ChurnEventState {
358 ChurnEventState {
359 files: self.files,
360 author_pool: self.author_pool,
361 }
362 }
363}
364
/// Normalizes an event path from a churn file: surrounding whitespace is
/// removed and every backslash becomes a forward slash.
///
/// `path` is the churn file itself and only appears in the error message.
///
/// # Errors
///
/// Returns a message when nothing is left after trimming.
fn normalize_churn_event_path(path: &Path, event_path: &str) -> Result<String, String> {
    let rel: String = event_path
        .trim()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect();

    if rel.is_empty() {
        Err(format!(
            "churn file {} has an event with an empty path",
            path.display()
        ))
    } else {
        Ok(rel)
    }
}
376
/// Rejects event timestamps that lie beyond `future_limit`.
///
/// Such values usually mean the producer wrote milliseconds instead of
/// seconds, which the error message points out. `path` (the churn file) and
/// `rel` (the event's normalized path) are only used to build that message.
fn validate_churn_event_timestamp(
    path: &Path,
    timestamp: u64,
    future_limit: u64,
    rel: &str,
) -> Result<(), String> {
    if timestamp > future_limit {
        return Err(format!(
            "churn file {} has event timestamp {timestamp} for \"{rel}\" more than a year in the \
             future; timestamps must be unix SECONDS (not milliseconds), UTC",
            path.display()
        ));
    }
    Ok(())
}
394
395#[must_use]
397pub fn is_shallow_clone(root: &Path) -> bool {
398 let mut command = crate::spawn::git();
399 command
400 .args(["rev-parse", "--is-shallow-repository"])
401 .current_dir(root);
402 command.output().is_ok_and(|o| {
403 String::from_utf8_lossy(&o.stdout)
404 .trim()
405 .eq_ignore_ascii_case("true")
406 })
407}
408
409#[must_use]
411pub fn is_git_repo(root: &Path) -> bool {
412 let mut command = crate::spawn::git();
413 command
414 .args(["rev-parse", "--git-dir"])
415 .current_dir(root)
416 .stdout(std::process::Stdio::null())
417 .stderr(std::process::Stdio::null());
418 command.status().is_ok_and(|s| s.success())
419}
420
/// Largest cache file, in bytes (64 MiB), that `load_churn_cache` will decode.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;

/// Format version stamped into every cache file; caches carrying a different
/// version are discarded on load.
const CHURN_CACHE_VERSION: u8 = 3;
429
/// One change to one file, in the form stored in the cache and used during
/// aggregation.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedCommitEvent {
    /// Time of the change in unix seconds.
    timestamp: u64,
    /// Lines added by the change.
    lines_added: u32,
    /// Lines deleted by the change.
    lines_deleted: u32,
    /// Index of the author in the accompanying author pool, if known.
    author_idx: Option<u32>,
}
438
/// All cached events of a single file.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// File path, stored as a (lossily converted) string.
    path: String,
    /// Events recorded for that file.
    events: Vec<CachedCommitEvent>,
}
445
/// On-disk representation of a churn analysis (`churn.bin`).
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Cache format version; must equal `CHURN_CACHE_VERSION` to be usable.
    version: u8,
    /// Commit SHA the cached events are valid for.
    last_indexed_sha: String,
    /// The `--after` value the events were collected with; a cache built for a
    /// different value is rejected.
    git_after: String,
    /// Per-file event lists.
    files: Vec<CachedFileChurn>,
    /// Whether the repository was a shallow clone when the cache was written.
    shallow_clone: bool,
    /// Author pool that the events' `author_idx` values index into.
    author_pool: Vec<String>,
}
458
/// Raw, not yet aggregated change events of a single file.
struct FileEvents {
    /// Events in the order they were collected.
    events: Vec<CachedCommitEvent>,
}
463
/// Intermediate analysis state: raw events per file plus the author pool the
/// events refer to. Turned into a `ChurnResult` by `build_churn_result`.
struct ChurnEventState {
    /// Raw events keyed by file path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Distinct authors; event `author_idx` values index into this list.
    author_pool: Vec<String>,
}
470
471fn get_head_sha(root: &Path) -> Option<String> {
473 let mut command = crate::spawn::git();
474 command.args(["rev-parse", "HEAD"]).current_dir(root);
475 command
476 .output()
477 .ok()
478 .filter(|o| o.status.success())
479 .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
480}
481
482fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
484 let mut command = crate::spawn::git();
485 command
486 .args(["merge-base", "--is-ancestor", ancestor, descendant])
487 .current_dir(root);
488 command.status().is_ok_and(|s| s.success())
489}
490
491fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
494 let cache_file = cache_dir.join("churn.bin");
495 let data = std::fs::read(&cache_file).ok()?;
496 if data.len() > MAX_CHURN_CACHE_SIZE {
497 return None;
498 }
499 let cache: ChurnCache = bitcode::decode(&data).ok()?;
500 if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
501 return None;
502 }
503 Some(cache)
504}
505
506fn save_churn_cache(
508 cache_dir: &Path,
509 last_indexed_sha: &str,
510 git_after: &str,
511 state: &ChurnEventState,
512 shallow_clone: bool,
513) {
514 let files: Vec<CachedFileChurn> = state
515 .files
516 .iter()
517 .map(|f| CachedFileChurn {
518 path: f.0.to_string_lossy().to_string(),
519 events: f.1.events.clone(),
520 })
521 .collect();
522 let cache = ChurnCache {
523 version: CHURN_CACHE_VERSION,
524 last_indexed_sha: last_indexed_sha.to_string(),
525 git_after: git_after.to_string(),
526 files,
527 shallow_clone,
528 author_pool: state.author_pool.clone(),
529 };
530 let _ = std::fs::create_dir_all(cache_dir);
531 let data = bitcode::encode(&cache);
532 let tmp = cache_dir.join("churn.bin.tmp");
533 if std::fs::write(&tmp, data).is_ok() {
534 let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
535 }
536}
537
538pub fn analyze_churn_cached(
546 root: &Path,
547 since: &SinceDuration,
548 cache_dir: &Path,
549 no_cache: bool,
550) -> Option<(ChurnResult, bool)> {
551 let head_sha = get_head_sha(root)?;
552
553 if !no_cache && let Some(result) = try_reuse_churn_cache(root, since, cache_dir, &head_sha) {
554 return Some((result, true));
555 }
556
557 analyze_fresh_churn(root, since, cache_dir, no_cache, &head_sha).map(|result| (result, false))
558}
559
560fn try_reuse_churn_cache(
561 root: &Path,
562 since: &SinceDuration,
563 cache_dir: &Path,
564 head_sha: &str,
565) -> Option<ChurnResult> {
566 let cache = load_churn_cache(cache_dir, &since.git_after)?;
567 if cache.last_indexed_sha == head_sha {
568 let shallow_clone = cache.shallow_clone;
569 return Some(build_churn_result(cache.into_event_state(), shallow_clone));
570 }
571
572 if !is_ancestor(root, &cache.last_indexed_sha, head_sha) {
573 return None;
574 }
575
576 extend_churn_cache(root, since, cache_dir, head_sha, cache)
577}
578
579fn extend_churn_cache(
580 root: &Path,
581 since: &SinceDuration,
582 cache_dir: &Path,
583 head_sha: &str,
584 cache: ChurnCache,
585) -> Option<ChurnResult> {
586 let shallow_clone = is_shallow_clone(root);
587 let range = format!("{}..HEAD", cache.last_indexed_sha);
588 let delta = analyze_churn_events(root, since, Some(&range))?;
589 let mut state = cache.into_event_state();
590 merge_churn_states(&mut state, delta);
591 save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
592 Some(build_churn_result(state, shallow_clone))
593}
594
595fn analyze_fresh_churn(
596 root: &Path,
597 since: &SinceDuration,
598 cache_dir: &Path,
599 no_cache: bool,
600 head_sha: &str,
601) -> Option<ChurnResult> {
602 let shallow_clone = is_shallow_clone(root);
603 let state = analyze_churn_events(root, since, None)?;
604 if !no_cache {
605 save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
606 }
607
608 Some(build_churn_result(state, shallow_clone))
609}
610
611impl ChurnCache {
612 fn into_event_state(self) -> ChurnEventState {
613 let files = self
614 .files
615 .into_iter()
616 .map(|entry| {
617 (
618 PathBuf::from(entry.path),
619 FileEvents {
620 events: entry.events,
621 },
622 )
623 })
624 .collect();
625 ChurnEventState {
626 files,
627 author_pool: self.author_pool,
628 }
629 }
630}
631
632fn analyze_churn_events(
634 root: &Path,
635 since: &SinceDuration,
636 revision_range: Option<&str>,
637) -> Option<ChurnEventState> {
638 let mut command = crate::spawn::git();
639 command.arg("log");
640 if let Some(range) = revision_range {
641 command.arg(range);
642 }
643 command
644 .args([
645 "--numstat",
646 "--no-merges",
647 "--no-renames",
648 "--use-mailmap",
649 "--format=format:%at|%ae",
650 &format!("--after={}", since.git_after),
651 ])
652 .current_dir(root);
653
654 let output = match spawn_output(&mut command) {
655 Ok(o) => o,
656 Err(e) => {
657 tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
658 return None;
659 }
660 };
661
662 if !output.status.success() {
663 let stderr = String::from_utf8_lossy(&output.stderr);
664 tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
665 return None;
666 }
667
668 let stdout = String::from_utf8_lossy(&output.stdout);
669 Some(parse_git_log_events(&stdout, root))
670}
671
672fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
674 let mut base_author_index: FxHashMap<String, u32> = base
675 .author_pool
676 .iter()
677 .enumerate()
678 .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
679 .collect();
680
681 let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
682 for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
683 let Ok(old_idx) = u32::try_from(old_idx) else {
684 continue;
685 };
686 let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
687 author_mapping.insert(old_idx, new_idx);
688 }
689
690 for (path, mut file) in delta.files {
691 for event in &mut file.events {
692 event.author_idx = event
693 .author_idx
694 .and_then(|idx| author_mapping.get(&idx).copied());
695 }
696 base.files
697 .entry(path)
698 .and_modify(|existing| existing.events.append(&mut file.events))
699 .or_insert(file);
700 }
701}
702
703fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
705 let now_secs = std::time::SystemTime::now()
706 .duration_since(std::time::UNIX_EPOCH)
707 .unwrap_or_default()
708 .as_secs();
709
710 let mut parser = GitLogEventParser::new(root, now_secs);
711
712 for line in stdout.lines() {
713 parser.consume_line(line);
714 }
715
716 parser.finish()
717}
718
/// Incremental, line-by-line parser for `git log --numstat` output.
struct GitLogEventParser<'a> {
    /// Directory every numstat path is joined onto.
    root: &'a Path,
    /// Fallback timestamp for numstat lines seen before any commit header.
    now_secs: u64,
    /// Events collected so far, keyed by `root`-joined path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Distinct author e-mails in first-seen order.
    author_pool: Vec<String>,
    /// Reverse lookup from e-mail to its position in `author_pool`.
    author_index: FxHashMap<String, u32>,
    /// Timestamp of the commit whose numstat lines are currently being read.
    current_timestamp: Option<u64>,
    /// Author of that commit, or `None` when its header carried no author.
    current_author_idx: Option<u32>,
}
728
729impl<'a> GitLogEventParser<'a> {
730 fn new(root: &'a Path, now_secs: u64) -> Self {
731 Self {
732 root,
733 now_secs,
734 files: FxHashMap::default(),
735 author_pool: Vec::new(),
736 author_index: FxHashMap::default(),
737 current_timestamp: None,
738 current_author_idx: None,
739 }
740 }
741
742 fn consume_line(&mut self, line: &str) {
743 let line = line.trim();
744 if line.is_empty() {
745 return;
746 }
747
748 if self.record_commit_header(line) {
749 return;
750 }
751 if self.record_legacy_timestamp(line) {
752 return;
753 }
754 self.record_numstat(line);
755 }
756
757 fn record_commit_header(&mut self, line: &str) -> bool {
758 let Some((ts_str, email)) = line.split_once('|') else {
759 return false;
760 };
761 let Ok(ts) = ts_str.parse::<u64>() else {
762 return false;
763 };
764
765 self.current_timestamp = Some(ts);
766 self.current_author_idx = Some(intern_author(
767 email,
768 &mut self.author_pool,
769 &mut self.author_index,
770 ));
771 true
772 }
773
774 fn record_legacy_timestamp(&mut self, line: &str) -> bool {
775 let Ok(ts) = line.parse::<u64>() else {
776 return false;
777 };
778
779 self.current_timestamp = Some(ts);
780 self.current_author_idx = None;
781 true
782 }
783
784 fn record_numstat(&mut self, line: &str) {
785 let Some((added, deleted, path)) = parse_numstat_line(line) else {
786 return;
787 };
788
789 let ts = self.current_timestamp.unwrap_or(self.now_secs);
790 self.files
791 .entry(self.root.join(path))
792 .or_insert_with(|| FileEvents { events: Vec::new() })
793 .events
794 .push(CachedCommitEvent {
795 timestamp: ts,
796 lines_added: added,
797 lines_deleted: deleted,
798 author_idx: self.current_author_idx,
799 });
800 }
801
802 fn finish(self) -> ChurnEventState {
803 ChurnEventState {
804 files: self.files,
805 author_pool: self.author_pool,
806 }
807 }
808}
809
810#[expect(
813 clippy::cast_possible_truncation,
814 reason = "commit count per file is bounded by git history depth"
815)]
816fn aggregate_file_churn(path: PathBuf, file: FileEvents, now_secs: u64) -> FileChurn {
817 let mut timestamps = Vec::with_capacity(file.events.len());
818 let mut weighted_commits = 0.0;
819 let mut lines_added = 0;
820 let mut lines_deleted = 0;
821 let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
822
823 for event in file.events {
824 timestamps.push(event.timestamp);
825 let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
826 let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
827 weighted_commits += weight;
828 lines_added += event.lines_added;
829 lines_deleted += event.lines_deleted;
830 accumulate_author(&mut authors, event.author_idx, weight, event.timestamp);
831 }
832
833 let commits = timestamps.len() as u32;
834 let trend = compute_trend(×tamps);
835 for c in authors.values_mut() {
836 c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
837 }
838 FileChurn {
839 path,
840 commits,
841 weighted_commits: (weighted_commits * 100.0).round() / 100.0,
842 lines_added,
843 lines_deleted,
844 trend,
845 authors,
846 }
847}
848
849fn accumulate_author(
851 authors: &mut FxHashMap<u32, AuthorContribution>,
852 author_idx: Option<u32>,
853 weight: f64,
854 timestamp: u64,
855) {
856 let Some(idx) = author_idx else {
857 return;
858 };
859 authors
860 .entry(idx)
861 .and_modify(|c| {
862 c.commits += 1;
863 c.weighted_commits += weight;
864 c.first_commit_ts = c.first_commit_ts.min(timestamp);
865 c.last_commit_ts = c.last_commit_ts.max(timestamp);
866 })
867 .or_insert(AuthorContribution {
868 commits: 1,
869 weighted_commits: weight,
870 first_commit_ts: timestamp,
871 last_commit_ts: timestamp,
872 });
873}
874
875fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
877 let now_secs = std::time::SystemTime::now()
878 .duration_since(std::time::UNIX_EPOCH)
879 .unwrap_or_default()
880 .as_secs();
881
882 let files = state
883 .files
884 .into_iter()
885 .map(|(path, file)| {
886 let churn = aggregate_file_churn(path.clone(), file, now_secs);
887 (path, churn)
888 })
889 .collect();
890
891 ChurnResult {
892 files,
893 shallow_clone,
894 author_pool: state.author_pool,
895 }
896}
897
/// Test-only helper: parses `stdout` as git log output and returns the
/// aggregated per-file map together with the author pool.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let ChurnResult {
        files, author_pool, ..
    } = build_churn_result(parse_git_log_events(stdout, root), false);
    (files, author_pool)
}
907
908fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
910 if let Some(&idx) = index.get(email) {
911 return idx;
912 }
913 #[expect(
914 clippy::cast_possible_truncation,
915 reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
916 )]
917 let idx = pool.len() as u32;
918 let owned = email.to_string();
919 index.insert(owned.clone(), idx);
920 pool.push(owned);
921 idx
922}
923
/// Splits a `git log --numstat` record of the form `added<TAB>deleted<TAB>path`.
///
/// Returns `None` when a field is missing or either count is not an unsigned
/// integer, which is the case for the `-<TAB>-<TAB>path` form. Any further
/// tab characters stay part of the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let (added_field, rest) = line.split_once('\t')?;
    let (deleted_field, path) = rest.split_once('\t')?;

    let added: u32 = added_field.parse().ok()?;
    let deleted: u32 = deleted_field.parse().ok()?;
    Some((added, deleted, path))
}
937
938fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
946 if timestamps.len() < 2 {
947 return ChurnTrend::Stable;
948 }
949
950 let min_ts = timestamps.iter().copied().min().unwrap_or(0);
951 let max_ts = timestamps.iter().copied().max().unwrap_or(0);
952
953 if max_ts == min_ts {
954 return ChurnTrend::Stable;
955 }
956
957 let midpoint = min_ts + (max_ts - min_ts) / 2;
958 let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
959 let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
960
961 if older < 1.0 {
962 return ChurnTrend::Stable;
963 }
964
965 let ratio = recent / older;
966 if ratio > 1.5 {
967 ChurnTrend::Accelerating
968 } else if ratio < 0.67 {
969 ChurnTrend::Cooling
970 } else {
971 ChurnTrend::Stable
972 }
973}
974
/// Reports whether `input` has the exact shape `DDDD-DD-DD` (ten bytes, ASCII
/// digits with `-` at positions 4 and 7).
///
/// This is a purely syntactic check; the month and day are not range-checked.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    bytes.len() == 10
        && bytes.iter().enumerate().all(|(position, byte)| match position {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        })
}
983
/// Splits a `--since` value such as `90d` into its leading run of ASCII
/// digits and the remaining unit text.
///
/// # Errors
///
/// Returns a message when the input is digits only (or empty), or when it
/// does not start with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(unit_start) => Ok((&input[..unit_start], &input[unit_start..])),
    }
}
995
996#[cfg(test)]
997mod tests {
998 use super::*;
999
1000 #[test]
1001 fn parse_since_months_short() {
1002 let d = parse_since("6m").unwrap();
1003 assert_eq!(d.git_after, "6 months ago");
1004 assert_eq!(d.display, "6 months");
1005 }
1006
1007 #[test]
1008 fn parse_since_months_long() {
1009 let d = parse_since("6months").unwrap();
1010 assert_eq!(d.git_after, "6 months ago");
1011 assert_eq!(d.display, "6 months");
1012 }
1013
1014 #[test]
1015 fn parse_since_days() {
1016 let d = parse_since("90d").unwrap();
1017 assert_eq!(d.git_after, "90 days ago");
1018 assert_eq!(d.display, "90 days");
1019 }
1020
1021 #[test]
1022 fn parse_since_year_singular() {
1023 let d = parse_since("1y").unwrap();
1024 assert_eq!(d.git_after, "1 year ago");
1025 assert_eq!(d.display, "1 year");
1026 }
1027
1028 #[test]
1029 fn parse_since_years_plural() {
1030 let d = parse_since("2years").unwrap();
1031 assert_eq!(d.git_after, "2 years ago");
1032 assert_eq!(d.display, "2 years");
1033 }
1034
1035 #[test]
1036 fn parse_since_weeks() {
1037 let d = parse_since("2w").unwrap();
1038 assert_eq!(d.git_after, "2 weeks ago");
1039 assert_eq!(d.display, "2 weeks");
1040 }
1041
1042 #[test]
1043 fn parse_since_iso_date() {
1044 let d = parse_since("2025-06-01").unwrap();
1045 assert_eq!(d.git_after, "2025-06-01");
1046 assert_eq!(d.display, "2025-06-01");
1047 }
1048
1049 #[test]
1050 fn parse_since_month_singular() {
1051 let d = parse_since("1month").unwrap();
1052 assert_eq!(d.display, "1 month");
1053 }
1054
1055 #[test]
1056 fn parse_since_day_singular() {
1057 let d = parse_since("1day").unwrap();
1058 assert_eq!(d.display, "1 day");
1059 }
1060
1061 #[test]
1062 fn parse_since_zero_rejected() {
1063 assert!(parse_since("0m").is_err());
1064 }
1065
1066 #[test]
1067 fn parse_since_no_unit_rejected() {
1068 assert!(parse_since("90").is_err());
1069 }
1070
1071 #[test]
1072 fn parse_since_unknown_unit_rejected() {
1073 assert!(parse_since("6x").is_err());
1074 }
1075
1076 #[test]
1077 fn parse_since_no_number_rejected() {
1078 assert!(parse_since("months").is_err());
1079 }
1080
1081 #[test]
1082 fn numstat_normal() {
1083 let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
1084 assert_eq!(a, 10);
1085 assert_eq!(d, 5);
1086 assert_eq!(p, "src/file.ts");
1087 }
1088
1089 #[test]
1090 fn numstat_binary_skipped() {
1091 assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
1092 }
1093
1094 #[test]
1095 fn numstat_zero_lines() {
1096 let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
1097 assert_eq!(a, 0);
1098 assert_eq!(d, 0);
1099 assert_eq!(p, "src/empty.ts");
1100 }
1101
1102 #[test]
1103 fn trend_empty_is_stable() {
1104 assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
1105 }
1106
1107 #[test]
1108 fn trend_single_commit_is_stable() {
1109 assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
1110 }
1111
1112 #[test]
1113 fn trend_accelerating() {
1114 let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
1115 assert_eq!(compute_trend(×tamps), ChurnTrend::Accelerating);
1116 }
1117
1118 #[test]
1119 fn trend_cooling() {
1120 let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
1121 assert_eq!(compute_trend(×tamps), ChurnTrend::Cooling);
1122 }
1123
1124 #[test]
1125 fn trend_stable_even_distribution() {
1126 let timestamps = vec![100, 200, 300, 700, 800, 900];
1127 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1128 }
1129
1130 #[test]
1131 fn trend_same_timestamp_is_stable() {
1132 let timestamps = vec![500, 500, 500];
1133 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1134 }
1135
1136 #[test]
1137 fn iso_date_valid() {
1138 assert!(is_iso_date("2025-06-01"));
1139 assert!(is_iso_date("2025-12-31"));
1140 }
1141
1142 #[test]
1143 fn iso_date_with_time_rejected() {
1144 assert!(!is_iso_date("2025-06-01T00:00:00"));
1145 }
1146
1147 #[test]
1148 fn iso_date_invalid() {
1149 assert!(!is_iso_date("6months"));
1150 assert!(!is_iso_date("2025"));
1151 assert!(!is_iso_date("not-a-date"));
1152 assert!(!is_iso_date("abcd-ef-gh"));
1153 }
1154
1155 #[test]
1156 fn trend_display() {
1157 assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
1158 assert_eq!(ChurnTrend::Stable.to_string(), "stable");
1159 assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
1160 }
1161
1162 #[test]
1163 fn parse_git_log_single_commit() {
1164 let root = Path::new("/project");
1165 let output = "1700000000\n10\t5\tsrc/index.ts\n";
1166 let (result, _) = parse_git_log(output, root);
1167 assert_eq!(result.len(), 1);
1168 let churn = &result[&PathBuf::from("/project/src/index.ts")];
1169 assert_eq!(churn.commits, 1);
1170 assert_eq!(churn.lines_added, 10);
1171 assert_eq!(churn.lines_deleted, 5);
1172 }
1173
1174 #[test]
1175 fn parse_git_log_multiple_commits_same_file() {
1176 let root = Path::new("/project");
1177 let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
1178 let (result, _) = parse_git_log(output, root);
1179 assert_eq!(result.len(), 1);
1180 let churn = &result[&PathBuf::from("/project/src/index.ts")];
1181 assert_eq!(churn.commits, 2);
1182 assert_eq!(churn.lines_added, 13);
1183 assert_eq!(churn.lines_deleted, 7);
1184 }
1185
1186 #[test]
1187 fn parse_git_log_multiple_files() {
1188 let root = Path::new("/project");
1189 let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
1190 let (result, _) = parse_git_log(output, root);
1191 assert_eq!(result.len(), 2);
1192 assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
1193 assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
1194 }
1195
1196 #[test]
1197 fn parse_git_log_empty_output() {
1198 let root = Path::new("/project");
1199 let (result, _) = parse_git_log("", root);
1200 assert!(result.is_empty());
1201 }
1202
1203 #[test]
1204 fn parse_git_log_skips_binary_files() {
1205 let root = Path::new("/project");
1206 let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
1207 let (result, _) = parse_git_log(output, root);
1208 assert_eq!(result.len(), 1);
1209 assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
1210 }
1211
1212 #[test]
1213 fn parse_git_log_weighted_commits_are_positive() {
1214 let root = Path::new("/project");
1215 let now_secs = std::time::SystemTime::now()
1216 .duration_since(std::time::UNIX_EPOCH)
1217 .unwrap()
1218 .as_secs();
1219 let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
1220 let (result, _) = parse_git_log(&output, root);
1221 let churn = &result[&PathBuf::from("/project/src/a.ts")];
1222 assert!(
1223 churn.weighted_commits > 0.0,
1224 "weighted_commits should be positive for recent commits"
1225 );
1226 }
1227
1228 #[test]
1229 fn trend_boundary_1_5x_ratio() {
1230 let timestamps = vec![100, 200, 600, 800, 1000];
1231 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1232 }
1233
1234 #[test]
1235 fn trend_just_above_1_5x() {
1236 let timestamps = vec![100, 600, 800, 1000];
1237 assert_eq!(compute_trend(×tamps), ChurnTrend::Accelerating);
1238 }
1239
1240 #[test]
1241 fn trend_boundary_0_67x_ratio() {
1242 let timestamps = vec![100, 200, 300, 600, 1000];
1243 assert_eq!(compute_trend(×tamps), ChurnTrend::Cooling);
1244 }
1245
1246 #[test]
1247 fn trend_two_timestamps_different() {
1248 let timestamps = vec![100, 200];
1249 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1250 }
1251
1252 #[test]
1253 fn parse_since_week_singular() {
1254 let d = parse_since("1week").unwrap();
1255 assert_eq!(d.git_after, "1 week ago");
1256 assert_eq!(d.display, "1 week");
1257 }
1258
1259 #[test]
1260 fn parse_since_weeks_long() {
1261 let d = parse_since("3weeks").unwrap();
1262 assert_eq!(d.git_after, "3 weeks ago");
1263 assert_eq!(d.display, "3 weeks");
1264 }
1265
1266 #[test]
1267 fn parse_since_days_long() {
1268 let d = parse_since("30days").unwrap();
1269 assert_eq!(d.git_after, "30 days ago");
1270 assert_eq!(d.display, "30 days");
1271 }
1272
1273 #[test]
1274 fn parse_since_year_long() {
1275 let d = parse_since("1year").unwrap();
1276 assert_eq!(d.git_after, "1 year ago");
1277 assert_eq!(d.display, "1 year");
1278 }
1279
1280 #[test]
1281 fn parse_since_overflow_number_rejected() {
1282 let result = parse_since("99999999999999999999d");
1283 assert!(result.is_err());
1284 let err = result.unwrap_err();
1285 assert!(err.contains("invalid number"));
1286 }
1287
1288 #[test]
1289 fn parse_since_zero_days_rejected() {
1290 assert!(parse_since("0d").is_err());
1291 }
1292
1293 #[test]
1294 fn parse_since_zero_weeks_rejected() {
1295 assert!(parse_since("0w").is_err());
1296 }
1297
1298 #[test]
1299 fn parse_since_zero_years_rejected() {
1300 assert!(parse_since("0y").is_err());
1301 }
1302
1303 #[test]
1304 fn numstat_missing_path() {
1305 assert!(parse_numstat_line("10\t5").is_none());
1306 }
1307
1308 #[test]
1309 fn numstat_single_field() {
1310 assert!(parse_numstat_line("10").is_none());
1311 }
1312
1313 #[test]
1314 fn numstat_empty_string() {
1315 assert!(parse_numstat_line("").is_none());
1316 }
1317
/// A `-` in the added column alone is enough for the line to be skipped.
#[test]
fn numstat_only_added_is_binary() {
    let parsed = parse_numstat_line("-\t5\tsrc/file.ts");
    assert!(parsed.is_none());
}
1322
/// A `-` in the deleted column alone is enough for the line to be skipped.
#[test]
fn numstat_only_deleted_is_binary() {
    let parsed = parse_numstat_line("10\t-\tsrc/file.ts");
    assert!(parsed.is_none());
}
1327
/// Spaces inside the path field are preserved; only tabs separate fields.
#[test]
fn numstat_path_with_spaces() {
    let (added, deleted, path) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
    assert_eq!(added, 3);
    assert_eq!(deleted, 1);
    assert_eq!(path, "path with spaces/file.ts");
}
1335
/// Four-digit added/deleted counts are parsed verbatim.
#[test]
fn numstat_large_numbers() {
    let (added, deleted, path) = parse_numstat_line("9999\t8888\tsrc/big.ts").unwrap();
    assert_eq!(added, 9999);
    assert_eq!(deleted, 8888);
    assert_eq!(path, "src/big.ts");
}
1343
/// Ten-character strings with dashes in the wrong positions are not ISO dates.
#[test]
fn iso_date_wrong_separator_positions() {
    for candidate in ["20-25-0601", "202506-01-"] {
        assert!(!is_iso_date(candidate));
    }
}
1349
/// A date missing its final digit is rejected.
#[test]
fn iso_date_too_short() {
    let accepted = is_iso_date("2025-06-0");
    assert!(!accepted);
}
1354
/// Letters in the day component are rejected.
#[test]
fn iso_date_letters_in_day() {
    let accepted = is_iso_date("2025-06-ab");
    assert!(!accepted);
}
1359
/// Letters in the month component are rejected.
#[test]
fn iso_date_letters_in_month() {
    let accepted = is_iso_date("2025-ab-01");
    assert!(!accepted);
}
1364
/// A multi-digit number followed by a long unit splits into its two parts.
#[test]
fn split_number_unit_valid() {
    let (digits, suffix) = split_number_unit("42days").unwrap();
    assert_eq!(digits, "42");
    assert_eq!(suffix, "days");
}
1371
/// A single digit followed by a one-letter unit splits into its two parts.
#[test]
fn split_number_unit_single_digit() {
    let (digits, suffix) = split_number_unit("1m").unwrap();
    assert_eq!(digits, "1");
    assert_eq!(suffix, "m");
}
1378
/// Input without a leading number reports "must start with a number".
#[test]
fn split_number_unit_no_digits() {
    let message = split_number_unit("abc").unwrap_err();
    assert!(message.contains("must start with a number"));
}
1384
/// Input that is all digits reports "requires a unit suffix".
#[test]
fn split_number_unit_no_unit() {
    let message = split_number_unit("123").unwrap_err();
    assert!(message.contains("requires a unit suffix"));
}
1390
/// A numstat line that appears before any timestamp header is still counted,
/// and its weight is expected to be close to 1.0.
#[test]
fn parse_git_log_numstat_before_timestamp_uses_now() {
    let (files, _) = parse_git_log("10\t5\tsrc/no_ts.ts\n", Path::new("/project"));
    assert_eq!(files.len(), 1);

    let key = PathBuf::from("/project/src/no_ts.ts");
    let entry = &files[&key];
    assert_eq!(entry.commits, 1);
    assert_eq!(entry.lines_added, 10);
    assert_eq!(entry.lines_deleted, 5);
    assert!(
        entry.weighted_commits > 0.9,
        "weight should be near 1.0 when timestamp defaults to now"
    );
}
1406
/// Whitespace-only lines around the header and numstat rows do not create entries.
#[test]
fn parse_git_log_whitespace_lines_ignored() {
    let log = " \n1700000000\n \n10\t5\tsrc/a.ts\n \n";
    let (files, _) = parse_git_log(log, Path::new("/project"));
    assert_eq!(files.len(), 1);
}
1414
/// Commit counts are tracked per file, and a file whose commits cluster at the
/// end of the range is classified as accelerating.
#[test]
fn parse_git_log_trend_is_computed_per_file() {
    let log = concat!(
        "1000\n5\t1\tsrc/old.ts\n",
        "2000\n3\t1\tsrc/old.ts\n",
        "1000\n1\t0\tsrc/hot.ts\n",
        "1800\n1\t0\tsrc/hot.ts\n",
        "1900\n1\t0\tsrc/hot.ts\n",
        "1950\n1\t0\tsrc/hot.ts\n",
        "2000\n1\t0\tsrc/hot.ts\n",
    );
    let (files, _) = parse_git_log(log, Path::new("/project"));

    let old_entry = &files[&PathBuf::from("/project/src/old.ts")];
    let hot_entry = &files[&PathBuf::from("/project/src/hot.ts")];
    assert_eq!(old_entry.commits, 2);
    assert_eq!(hot_entry.commits, 5);
    assert_eq!(hot_entry.trend, ChurnTrend::Accelerating);
}
1433
/// A commit dated 180 days before the current time must carry a weight
/// strictly between 0.1 and 0.5.
#[test]
fn parse_git_log_weighted_decay_for_old_commits() {
    use std::time::{SystemTime, UNIX_EPOCH};

    let now_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let commit_ts = now_secs - (180 * 86_400);
    let log = format!("{commit_ts}\n10\t5\tsrc/old.ts\n");

    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let entry = &files[&PathBuf::from("/project/src/old.ts")];
    assert!(
        entry.weighted_commits < 0.5,
        "180-day-old commit should weigh ~0.25, got {}",
        entry.weighted_commits
    );
    assert!(
        entry.weighted_commits > 0.1,
        "180-day-old commit should weigh ~0.25, got {}",
        entry.weighted_commits
    );
}
1456
/// Relative paths from the log are keyed, and stored, joined onto the root.
#[test]
fn parse_git_log_path_stored_as_absolute() {
    let (files, _) = parse_git_log("1700000000\n1\t0\tlib/utils.ts\n", Path::new("/my/project"));
    let expected = PathBuf::from("/my/project/lib/utils.ts");
    assert!(files.contains_key(&expected));
    assert_eq!(files[&expected].path, expected);
}
1466
/// The default string rendering of `weighted_commits` must not be longer than
/// its two-decimal rendering.
#[test]
fn parse_git_log_weighted_commits_rounded() {
    use std::time::{SystemTime, UNIX_EPOCH};

    let now_secs = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let log = format!("{now_secs}\n1\t0\tsrc/a.ts\n");
    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let entry = &files[&PathBuf::from("/project/src/a.ts")];

    let rendered = entry.weighted_commits.to_string();
    let two_decimals = format!("{:.2}", entry.weighted_commits);
    assert_eq!(
        rendered.len(),
        two_decimals.len().min(rendered.len()),
        "weighted_commits should be rounded to at most 2 decimal places"
    );
}
1484
/// Each trend variant serializes to its lowercase name as a JSON string.
#[test]
fn trend_serde_serialization() {
    let cases = [
        (ChurnTrend::Accelerating, "\"accelerating\""),
        (ChurnTrend::Stable, "\"stable\""),
        (ChurnTrend::Cooling, "\"cooling\""),
    ];
    for (trend, expected) in cases {
        assert_eq!(serde_json::to_string(&trend).unwrap(), expected);
    }
}
1500
/// A `timestamp|email` header puts the email in the author pool and records
/// a per-author contribution on the touched file.
#[test]
fn parse_git_log_extracts_author_email() {
    let log = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
    let (files, authors) = parse_git_log(log, Path::new("/project"));
    assert_eq!(authors, vec!["alice@example.com".to_string()]);

    let entry = &files[&PathBuf::from("/project/src/index.ts")];
    assert_eq!(entry.authors.len(), 1);

    let contribution = &entry.authors[&0];
    assert_eq!(contribution.commits, 1);
    assert_eq!(contribution.first_commit_ts, 1_700_000_000);
    assert_eq!(contribution.last_commit_ts, 1_700_000_000);
}
1514
/// An author who appears on several commits occupies a single pool slot.
#[test]
fn parse_git_log_intern_dedupes_authors() {
    let log = concat!(
        "1700000000|alice@example.com\n",
        "1\t0\ta.ts\n",
        "1700100000|bob@example.com\n",
        "2\t1\tb.ts\n",
        "1700200000|alice@example.com\n",
        "3\t2\tc.ts\n",
    );
    let (_files, authors) = parse_git_log(log, Path::new("/project"));
    assert_eq!(authors.len(), 2);
    for email in ["alice@example.com", "bob@example.com"] {
        assert!(authors.contains(&email.to_string()));
    }
}
1531
/// Commits on one file are grouped by author, tracking each author's commit
/// count and first/last commit timestamps.
#[test]
fn parse_git_log_aggregates_per_author() {
    let log = concat!(
        "1700000000|alice@example.com\n",
        "1\t0\tsrc/index.ts\n",
        "1700100000|bob@example.com\n",
        "2\t0\tsrc/index.ts\n",
        "1700200000|alice@example.com\n",
        "1\t1\tsrc/index.ts\n",
    );
    let (files, authors) = parse_git_log(log, Path::new("/project"));
    let entry = &files[&PathBuf::from("/project/src/index.ts")];
    assert_eq!(entry.commits, 3);
    assert_eq!(entry.authors.len(), 2);

    // Look up Alice's interned index rather than assuming pool order.
    let position = authors
        .iter()
        .position(|a| a == "alice@example.com")
        .unwrap();
    let alice_key = u32::try_from(position).unwrap();
    let alice = &entry.authors[&alice_key];
    assert_eq!(alice.commits, 2);
    assert_eq!(alice.first_commit_ts, 1_700_000_000);
    assert_eq!(alice.last_commit_ts, 1_700_200_000);
}
1555
/// A header with a bare timestamp (no `|email`) counts the commit but records
/// no author information.
#[test]
fn parse_git_log_legacy_bare_timestamp_still_parses() {
    let log = "1700000000\n10\t5\tsrc/index.ts\n";
    let (files, authors) = parse_git_log(log, Path::new("/project"));
    assert!(authors.is_empty());

    let entry = &files[&PathBuf::from("/project/src/index.ts")];
    assert_eq!(entry.commits, 1);
    assert!(entry.authors.is_empty());
}
1566
/// Interning the same email twice returns the same index and stores it once.
#[test]
fn intern_author_returns_existing_index() {
    let mut authors = Vec::new();
    let mut lookup = FxHashMap::default();
    let first = intern_author("alice@x", &mut authors, &mut lookup);
    let second = intern_author("alice@x", &mut authors, &mut lookup);
    assert_eq!(first, second);
    assert_eq!(authors.len(), 1);
}
1576
/// New emails receive indices 0, 1, 2 in order; a repeat returns its original index.
#[test]
fn intern_author_assigns_sequential_indices() {
    let mut authors = Vec::new();
    let mut lookup = FxHashMap::default();
    let cases = [("alice@x", 0), ("bob@x", 1), ("carol@x", 2), ("alice@x", 0)];
    for (email, expected) in cases {
        assert_eq!(intern_author(email, &mut authors, &mut lookup), expected);
    }
}
1586
/// Runs `git` with `args` inside `root`, panicking if it cannot be spawned or
/// exits unsuccessfully.
fn git(root: &Path, args: &[&str]) {
    let mut command = std::process::Command::new("git");
    command.args(args).current_dir(root);
    let exit = command.status().expect("run git");
    assert!(exit.success(), "git {args:?} failed");
}
1595
/// Writes `contents` to `root/path`, creating any missing parent directories.
fn write(root: &Path, path: &str, contents: &str) {
    let target = root.join(path);
    let parent = target.parent().expect("test path has parent");
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&target, contents).unwrap();
}
1601
/// Runs cached churn analysis against a real temporary git repository:
/// the first run is a cache miss, an unchanged re-run is a hit, and a run
/// after a new commit is still a hit that reports the additional commit and
/// leaves the persisted cache pointing at the new HEAD.
#[test]
fn cached_churn_merges_new_commits_after_head_advances() {
    let repo = tempfile::tempdir().expect("create repo");
    let root = repo.path();
    let setup: [&[&str]; 4] = [
        &["init"],
        &["config", "user.email", "churn@example.test"],
        &["config", "user.name", "Churn Test"],
        &["config", "commit.gpgsign", "false"],
    ];
    for args in setup {
        git(root, args);
    }

    write(root, "src/a.ts", "export const a = 1;\n");
    git(root, &["add", "."]);
    git(root, &["commit", "-m", "initial"]);

    let since = parse_since("1y").unwrap();
    let cache_dir = tempfile::tempdir().expect("create cache dir");
    let tracked = root.join("src/a.ts");
    let analyze = || analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();

    // Cold run: nothing cached yet.
    let (cold, cold_hit) = analyze();
    assert!(!cold_hit);
    assert_eq!(cold.files[&tracked].commits, 1);

    // Warm run: HEAD unchanged.
    let (_warm, warm_hit) = analyze();
    assert!(warm_hit);

    write(
        root,
        "src/a.ts",
        "export const a = 1;\nexport const b = 2;\n",
    );
    git(root, &["add", "."]);
    git(root, &["commit", "-m", "update a"]);
    let head = get_head_sha(root).unwrap();

    // Incremental run: HEAD advanced by one commit.
    let (incremental, incremental_hit) = analyze();
    assert!(incremental_hit);
    assert_eq!(incremental.files[&tracked].commits, 2);

    let persisted = load_churn_cache(cache_dir.path(), &since.git_after).unwrap();
    assert_eq!(persisted.last_indexed_sha, head);
}
1642
/// Writes `contents` to `churn.json` inside `dir` and returns that file's path.
fn write_churn_file(dir: &std::path::Path, contents: &str) -> PathBuf {
    let target = dir.join("churn.json");
    std::fs::write(target.as_path(), contents).unwrap();
    target
}
1648
/// Two events for the same path from different authors are merged into one
/// file entry with summed line counts and both authors in the pool.
#[test]
fn churn_file_happy_path() {
    let dir = tempfile::tempdir().unwrap();
    let json_path = write_churn_file(
        dir.path(),
        r#"{
            "schema": "fallow-churn/v1",
            "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
            ]
        }"#,
    );
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();

    let entry = &outcome.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 2);
    assert_eq!(entry.lines_added, 13);
    assert_eq!(entry.lines_deleted, 7);
    assert_eq!(entry.authors.len(), 2);
    for email in ["alice@corp", "bob@corp"] {
        assert!(outcome.author_pool.contains(&email.to_string()));
    }
    assert!(!outcome.shallow_clone);
}
1673
/// The same history fed through the git-log parser and through a churn file
/// must agree on author pool, file set, counts, trend, author count, and
/// (within 0.02) weighted commits.
#[test]
fn churn_file_matches_git_parse() {
    let root = Path::new("/project");
    let git_log = "1700000000|alice@corp\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n\n1700100000|bob@corp\n3\t2\tsrc/a.ts\n";
    let (from_git, git_authors) = parse_git_log(git_log, root);

    let dir = tempfile::tempdir().unwrap();
    let json_path = write_churn_file(
        dir.path(),
        r#"{
            "schema": "fallow-churn/v1",
            "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/b.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 3, "deleted": 1 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
            ]
        }"#,
    );
    let imported = analyze_churn_from_file(&json_path, root).unwrap();

    assert_eq!(git_authors, imported.author_pool, "author pools diverge");
    assert_eq!(from_git.len(), imported.files.len());
    for (file, expected) in &from_git {
        let actual = &imported.files[file];
        assert_eq!(expected.commits, actual.commits, "commits for {file:?}");
        assert_eq!(expected.lines_added, actual.lines_added, "added for {file:?}");
        assert_eq!(
            expected.lines_deleted, actual.lines_deleted,
            "deleted for {file:?}"
        );
        assert_eq!(expected.trend, actual.trend, "trend for {file:?}");
        assert_eq!(
            expected.authors.len(),
            actual.authors.len(),
            "authors for {file:?}"
        );
        let drift = (expected.weighted_commits - actual.weighted_commits).abs();
        assert!(
            drift < 0.02,
            "weighted_commits for {file:?}: {} vs {}",
            expected.weighted_commits,
            actual.weighted_commits
        );
    }
}
1721
/// An empty `events` array is accepted and yields no files or authors.
#[test]
fn churn_file_empty_events_is_valid() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();
    assert!(outcome.files.is_empty());
    assert!(outcome.author_pool.is_empty());
}
1733
/// Omitting the `events` key entirely is accepted and yields no files.
#[test]
fn churn_file_missing_events_key_is_valid() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1" }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();
    assert!(outcome.files.is_empty());
}
1741
/// A schema tag other than `fallow-churn/v1` is rejected, and the error names
/// the expected tag.
#[test]
fn churn_file_bad_schema_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v2", "events": [] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let err = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap_err();
    assert!(err.contains("expected \"fallow-churn/v1\""), "{err}");
}
1752
/// Content that is not valid JSON produces an error.
#[test]
fn churn_file_malformed_json_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json_path = write_churn_file(dir.path(), "{ not json");
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project"));
    assert!(outcome.is_err());
}
1759
/// A path that does not exist reports "failed to read churn file".
#[test]
fn churn_file_missing_file_rejected() {
    let missing = Path::new("/no/such/churn.json");
    let err = analyze_churn_from_file(missing, Path::new("/project")).unwrap_err();
    assert!(err.contains("failed to read churn file"), "{err}");
}
1766
/// An event whose path is only whitespace is rejected with an "empty path" error.
#[test]
fn churn_file_empty_path_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": " ", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let err = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap_err();
    assert!(err.contains("empty path"), "{err}");
}
1777
/// A 13-digit timestamp is rejected with an error that mentions milliseconds.
#[test]
fn churn_file_millisecond_timestamp_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000000, "added": 1, "deleted": 0 } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let err = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap_err();
    assert!(err.contains("milliseconds"), "{err}");
}
1789
/// An event without an `author` field still counts as a commit but adds
/// nothing to the per-file authors or the author pool.
#[test]
fn churn_file_missing_author_contributes_no_signal() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();

    let entry = &outcome.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 1);
    assert!(entry.authors.is_empty());
    assert!(outcome.author_pool.is_empty());
}
1803
/// A whitespace-only `author` value does not add an entry to the author pool.
#[test]
fn churn_file_empty_author_string_treated_as_absent() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": " ", "added": 1, "deleted": 0 } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();
    assert!(outcome.author_pool.is_empty());
}
1814
/// Unrecognized keys at the top level and inside events do not prevent parsing.
#[test]
fn churn_file_unknown_fields_ignored() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "extra": true, "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 1, "deleted": 0, "commit": "abc123", "tz": "+0200" } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();
    let entry = &outcome.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 1);
}
1827
/// An event path written with a backslash separator is keyed under the
/// forward-slash form of the path.
#[test]
fn churn_file_backslash_paths_normalized() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src\\a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let json_path = write_churn_file(dir.path(), json);
    let outcome = analyze_churn_from_file(&json_path, Path::new("/project")).unwrap();
    let expected = PathBuf::from("/project/src/a.ts");
    assert!(outcome.files.contains_key(&expected));
}
1841 }
1842}