1use rustc_hash::FxHashMap;
7use std::path::{Path, PathBuf};
8use std::process::{Command, Output};
9use std::sync::OnceLock;
10
11use serde::Deserialize;
12
13pub use fallow_types::churn::ChurnTrend;
14
/// Signature of a function that can stand in for [`Command::output`]: it
/// receives the fully prepared command and returns the captured [`Output`]
/// or the I/O error from running it.
pub type ChurnSpawnHook = fn(&mut Command) -> std::io::Result<Output>;
21
/// Process-wide, set-once slot for an optional spawn hook. While it is
/// empty, `spawn_output` runs commands directly.
static SPAWN_HOOK: OnceLock<ChurnSpawnHook> = OnceLock::new();
23
/// Installs `hook` as the process-wide spawn hook.
///
/// The slot can only be filled once: the result of `OnceLock::set` is
/// deliberately discarded, so the first installed hook wins and any later
/// call is silently ignored.
pub fn set_spawn_hook(hook: ChurnSpawnHook) {
    let _ = SPAWN_HOOK.set(hook);
}
32
33fn spawn_output(command: &mut Command) -> std::io::Result<Output> {
34 if let Some(hook) = SPAWN_HOOK.get() {
35 hook(command)
36 } else {
37 command.output()
38 }
39}
40
/// Number of seconds in one day; used to convert an age in seconds to days.
const SECS_PER_DAY: f64 = 86_400.0;

/// Half-life, in days, of the recency weighting: an event this many days old
/// contributes a weight of 0.5 to `weighted_commits`.
const HALF_LIFE_DAYS: f64 = 90.0;

/// Schema identifier an imported churn file must declare to be accepted.
const CHURN_FILE_SCHEMA: &str = "fallow-churn/v1";

/// Maximum number of events accepted from an imported churn file.
const MAX_CHURN_EVENTS: usize = 5_000_000;

/// How far past "now" (in seconds, 365 days) an imported event timestamp may
/// lie before the churn file is rejected.
const MAX_FUTURE_TIMESTAMP_SECS: u64 = 365 * 24 * 60 * 60;
66
/// A parsed `--since` value.
#[derive(Debug, Clone)]
pub struct SinceDuration {
    /// Value handed to `git log --after=` (e.g. `"6 months ago"` or an ISO date).
    pub git_after: String,
    /// Human-readable form of the same window (e.g. `"6 months"`).
    pub display: String,
}
75
/// One author's share of the churn recorded for a single file.
#[derive(Debug, Clone, Copy)]
pub struct AuthorContribution {
    /// Number of events attributed to this author.
    pub commits: u32,
    /// Sum of the recency weights of those events.
    pub weighted_commits: f64,
    /// Smallest event timestamp seen for this author.
    pub first_commit_ts: u64,
    /// Largest event timestamp seen for this author.
    pub last_commit_ts: u64,
}
91
/// Aggregated churn metrics for a single file.
#[derive(Debug, Clone)]
pub struct FileChurn {
    /// Path of the file (the event path joined onto the analysis root).
    pub path: PathBuf,
    /// Number of recorded events touching the file.
    pub commits: u32,
    /// Sum of the recency weights of those events.
    pub weighted_commits: f64,
    /// Total lines added across all events.
    pub lines_added: u32,
    /// Total lines deleted across all events.
    pub lines_deleted: u32,
    /// Whether activity is accelerating, stable, or cooling.
    pub trend: ChurnTrend,
    /// Per-author contributions, keyed by author index (an index into the
    /// accompanying author pool).
    pub authors: FxHashMap<u32, AuthorContribution>,
}
111
/// Outcome of a churn analysis.
#[derive(Debug)]
pub struct ChurnResult {
    /// Per-file churn, keyed by file path.
    pub files: FxHashMap<PathBuf, FileChurn>,
    /// Whether the repository was detected as a shallow clone.
    pub shallow_clone: bool,
    /// Interned author identifiers; `FileChurn::authors` keys index into this.
    pub author_pool: Vec<String>,
}
123
124pub fn parse_since(input: &str) -> Result<SinceDuration, String> {
135 if is_iso_date(input) {
136 return Ok(SinceDuration {
137 git_after: input.to_string(),
138 display: input.to_string(),
139 });
140 }
141
142 let (num_str, unit) = split_number_unit(input)?;
143 let num: u64 = num_str
144 .parse()
145 .map_err(|_| format!("invalid number in --since: {input}"))?;
146
147 if num == 0 {
148 return Err("--since duration must be greater than 0".to_string());
149 }
150
151 match unit {
152 "d" | "day" | "days" => {
153 let s = if num == 1 { "" } else { "s" };
154 Ok(SinceDuration {
155 git_after: format!("{num} day{s} ago"),
156 display: format!("{num} day{s}"),
157 })
158 }
159 "w" | "week" | "weeks" => {
160 let s = if num == 1 { "" } else { "s" };
161 Ok(SinceDuration {
162 git_after: format!("{num} week{s} ago"),
163 display: format!("{num} week{s}"),
164 })
165 }
166 "m" | "month" | "months" => {
167 let s = if num == 1 { "" } else { "s" };
168 Ok(SinceDuration {
169 git_after: format!("{num} month{s} ago"),
170 display: format!("{num} month{s}"),
171 })
172 }
173 "y" | "year" | "years" => {
174 let s = if num == 1 { "" } else { "s" };
175 Ok(SinceDuration {
176 git_after: format!("{num} year{s} ago"),
177 display: format!("{num} year{s}"),
178 })
179 }
180 _ => Err(format!(
181 "unknown duration unit '{unit}' in --since. Use d/w/m/y (e.g., 6m, 90d, 1y)"
182 )),
183 }
184}
185
186pub fn analyze_churn(root: &Path, since: &SinceDuration) -> Option<ChurnResult> {
190 let shallow = is_shallow_clone(root);
191 let state = analyze_churn_events(root, since, None)?;
192 Some(build_churn_result(state, shallow))
193}
194
/// Top-level document of an imported churn file (JSON).
#[derive(Debug, Deserialize)]
struct ChurnFileDoc {
    /// Schema identifier; must equal `CHURN_FILE_SCHEMA` to be accepted.
    schema: String,
    /// Recorded change events; defaults to an empty list when absent.
    #[serde(default)]
    events: Vec<ChurnFileEvent>,
}
206
/// A single change event in an imported churn file.
#[derive(Debug, Deserialize)]
struct ChurnFileEvent {
    /// File path; normalized and joined onto the analysis root on import.
    path: String,
    /// Event time as a unix timestamp in seconds.
    timestamp: u64,
    /// Optional author identifier; absent, blank, or whitespace-only values
    /// leave the event unattributed.
    #[serde(default)]
    author: Option<String>,
    /// Lines added by the event.
    added: u32,
    /// Lines deleted by the event.
    deleted: u32,
}
226
227pub fn analyze_churn_from_file(path: &Path, root: &Path) -> Result<ChurnResult, String> {
240 let raw = std::fs::read_to_string(path)
241 .map_err(|e| format!("failed to read churn file {}: {e}", path.display()))?;
242 let doc: ChurnFileDoc = serde_json::from_str(&raw)
243 .map_err(|e| format!("failed to parse churn file {}: {e}", path.display()))?;
244 if doc.schema != CHURN_FILE_SCHEMA {
245 return Err(format!(
246 "churn file {} declares schema \"{}\", expected \"{CHURN_FILE_SCHEMA}\"",
247 path.display(),
248 doc.schema
249 ));
250 }
251 if doc.events.len() > MAX_CHURN_EVENTS {
252 return Err(format!(
253 "churn file {} has {} events, exceeding the {MAX_CHURN_EVENTS} limit",
254 path.display(),
255 doc.events.len()
256 ));
257 }
258
259 let state = churn_event_state_from_doc(&doc, path, root)?;
260 Ok(build_churn_result(state, false))
261}
262
263fn churn_event_state_from_doc(
268 doc: &ChurnFileDoc,
269 path: &Path,
270 root: &Path,
271) -> Result<ChurnEventState, String> {
272 let mut builder = ChurnFileImportBuilder::new(path, root, churn_file_future_limit());
273
274 for event in &doc.events {
275 builder.push_event(event)?;
276 }
277
278 Ok(builder.finish())
279}
280
281fn churn_file_future_limit() -> u64 {
282 let now_secs = std::time::SystemTime::now()
283 .duration_since(std::time::UNIX_EPOCH)
284 .unwrap_or_default()
285 .as_secs();
286 now_secs.saturating_add(MAX_FUTURE_TIMESTAMP_SECS)
287}
288
/// Accumulates validated events from an imported churn file.
struct ChurnFileImportBuilder<'a> {
    /// Path of the churn file; used only in error messages.
    path: &'a Path,
    /// Root that event paths are joined onto.
    root: &'a Path,
    /// Largest accepted event timestamp (unix seconds).
    future_limit: u64,
    /// Events collected so far, grouped by resolved file path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Interned author identifiers, in first-seen order.
    author_pool: Vec<String>,
    /// Reverse lookup from author identifier to its index in `author_pool`.
    author_index: FxHashMap<String, u32>,
}
297
298impl<'a> ChurnFileImportBuilder<'a> {
299 fn new(path: &'a Path, root: &'a Path, future_limit: u64) -> Self {
300 Self {
301 path,
302 root,
303 future_limit,
304 files: FxHashMap::default(),
305 author_pool: Vec::new(),
306 author_index: FxHashMap::default(),
307 }
308 }
309
310 fn push_event(&mut self, event: &ChurnFileEvent) -> Result<(), String> {
311 let rel = normalize_churn_event_path(self.path, &event.path)?;
312 validate_churn_event_timestamp(self.path, event.timestamp, self.future_limit, &rel)?;
313
314 let abs_path = self.root.join(&rel);
315 let author_idx = self.intern_author(event.author.as_deref());
316 self.files
317 .entry(abs_path)
318 .or_insert_with(|| FileEvents { events: Vec::new() })
319 .events
320 .push(CachedCommitEvent {
321 timestamp: event.timestamp,
322 lines_added: event.added,
323 lines_deleted: event.deleted,
324 author_idx,
325 });
326 Ok(())
327 }
328
329 fn intern_author(&mut self, author: Option<&str>) -> Option<u32> {
330 author
331 .map(str::trim)
332 .filter(|email| !email.is_empty())
333 .map(|email| intern_author(email, &mut self.author_pool, &mut self.author_index))
334 }
335
336 fn finish(self) -> ChurnEventState {
337 ChurnEventState {
338 files: self.files,
339 author_pool: self.author_pool,
340 }
341 }
342}
343
/// Normalizes an event path from a churn file: surrounding whitespace is
/// removed and backslashes become forward slashes.
///
/// `path` is the churn file itself and only appears in the error message.
///
/// # Errors
///
/// Returns a message when the path is empty after normalization.
fn normalize_churn_event_path(path: &Path, event_path: &str) -> Result<String, String> {
    // Trimming first is equivalent: neither slash variant is whitespace.
    let rel = event_path.trim().replace('\\', "/");
    if rel.is_empty() {
        Err(format!(
            "churn file {} has an event with an empty path",
            path.display()
        ))
    } else {
        Ok(rel)
    }
}
355
/// Checks that an imported event's timestamp does not exceed `future_limit`.
///
/// `path` (the churn file) and `rel` (the event's file path) are used only
/// to build the error message.
///
/// # Errors
///
/// Returns a message when `timestamp` is strictly greater than
/// `future_limit`; a typical cause is milliseconds supplied instead of
/// seconds.
fn validate_churn_event_timestamp(
    path: &Path,
    timestamp: u64,
    future_limit: u64,
    rel: &str,
) -> Result<(), String> {
    if timestamp > future_limit {
        Err(format!(
            "churn file {} has event timestamp {} for \"{rel}\" more than a year in the \
             future; timestamps must be unix SECONDS (not milliseconds), UTC",
            path.display(),
            timestamp
        ))
    } else {
        Ok(())
    }
}
373
374#[must_use]
376pub fn is_shallow_clone(root: &Path) -> bool {
377 let mut command = crate::spawn::git();
378 command
379 .args(["rev-parse", "--is-shallow-repository"])
380 .current_dir(root);
381 command.output().is_ok_and(|o| {
382 String::from_utf8_lossy(&o.stdout)
383 .trim()
384 .eq_ignore_ascii_case("true")
385 })
386}
387
388#[must_use]
390pub fn is_git_repo(root: &Path) -> bool {
391 let mut command = crate::spawn::git();
392 command
393 .args(["rev-parse", "--git-dir"])
394 .current_dir(root)
395 .stdout(std::process::Stdio::null())
396 .stderr(std::process::Stdio::null());
397 command.status().is_ok_and(|s| s.success())
398}
399
/// Largest cache file (64 MiB) that will be decoded; bigger files are ignored.
const MAX_CHURN_CACHE_SIZE: usize = 64 * 1024 * 1024;

/// Version stamp written into the cache; caches carrying a different version
/// are discarded on load.
const CHURN_CACHE_VERSION: u8 = 3;
408
/// One change to one file, as kept in memory and in the on-disk cache.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedCommitEvent {
    /// Event time as a unix timestamp in seconds.
    timestamp: u64,
    /// Lines added by the event.
    lines_added: u32,
    /// Lines deleted by the event.
    lines_deleted: u32,
    /// Index of the author in the owning author pool, if known.
    author_idx: Option<u32>,
}
417
/// Cached events for a single file.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct CachedFileChurn {
    /// File path, stored as a (lossily converted) string.
    path: String,
    /// Events recorded for the file.
    events: Vec<CachedCommitEvent>,
}
424
/// On-disk churn cache, serialized with `bitcode`.
#[derive(Clone, bitcode::Encode, bitcode::Decode)]
struct ChurnCache {
    /// Format version; must equal `CHURN_CACHE_VERSION` to be reused.
    version: u8,
    /// Commit SHA the cached events were computed up to.
    last_indexed_sha: String,
    /// The `git_after` window the cache was built for; must match to be reused.
    git_after: String,
    /// Cached per-file events.
    files: Vec<CachedFileChurn>,
    /// Shallow-clone flag recorded when the cache was written.
    shallow_clone: bool,
    /// Interned author identifiers referenced by the events' `author_idx`.
    author_pool: Vec<String>,
}
437
/// All recorded events for a single file, prior to aggregation.
struct FileEvents {
    /// Events in the order they were recorded.
    events: Vec<CachedCommitEvent>,
}
442
/// Raw, un-aggregated churn data: per-file events plus the author pool their
/// `author_idx` values refer to.
struct ChurnEventState {
    /// Events grouped by file path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Interned author identifiers.
    author_pool: Vec<String>,
}
449
450fn get_head_sha(root: &Path) -> Option<String> {
452 let mut command = crate::spawn::git();
453 command.args(["rev-parse", "HEAD"]).current_dir(root);
454 command
455 .output()
456 .ok()
457 .filter(|o| o.status.success())
458 .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
459}
460
461fn is_ancestor(root: &Path, ancestor: &str, descendant: &str) -> bool {
463 let mut command = crate::spawn::git();
464 command
465 .args(["merge-base", "--is-ancestor", ancestor, descendant])
466 .current_dir(root);
467 command.status().is_ok_and(|s| s.success())
468}
469
470fn load_churn_cache(cache_dir: &Path, git_after: &str) -> Option<ChurnCache> {
473 let cache_file = cache_dir.join("churn.bin");
474 let data = std::fs::read(&cache_file).ok()?;
475 if data.len() > MAX_CHURN_CACHE_SIZE {
476 return None;
477 }
478 let cache: ChurnCache = bitcode::decode(&data).ok()?;
479 if cache.version != CHURN_CACHE_VERSION || cache.git_after != git_after {
480 return None;
481 }
482 Some(cache)
483}
484
485fn save_churn_cache(
487 cache_dir: &Path,
488 last_indexed_sha: &str,
489 git_after: &str,
490 state: &ChurnEventState,
491 shallow_clone: bool,
492) {
493 let files: Vec<CachedFileChurn> = state
494 .files
495 .iter()
496 .map(|f| CachedFileChurn {
497 path: f.0.to_string_lossy().to_string(),
498 events: f.1.events.clone(),
499 })
500 .collect();
501 let cache = ChurnCache {
502 version: CHURN_CACHE_VERSION,
503 last_indexed_sha: last_indexed_sha.to_string(),
504 git_after: git_after.to_string(),
505 files,
506 shallow_clone,
507 author_pool: state.author_pool.clone(),
508 };
509 let _ = std::fs::create_dir_all(cache_dir);
510 let data = bitcode::encode(&cache);
511 let tmp = cache_dir.join("churn.bin.tmp");
512 if std::fs::write(&tmp, data).is_ok() {
513 let _ = std::fs::rename(&tmp, cache_dir.join("churn.bin"));
514 }
515}
516
517pub fn analyze_churn_cached(
525 root: &Path,
526 since: &SinceDuration,
527 cache_dir: &Path,
528 no_cache: bool,
529) -> Option<(ChurnResult, bool)> {
530 let head_sha = get_head_sha(root)?;
531
532 if !no_cache && let Some(result) = try_reuse_churn_cache(root, since, cache_dir, &head_sha) {
533 return Some((result, true));
534 }
535
536 analyze_fresh_churn(root, since, cache_dir, no_cache, &head_sha).map(|result| (result, false))
537}
538
539fn try_reuse_churn_cache(
540 root: &Path,
541 since: &SinceDuration,
542 cache_dir: &Path,
543 head_sha: &str,
544) -> Option<ChurnResult> {
545 let cache = load_churn_cache(cache_dir, &since.git_after)?;
546 if cache.last_indexed_sha == head_sha {
547 let shallow_clone = cache.shallow_clone;
548 return Some(build_churn_result(cache.into_event_state(), shallow_clone));
549 }
550
551 if !is_ancestor(root, &cache.last_indexed_sha, head_sha) {
552 return None;
553 }
554
555 extend_churn_cache(root, since, cache_dir, head_sha, cache)
556}
557
558fn extend_churn_cache(
559 root: &Path,
560 since: &SinceDuration,
561 cache_dir: &Path,
562 head_sha: &str,
563 cache: ChurnCache,
564) -> Option<ChurnResult> {
565 let shallow_clone = is_shallow_clone(root);
566 let range = format!("{}..HEAD", cache.last_indexed_sha);
567 let delta = analyze_churn_events(root, since, Some(&range))?;
568 let mut state = cache.into_event_state();
569 merge_churn_states(&mut state, delta);
570 save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
571 Some(build_churn_result(state, shallow_clone))
572}
573
574fn analyze_fresh_churn(
575 root: &Path,
576 since: &SinceDuration,
577 cache_dir: &Path,
578 no_cache: bool,
579 head_sha: &str,
580) -> Option<ChurnResult> {
581 let shallow_clone = is_shallow_clone(root);
582 let state = analyze_churn_events(root, since, None)?;
583 if !no_cache {
584 save_churn_cache(cache_dir, head_sha, &since.git_after, &state, shallow_clone);
585 }
586
587 Some(build_churn_result(state, shallow_clone))
588}
589
590impl ChurnCache {
591 fn into_event_state(self) -> ChurnEventState {
592 let files = self
593 .files
594 .into_iter()
595 .map(|entry| {
596 (
597 PathBuf::from(entry.path),
598 FileEvents {
599 events: entry.events,
600 },
601 )
602 })
603 .collect();
604 ChurnEventState {
605 files,
606 author_pool: self.author_pool,
607 }
608 }
609}
610
611fn analyze_churn_events(
613 root: &Path,
614 since: &SinceDuration,
615 revision_range: Option<&str>,
616) -> Option<ChurnEventState> {
617 let mut command = crate::spawn::git();
618 command.arg("log");
619 if let Some(range) = revision_range {
620 command.arg(range);
621 }
622 command
623 .args([
624 "--numstat",
625 "--no-merges",
626 "--no-renames",
627 "--use-mailmap",
628 "--format=format:%at|%ae",
629 &format!("--after={}", since.git_after),
630 ])
631 .current_dir(root);
632
633 let output = match spawn_output(&mut command) {
634 Ok(o) => o,
635 Err(e) => {
636 tracing::warn!("hotspot analysis skipped: failed to run git: {e}");
637 return None;
638 }
639 };
640
641 if !output.status.success() {
642 let stderr = String::from_utf8_lossy(&output.stderr);
643 tracing::warn!("hotspot analysis skipped: git log failed: {stderr}");
644 return None;
645 }
646
647 let stdout = String::from_utf8_lossy(&output.stdout);
648 Some(parse_git_log_events(&stdout, root))
649}
650
651fn merge_churn_states(base: &mut ChurnEventState, delta: ChurnEventState) {
653 let mut base_author_index: FxHashMap<String, u32> = base
654 .author_pool
655 .iter()
656 .enumerate()
657 .filter_map(|(idx, email)| u32::try_from(idx).ok().map(|idx| (email.clone(), idx)))
658 .collect();
659
660 let mut author_mapping: FxHashMap<u32, u32> = FxHashMap::default();
661 for (old_idx, email) in delta.author_pool.into_iter().enumerate() {
662 let Ok(old_idx) = u32::try_from(old_idx) else {
663 continue;
664 };
665 let new_idx = intern_author(&email, &mut base.author_pool, &mut base_author_index);
666 author_mapping.insert(old_idx, new_idx);
667 }
668
669 for (path, mut file) in delta.files {
670 for event in &mut file.events {
671 event.author_idx = event
672 .author_idx
673 .and_then(|idx| author_mapping.get(&idx).copied());
674 }
675 base.files
676 .entry(path)
677 .and_modify(|existing| existing.events.append(&mut file.events))
678 .or_insert(file);
679 }
680}
681
682fn parse_git_log_events(stdout: &str, root: &Path) -> ChurnEventState {
684 let now_secs = std::time::SystemTime::now()
685 .duration_since(std::time::UNIX_EPOCH)
686 .unwrap_or_default()
687 .as_secs();
688
689 let mut parser = GitLogEventParser::new(root, now_secs);
690
691 for line in stdout.lines() {
692 parser.consume_line(line);
693 }
694
695 parser.finish()
696}
697
/// Line-by-line parser for `git log --numstat` output.
struct GitLogEventParser<'a> {
    /// Root that numstat paths are joined onto.
    root: &'a Path,
    /// Fallback timestamp for numstat lines seen before any commit header.
    now_secs: u64,
    /// Events collected so far, grouped by resolved file path.
    files: FxHashMap<PathBuf, FileEvents>,
    /// Interned author emails, in first-seen order.
    author_pool: Vec<String>,
    /// Reverse lookup from author email to its index in `author_pool`.
    author_index: FxHashMap<String, u32>,
    /// Timestamp of the commit whose numstat lines are being read, if any.
    current_timestamp: Option<u64>,
    /// Author index of that commit; `None` for timestamp-only headers.
    current_author_idx: Option<u32>,
}
707
708impl<'a> GitLogEventParser<'a> {
709 fn new(root: &'a Path, now_secs: u64) -> Self {
710 Self {
711 root,
712 now_secs,
713 files: FxHashMap::default(),
714 author_pool: Vec::new(),
715 author_index: FxHashMap::default(),
716 current_timestamp: None,
717 current_author_idx: None,
718 }
719 }
720
721 fn consume_line(&mut self, line: &str) {
722 let line = line.trim();
723 if line.is_empty() {
724 return;
725 }
726
727 if self.record_commit_header(line) {
728 return;
729 }
730 if self.record_legacy_timestamp(line) {
731 return;
732 }
733 self.record_numstat(line);
734 }
735
736 fn record_commit_header(&mut self, line: &str) -> bool {
737 let Some((ts_str, email)) = line.split_once('|') else {
738 return false;
739 };
740 let Ok(ts) = ts_str.parse::<u64>() else {
741 return false;
742 };
743
744 self.current_timestamp = Some(ts);
745 self.current_author_idx = Some(intern_author(
746 email,
747 &mut self.author_pool,
748 &mut self.author_index,
749 ));
750 true
751 }
752
753 fn record_legacy_timestamp(&mut self, line: &str) -> bool {
754 let Ok(ts) = line.parse::<u64>() else {
755 return false;
756 };
757
758 self.current_timestamp = Some(ts);
759 self.current_author_idx = None;
760 true
761 }
762
763 fn record_numstat(&mut self, line: &str) {
764 let Some((added, deleted, path)) = parse_numstat_line(line) else {
765 return;
766 };
767
768 let ts = self.current_timestamp.unwrap_or(self.now_secs);
769 self.files
770 .entry(self.root.join(path))
771 .or_insert_with(|| FileEvents { events: Vec::new() })
772 .events
773 .push(CachedCommitEvent {
774 timestamp: ts,
775 lines_added: added,
776 lines_deleted: deleted,
777 author_idx: self.current_author_idx,
778 });
779 }
780
781 fn finish(self) -> ChurnEventState {
782 ChurnEventState {
783 files: self.files,
784 author_pool: self.author_pool,
785 }
786 }
787}
788
789#[expect(
792 clippy::cast_possible_truncation,
793 reason = "commit count per file is bounded by git history depth"
794)]
795fn aggregate_file_churn(path: PathBuf, file: FileEvents, now_secs: u64) -> FileChurn {
796 let mut timestamps = Vec::with_capacity(file.events.len());
797 let mut weighted_commits = 0.0;
798 let mut lines_added = 0;
799 let mut lines_deleted = 0;
800 let mut authors: FxHashMap<u32, AuthorContribution> = FxHashMap::default();
801
802 for event in file.events {
803 timestamps.push(event.timestamp);
804 let age_days = (now_secs.saturating_sub(event.timestamp)) as f64 / SECS_PER_DAY;
805 let weight = 0.5_f64.powf(age_days / HALF_LIFE_DAYS);
806 weighted_commits += weight;
807 lines_added += event.lines_added;
808 lines_deleted += event.lines_deleted;
809 accumulate_author(&mut authors, event.author_idx, weight, event.timestamp);
810 }
811
812 let commits = timestamps.len() as u32;
813 let trend = compute_trend(×tamps);
814 for c in authors.values_mut() {
815 c.weighted_commits = (c.weighted_commits * 100.0).round() / 100.0;
816 }
817 FileChurn {
818 path,
819 commits,
820 weighted_commits: (weighted_commits * 100.0).round() / 100.0,
821 lines_added,
822 lines_deleted,
823 trend,
824 authors,
825 }
826}
827
828fn accumulate_author(
830 authors: &mut FxHashMap<u32, AuthorContribution>,
831 author_idx: Option<u32>,
832 weight: f64,
833 timestamp: u64,
834) {
835 let Some(idx) = author_idx else {
836 return;
837 };
838 authors
839 .entry(idx)
840 .and_modify(|c| {
841 c.commits += 1;
842 c.weighted_commits += weight;
843 c.first_commit_ts = c.first_commit_ts.min(timestamp);
844 c.last_commit_ts = c.last_commit_ts.max(timestamp);
845 })
846 .or_insert(AuthorContribution {
847 commits: 1,
848 weighted_commits: weight,
849 first_commit_ts: timestamp,
850 last_commit_ts: timestamp,
851 });
852}
853
854fn build_churn_result(state: ChurnEventState, shallow_clone: bool) -> ChurnResult {
856 let now_secs = std::time::SystemTime::now()
857 .duration_since(std::time::UNIX_EPOCH)
858 .unwrap_or_default()
859 .as_secs();
860
861 let files = state
862 .files
863 .into_iter()
864 .map(|(path, file)| {
865 let churn = aggregate_file_churn(path.clone(), file, now_secs);
866 (path, churn)
867 })
868 .collect();
869
870 ChurnResult {
871 files,
872 shallow_clone,
873 author_pool: state.author_pool,
874 }
875}
876
/// Test helper: parses git log output and aggregates it in one step,
/// returning the per-file churn map together with the author pool.
#[cfg(test)]
fn parse_git_log(stdout: &str, root: &Path) -> (FxHashMap<PathBuf, FileChurn>, Vec<String>) {
    let events = parse_git_log_events(stdout, root);
    let churn = build_churn_result(events, false);
    (churn.files, churn.author_pool)
}
886
887fn intern_author(email: &str, pool: &mut Vec<String>, index: &mut FxHashMap<String, u32>) -> u32 {
889 if let Some(&idx) = index.get(email) {
890 return idx;
891 }
892 #[expect(
893 clippy::cast_possible_truncation,
894 reason = "author count is bounded by git history; u32 is far above any realistic ceiling"
895 )]
896 let idx = pool.len() as u32;
897 let owned = email.to_string();
898 index.insert(owned.clone(), idx);
899 pool.push(owned);
900 idx
901}
902
/// Parses one `git log --numstat` line of the form
/// `<added>\t<deleted>\t<path>`.
///
/// Returns `None` when a field is missing or a count is not a `u32`; this
/// includes binary files, which git reports as `-\t-\t<path>`. Tabs after
/// the second one stay part of the path.
fn parse_numstat_line(line: &str) -> Option<(u32, u32, &str)> {
    let mut fields = line.splitn(3, '\t');
    let (added, deleted, path) = (fields.next()?, fields.next()?, fields.next()?);
    Some((added.parse().ok()?, deleted.parse().ok()?, path))
}
916
917fn compute_trend(timestamps: &[u64]) -> ChurnTrend {
925 if timestamps.len() < 2 {
926 return ChurnTrend::Stable;
927 }
928
929 let min_ts = timestamps.iter().copied().min().unwrap_or(0);
930 let max_ts = timestamps.iter().copied().max().unwrap_or(0);
931
932 if max_ts == min_ts {
933 return ChurnTrend::Stable;
934 }
935
936 let midpoint = min_ts + (max_ts - min_ts) / 2;
937 let recent = timestamps.iter().filter(|&&ts| ts > midpoint).count() as f64;
938 let older = timestamps.iter().filter(|&&ts| ts <= midpoint).count() as f64;
939
940 if older < 1.0 {
941 return ChurnTrend::Stable;
942 }
943
944 let ratio = recent / older;
945 if ratio > 1.5 {
946 ChurnTrend::Accelerating
947 } else if ratio < 0.67 {
948 ChurnTrend::Cooling
949 } else {
950 ChurnTrend::Stable
951 }
952}
953
/// Reports whether `input` has the shape `YYYY-MM-DD`: exactly ten bytes,
/// with `-` at positions 4 and 7 and ASCII digits everywhere else.
///
/// Only the shape is checked; month and day ranges are not validated.
fn is_iso_date(input: &str) -> bool {
    let bytes = input.as_bytes();
    bytes.len() == 10
        && bytes.iter().enumerate().all(|(pos, byte)| match pos {
            4 | 7 => *byte == b'-',
            _ => byte.is_ascii_digit(),
        })
}
962
/// Splits a `<number><unit>` string at the first character that is not an
/// ASCII digit, returning `(digits, unit)`.
///
/// # Errors
///
/// Returns a message when the input consists only of digits (or is empty),
/// i.e. has no unit, or when it does not start with a digit.
fn split_number_unit(input: &str) -> Result<(&str, &str), String> {
    match input.find(|c: char| !c.is_ascii_digit()) {
        None => Err(format!(
            "--since requires a unit suffix (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(0) => Err(format!(
            "--since must start with a number (e.g., 6m, 90d, 1y), got: {input}"
        )),
        Some(pos) => Ok(input.split_at(pos)),
    }
}
974
975#[cfg(test)]
976mod tests {
977 use super::*;
978
979 #[test]
980 fn parse_since_months_short() {
981 let d = parse_since("6m").unwrap();
982 assert_eq!(d.git_after, "6 months ago");
983 assert_eq!(d.display, "6 months");
984 }
985
986 #[test]
987 fn parse_since_months_long() {
988 let d = parse_since("6months").unwrap();
989 assert_eq!(d.git_after, "6 months ago");
990 assert_eq!(d.display, "6 months");
991 }
992
993 #[test]
994 fn parse_since_days() {
995 let d = parse_since("90d").unwrap();
996 assert_eq!(d.git_after, "90 days ago");
997 assert_eq!(d.display, "90 days");
998 }
999
1000 #[test]
1001 fn parse_since_year_singular() {
1002 let d = parse_since("1y").unwrap();
1003 assert_eq!(d.git_after, "1 year ago");
1004 assert_eq!(d.display, "1 year");
1005 }
1006
1007 #[test]
1008 fn parse_since_years_plural() {
1009 let d = parse_since("2years").unwrap();
1010 assert_eq!(d.git_after, "2 years ago");
1011 assert_eq!(d.display, "2 years");
1012 }
1013
1014 #[test]
1015 fn parse_since_weeks() {
1016 let d = parse_since("2w").unwrap();
1017 assert_eq!(d.git_after, "2 weeks ago");
1018 assert_eq!(d.display, "2 weeks");
1019 }
1020
1021 #[test]
1022 fn parse_since_iso_date() {
1023 let d = parse_since("2025-06-01").unwrap();
1024 assert_eq!(d.git_after, "2025-06-01");
1025 assert_eq!(d.display, "2025-06-01");
1026 }
1027
1028 #[test]
1029 fn parse_since_month_singular() {
1030 let d = parse_since("1month").unwrap();
1031 assert_eq!(d.display, "1 month");
1032 }
1033
1034 #[test]
1035 fn parse_since_day_singular() {
1036 let d = parse_since("1day").unwrap();
1037 assert_eq!(d.display, "1 day");
1038 }
1039
1040 #[test]
1041 fn parse_since_zero_rejected() {
1042 assert!(parse_since("0m").is_err());
1043 }
1044
1045 #[test]
1046 fn parse_since_no_unit_rejected() {
1047 assert!(parse_since("90").is_err());
1048 }
1049
1050 #[test]
1051 fn parse_since_unknown_unit_rejected() {
1052 assert!(parse_since("6x").is_err());
1053 }
1054
1055 #[test]
1056 fn parse_since_no_number_rejected() {
1057 assert!(parse_since("months").is_err());
1058 }
1059
1060 #[test]
1061 fn numstat_normal() {
1062 let (a, d, p) = parse_numstat_line("10\t5\tsrc/file.ts").unwrap();
1063 assert_eq!(a, 10);
1064 assert_eq!(d, 5);
1065 assert_eq!(p, "src/file.ts");
1066 }
1067
1068 #[test]
1069 fn numstat_binary_skipped() {
1070 assert!(parse_numstat_line("-\t-\tsrc/image.png").is_none());
1071 }
1072
1073 #[test]
1074 fn numstat_zero_lines() {
1075 let (a, d, p) = parse_numstat_line("0\t0\tsrc/empty.ts").unwrap();
1076 assert_eq!(a, 0);
1077 assert_eq!(d, 0);
1078 assert_eq!(p, "src/empty.ts");
1079 }
1080
1081 #[test]
1082 fn trend_empty_is_stable() {
1083 assert_eq!(compute_trend(&[]), ChurnTrend::Stable);
1084 }
1085
1086 #[test]
1087 fn trend_single_commit_is_stable() {
1088 assert_eq!(compute_trend(&[100]), ChurnTrend::Stable);
1089 }
1090
1091 #[test]
1092 fn trend_accelerating() {
1093 let timestamps = vec![100, 200, 800, 850, 900, 950, 1000];
1094 assert_eq!(compute_trend(×tamps), ChurnTrend::Accelerating);
1095 }
1096
1097 #[test]
1098 fn trend_cooling() {
1099 let timestamps = vec![100, 150, 200, 250, 300, 900, 1000];
1100 assert_eq!(compute_trend(×tamps), ChurnTrend::Cooling);
1101 }
1102
1103 #[test]
1104 fn trend_stable_even_distribution() {
1105 let timestamps = vec![100, 200, 300, 700, 800, 900];
1106 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1107 }
1108
1109 #[test]
1110 fn trend_same_timestamp_is_stable() {
1111 let timestamps = vec![500, 500, 500];
1112 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1113 }
1114
1115 #[test]
1116 fn iso_date_valid() {
1117 assert!(is_iso_date("2025-06-01"));
1118 assert!(is_iso_date("2025-12-31"));
1119 }
1120
1121 #[test]
1122 fn iso_date_with_time_rejected() {
1123 assert!(!is_iso_date("2025-06-01T00:00:00"));
1124 }
1125
1126 #[test]
1127 fn iso_date_invalid() {
1128 assert!(!is_iso_date("6months"));
1129 assert!(!is_iso_date("2025"));
1130 assert!(!is_iso_date("not-a-date"));
1131 assert!(!is_iso_date("abcd-ef-gh"));
1132 }
1133
1134 #[test]
1135 fn trend_display() {
1136 assert_eq!(ChurnTrend::Accelerating.to_string(), "accelerating");
1137 assert_eq!(ChurnTrend::Stable.to_string(), "stable");
1138 assert_eq!(ChurnTrend::Cooling.to_string(), "cooling");
1139 }
1140
1141 #[test]
1142 fn parse_git_log_single_commit() {
1143 let root = Path::new("/project");
1144 let output = "1700000000\n10\t5\tsrc/index.ts\n";
1145 let (result, _) = parse_git_log(output, root);
1146 assert_eq!(result.len(), 1);
1147 let churn = &result[&PathBuf::from("/project/src/index.ts")];
1148 assert_eq!(churn.commits, 1);
1149 assert_eq!(churn.lines_added, 10);
1150 assert_eq!(churn.lines_deleted, 5);
1151 }
1152
1153 #[test]
1154 fn parse_git_log_multiple_commits_same_file() {
1155 let root = Path::new("/project");
1156 let output = "1700000000\n10\t5\tsrc/index.ts\n\n1700100000\n3\t2\tsrc/index.ts\n";
1157 let (result, _) = parse_git_log(output, root);
1158 assert_eq!(result.len(), 1);
1159 let churn = &result[&PathBuf::from("/project/src/index.ts")];
1160 assert_eq!(churn.commits, 2);
1161 assert_eq!(churn.lines_added, 13);
1162 assert_eq!(churn.lines_deleted, 7);
1163 }
1164
1165 #[test]
1166 fn parse_git_log_multiple_files() {
1167 let root = Path::new("/project");
1168 let output = "1700000000\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n";
1169 let (result, _) = parse_git_log(output, root);
1170 assert_eq!(result.len(), 2);
1171 assert!(result.contains_key(&PathBuf::from("/project/src/a.ts")));
1172 assert!(result.contains_key(&PathBuf::from("/project/src/b.ts")));
1173 }
1174
1175 #[test]
1176 fn parse_git_log_empty_output() {
1177 let root = Path::new("/project");
1178 let (result, _) = parse_git_log("", root);
1179 assert!(result.is_empty());
1180 }
1181
1182 #[test]
1183 fn parse_git_log_skips_binary_files() {
1184 let root = Path::new("/project");
1185 let output = "1700000000\n-\t-\timage.png\n10\t5\tsrc/a.ts\n";
1186 let (result, _) = parse_git_log(output, root);
1187 assert_eq!(result.len(), 1);
1188 assert!(!result.contains_key(&PathBuf::from("/project/image.png")));
1189 }
1190
1191 #[test]
1192 fn parse_git_log_weighted_commits_are_positive() {
1193 let root = Path::new("/project");
1194 let now_secs = std::time::SystemTime::now()
1195 .duration_since(std::time::UNIX_EPOCH)
1196 .unwrap()
1197 .as_secs();
1198 let output = format!("{now_secs}\n10\t5\tsrc/a.ts\n");
1199 let (result, _) = parse_git_log(&output, root);
1200 let churn = &result[&PathBuf::from("/project/src/a.ts")];
1201 assert!(
1202 churn.weighted_commits > 0.0,
1203 "weighted_commits should be positive for recent commits"
1204 );
1205 }
1206
1207 #[test]
1208 fn trend_boundary_1_5x_ratio() {
1209 let timestamps = vec![100, 200, 600, 800, 1000];
1210 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1211 }
1212
1213 #[test]
1214 fn trend_just_above_1_5x() {
1215 let timestamps = vec![100, 600, 800, 1000];
1216 assert_eq!(compute_trend(×tamps), ChurnTrend::Accelerating);
1217 }
1218
1219 #[test]
1220 fn trend_boundary_0_67x_ratio() {
1221 let timestamps = vec![100, 200, 300, 600, 1000];
1222 assert_eq!(compute_trend(×tamps), ChurnTrend::Cooling);
1223 }
1224
1225 #[test]
1226 fn trend_two_timestamps_different() {
1227 let timestamps = vec![100, 200];
1228 assert_eq!(compute_trend(×tamps), ChurnTrend::Stable);
1229 }
1230
1231 #[test]
1232 fn parse_since_week_singular() {
1233 let d = parse_since("1week").unwrap();
1234 assert_eq!(d.git_after, "1 week ago");
1235 assert_eq!(d.display, "1 week");
1236 }
1237
1238 #[test]
1239 fn parse_since_weeks_long() {
1240 let d = parse_since("3weeks").unwrap();
1241 assert_eq!(d.git_after, "3 weeks ago");
1242 assert_eq!(d.display, "3 weeks");
1243 }
1244
1245 #[test]
1246 fn parse_since_days_long() {
1247 let d = parse_since("30days").unwrap();
1248 assert_eq!(d.git_after, "30 days ago");
1249 assert_eq!(d.display, "30 days");
1250 }
1251
1252 #[test]
1253 fn parse_since_year_long() {
1254 let d = parse_since("1year").unwrap();
1255 assert_eq!(d.git_after, "1 year ago");
1256 assert_eq!(d.display, "1 year");
1257 }
1258
1259 #[test]
1260 fn parse_since_overflow_number_rejected() {
1261 let result = parse_since("99999999999999999999d");
1262 assert!(result.is_err());
1263 let err = result.unwrap_err();
1264 assert!(err.contains("invalid number"));
1265 }
1266
1267 #[test]
1268 fn parse_since_zero_days_rejected() {
1269 assert!(parse_since("0d").is_err());
1270 }
1271
1272 #[test]
1273 fn parse_since_zero_weeks_rejected() {
1274 assert!(parse_since("0w").is_err());
1275 }
1276
1277 #[test]
1278 fn parse_since_zero_years_rejected() {
1279 assert!(parse_since("0y").is_err());
1280 }
1281
1282 #[test]
1283 fn numstat_missing_path() {
1284 assert!(parse_numstat_line("10\t5").is_none());
1285 }
1286
1287 #[test]
1288 fn numstat_single_field() {
1289 assert!(parse_numstat_line("10").is_none());
1290 }
1291
1292 #[test]
1293 fn numstat_empty_string() {
1294 assert!(parse_numstat_line("").is_none());
1295 }
1296
1297 #[test]
1298 fn numstat_only_added_is_binary() {
1299 assert!(parse_numstat_line("-\t5\tsrc/file.ts").is_none());
1300 }
1301
1302 #[test]
1303 fn numstat_only_deleted_is_binary() {
1304 assert!(parse_numstat_line("10\t-\tsrc/file.ts").is_none());
1305 }
1306
1307 #[test]
1308 fn numstat_path_with_spaces() {
1309 let (a, d, p) = parse_numstat_line("3\t1\tpath with spaces/file.ts").unwrap();
1310 assert_eq!(a, 3);
1311 assert_eq!(d, 1);
1312 assert_eq!(p, "path with spaces/file.ts");
1313 }
1314
/// Four-digit added/deleted counts are parsed without loss.
#[test]
fn numstat_large_numbers() {
    let line = "9999\t8888\tsrc/big.ts";
    let (added, deleted, path) = parse_numstat_line(line).unwrap();
    assert_eq!(added, 9999);
    assert_eq!(deleted, 8888);
    assert_eq!(path, "src/big.ts");
}
1322
/// Ten-character inputs whose dashes sit in the wrong positions are not ISO dates.
#[test]
fn iso_date_wrong_separator_positions() {
    for candidate in ["20-25-0601", "202506-01-"] {
        assert!(!is_iso_date(candidate));
    }
}
1328
/// A date with a single-digit day (nine characters) is not an ISO date.
#[test]
fn iso_date_too_short() {
    let accepted = is_iso_date("2025-06-0");
    assert!(!accepted);
}
1333
/// Letters in the day position disqualify the input as an ISO date.
#[test]
fn iso_date_letters_in_day() {
    let accepted = is_iso_date("2025-06-ab");
    assert!(!accepted);
}
1338
/// Letters in the month position disqualify the input as an ISO date.
#[test]
fn iso_date_letters_in_month() {
    let accepted = is_iso_date("2025-ab-01");
    assert!(!accepted);
}
1343
/// A multi-digit count followed by a word unit splits into its two halves.
#[test]
fn split_number_unit_valid() {
    let (digits, suffix) = split_number_unit("42days").unwrap();
    assert_eq!(digits, "42");
    assert_eq!(suffix, "days");
}
1350
/// The shortest valid form, one digit plus a one-letter unit, splits cleanly.
#[test]
fn split_number_unit_single_digit() {
    let (digits, suffix) = split_number_unit("1m").unwrap();
    assert_eq!(digits, "1");
    assert_eq!(suffix, "m");
}
1357
/// Input without a leading number is rejected with a "must start with a number" error.
#[test]
fn split_number_unit_no_digits() {
    let message = split_number_unit("abc").unwrap_err();
    assert!(message.contains("must start with a number"));
}
1363
/// A bare number without a unit is rejected with a "requires a unit suffix" error.
#[test]
fn split_number_unit_no_unit() {
    let message = split_number_unit("123").unwrap_err();
    assert!(message.contains("requires a unit suffix"));
}
1369
/// A numstat line that appears before any timestamp line is still counted,
/// and its decay weight stays close to 1.0.
#[test]
fn parse_git_log_numstat_before_timestamp_uses_now() {
    let (files, _) = parse_git_log("10\t5\tsrc/no_ts.ts\n", Path::new("/project"));
    assert_eq!(files.len(), 1);

    let entry = &files[&PathBuf::from("/project/src/no_ts.ts")];
    assert_eq!(entry.commits, 1);
    assert_eq!(entry.lines_added, 10);
    assert_eq!(entry.lines_deleted, 5);

    let weight = entry.weighted_commits;
    assert!(
        weight > 0.9,
        "weight should be near 1.0 when timestamp defaults to now"
    );
}
1385
/// Lines holding only whitespace around the timestamp and numstat rows do not
/// produce extra file entries.
#[test]
fn parse_git_log_whitespace_lines_ignored() {
    let log = " \n1700000000\n \n10\t5\tsrc/a.ts\n \n";
    let (files, _) = parse_git_log(log, Path::new("/project"));
    assert_eq!(files.len(), 1);
}
1393
/// Two files sharing one log keep separate commit counts, and the file whose
/// five commits mostly sit at the latest timestamps is marked accelerating.
#[test]
fn parse_git_log_trend_is_computed_per_file() {
    // The trailing `\` continuations skip the indentation, so the parsed text
    // contains only the escaped `\n` / `\t` separators.
    let log = "\
        1000\n5\t1\tsrc/old.ts\n\
        2000\n3\t1\tsrc/old.ts\n\
        1000\n1\t0\tsrc/hot.ts\n\
        1800\n1\t0\tsrc/hot.ts\n\
        1900\n1\t0\tsrc/hot.ts\n\
        1950\n1\t0\tsrc/hot.ts\n\
        2000\n1\t0\tsrc/hot.ts\n";
    let (files, _) = parse_git_log(log, Path::new("/project"));

    let old = &files[&PathBuf::from("/project/src/old.ts")];
    assert_eq!(old.commits, 2);

    let hot = &files[&PathBuf::from("/project/src/hot.ts")];
    assert_eq!(hot.commits, 5);
    assert_eq!(hot.trend, ChurnTrend::Accelerating);
}
1412
/// A single commit dated 180 days before the current wall-clock time must
/// carry a decayed weight strictly between 0.1 and 0.5.
#[test]
fn parse_git_log_weighted_decay_for_old_commits() {
    use std::time::{SystemTime, UNIX_EPOCH};

    const DAY_SECS: u64 = 86_400;
    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let old_ts = now - 180 * DAY_SECS;

    let log = format!("{old_ts}\n10\t5\tsrc/old.ts\n");
    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let weight = files[&PathBuf::from("/project/src/old.ts")].weighted_commits;

    assert!(
        weight < 0.5,
        "180-day-old commit should weigh ~0.25, got {}",
        weight
    );
    assert!(
        weight > 0.1,
        "180-day-old commit should weigh ~0.25, got {}",
        weight
    );
}
1435
/// Relative paths from the log are joined onto the root, both as the map key
/// and in the entry's own `path` field.
#[test]
fn parse_git_log_path_stored_as_absolute() {
    let expected = PathBuf::from("/my/project/lib/utils.ts");
    let (files, _) = parse_git_log(
        "1700000000\n1\t0\tlib/utils.ts\n",
        Path::new("/my/project"),
    );
    assert!(files.contains_key(&expected));
    assert_eq!(files[&expected].path, expected);
}
1445
/// The default string form of `weighted_commits` must not be longer than its
/// two-decimal rendering, i.e. the value carries at most two decimal places.
#[test]
fn parse_git_log_weighted_commits_rounded() {
    use std::time::{SystemTime, UNIX_EPOCH};

    let now = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
        .as_secs();
    let log = format!("{now}\n1\t0\tsrc/a.ts\n");
    let (files, _) = parse_git_log(&log, Path::new("/project"));
    let weight = files[&PathBuf::from("/project/src/a.ts")].weighted_commits;

    let rendered = weight.to_string();
    let two_places = format!("{:.2}", weight);
    // Equal only when `rendered` is no longer than `two_places`.
    assert_eq!(
        rendered.len(),
        two_places.len().min(rendered.len()),
        "weighted_commits should be rounded to at most 2 decimal places"
    );
}
1463
/// Every `ChurnTrend` variant serializes to its lowercase name as a JSON string.
#[test]
fn trend_serde_serialization() {
    let cases = [
        (ChurnTrend::Accelerating, "\"accelerating\""),
        (ChurnTrend::Stable, "\"stable\""),
        (ChurnTrend::Cooling, "\"cooling\""),
    ];
    for (trend, json) in cases {
        assert_eq!(serde_json::to_string(&trend).unwrap(), json);
    }
}
1479
/// A `timestamp|email` header puts the email in the author pool and records a
/// contribution for that author under pool index 0.
#[test]
fn parse_git_log_extracts_author_email() {
    let log = "1700000000|alice@example.com\n10\t5\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));
    assert_eq!(pool, vec![String::from("alice@example.com")]);

    let authors = &files[&PathBuf::from("/project/src/index.ts")].authors;
    assert_eq!(authors.len(), 1);

    let alice = &authors[&0];
    assert_eq!(alice.commits, 1);
    assert_eq!(alice.first_commit_ts, 1_700_000_000);
    assert_eq!(alice.last_commit_ts, 1_700_000_000);
}
1493
/// An author who appears on several commits occupies one slot in the pool.
#[test]
fn parse_git_log_intern_dedupes_authors() {
    // One header line plus one numstat line per commit, newline-terminated.
    let log = "1700000000|alice@example.com\n1\t0\ta.ts\n\
               1700100000|bob@example.com\n2\t1\tb.ts\n\
               1700200000|alice@example.com\n3\t2\tc.ts\n";
    let (_files, pool) = parse_git_log(log, Path::new("/project"));

    assert_eq!(pool.len(), 2);
    assert!(pool.iter().any(|email| email == "alice@example.com"));
    assert!(pool.iter().any(|email| email == "bob@example.com"));
}
1510
/// Commits to one file by two authors are tallied per author, with the first
/// and last commit timestamps tracked for each.
#[test]
fn parse_git_log_aggregates_per_author() {
    // One header line plus one numstat line per commit, newline-terminated.
    let log = "1700000000|alice@example.com\n1\t0\tsrc/index.ts\n\
               1700100000|bob@example.com\n2\t0\tsrc/index.ts\n\
               1700200000|alice@example.com\n1\t1\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));

    let entry = &files[&PathBuf::from("/project/src/index.ts")];
    assert_eq!(entry.commits, 3);
    assert_eq!(entry.authors.len(), 2);

    // Authors are keyed by their position in the pool.
    let position = pool
        .iter()
        .position(|email| email == "alice@example.com")
        .unwrap();
    let alice_idx = u32::try_from(position).unwrap();
    let alice = &entry.authors[&alice_idx];
    assert_eq!(alice.commits, 2);
    assert_eq!(alice.first_commit_ts, 1_700_000_000);
    assert_eq!(alice.last_commit_ts, 1_700_200_000);
}
1534
/// A header with only a timestamp (no `|email`) still counts the commit but
/// leaves both the pool and the file's author map empty.
#[test]
fn parse_git_log_legacy_bare_timestamp_still_parses() {
    let log = "1700000000\n10\t5\tsrc/index.ts\n";
    let (files, pool) = parse_git_log(log, Path::new("/project"));
    assert!(pool.is_empty());

    let entry = &files[&PathBuf::from("/project/src/index.ts")];
    assert_eq!(entry.commits, 1);
    assert!(entry.authors.is_empty());
}
1545
/// Interning the same email twice yields the same index and does not grow the pool.
#[test]
fn intern_author_returns_existing_index() {
    let mut pool = Vec::new();
    let mut index = FxHashMap::default();

    let first = intern_author("alice@x", &mut pool, &mut index);
    let second = intern_author("alice@x", &mut pool, &mut index);

    assert_eq!(first, second);
    assert_eq!(pool.len(), 1);
}
1555
/// New emails get 0, 1, 2, ... in first-seen order; a repeat returns its
/// original index.
#[test]
fn intern_author_assigns_sequential_indices() {
    let mut pool = Vec::new();
    let mut index = FxHashMap::default();

    let steps = [("alice@x", 0), ("bob@x", 1), ("carol@x", 2), ("alice@x", 0)];
    for (email, expected) in steps {
        assert_eq!(intern_author(email, &mut pool, &mut index), expected);
    }
}
1565
/// Runs `git <args>` with `root` as the working directory and panics unless
/// the command exits successfully.
///
/// Output is captured instead of inherited, so passing runs do not write git
/// chatter into the test output and a failing run reports the exit status and
/// git's own stderr in the panic message.
///
/// # Panics
///
/// Panics if `git` cannot be spawned or exits with a non-zero status.
fn git(root: &Path, args: &[&str]) {
    let output = std::process::Command::new("git")
        .args(args)
        .current_dir(root)
        .output()
        .expect("run git");
    assert!(
        output.status.success(),
        "git {args:?} failed ({}): {}",
        output.status,
        String::from_utf8_lossy(&output.stderr).trim()
    );
}
1574
/// Writes `contents` to `root/path`, creating any missing parent directories.
///
/// Panics if the joined path has no parent or if either filesystem call fails.
fn write(root: &Path, path: &str, contents: &str) {
    let target = root.join(path);
    let parent = target.parent().expect("test path has parent");
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&target, contents).unwrap();
}
1580
/// Drives `analyze_churn_cached` against a real temporary git repository:
/// the first run is a miss, a repeat at the same HEAD is a hit, and after a
/// new commit the run is still a hit, reports both commits for the file, and
/// the stored cache records the new HEAD.
#[test]
fn cached_churn_merges_new_commits_after_head_advances() {
    let repo = tempfile::tempdir().expect("create repo");
    let root = repo.path();
    let setup: [&[&str]; 4] = [
        &["init"],
        &["config", "user.email", "churn@example.test"],
        &["config", "user.name", "Churn Test"],
        &["config", "commit.gpgsign", "false"],
    ];
    for args in setup {
        git(root, args);
    }
    // Stages everything and commits it with the given message.
    let commit_all = |message: &str| {
        git(root, &["add", "."]);
        git(root, &["commit", "-m", message]);
    };

    write(root, "src/a.ts", "export const a = 1;\n");
    commit_all("initial");

    let since = parse_since("1y").unwrap();
    let cache_dir = tempfile::tempdir().expect("create cache dir");
    let tracked = root.join("src/a.ts");

    // First run: nothing cached yet.
    let (cold, cold_hit) = analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(!cold_hit);
    assert_eq!(cold.files[&tracked].commits, 1);

    // Second run at the same HEAD is served from the cache.
    let (_warm, warm_hit) = analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(warm_hit);

    // Advance HEAD with a second commit touching the same file.
    write(
        root,
        "src/a.ts",
        "export const a = 1;\nexport const b = 2;\n",
    );
    commit_all("update a");
    let head = get_head_sha(root).unwrap();

    let (incremental, incremental_hit) =
        analyze_churn_cached(root, &since, cache_dir.path(), false).unwrap();
    assert!(incremental_hit);
    assert_eq!(incremental.files[&tracked].commits, 2);

    let stored = load_churn_cache(cache_dir.path(), &since.git_after).unwrap();
    assert_eq!(stored.last_indexed_sha, head);
}
1621
/// Writes `contents` to `churn.json` inside `dir` and returns that file's path.
///
/// Panics if the write fails.
fn write_churn_file(dir: &std::path::Path, contents: &str) -> PathBuf {
    let target = dir.join("churn.json");
    std::fs::write(target.as_path(), contents).unwrap();
    target
}
1627
/// Two events for one file by two authors are merged into a single entry:
/// commits and line counts are summed, both authors land in the pool, and the
/// result is not flagged as a shallow clone.
#[test]
fn churn_file_happy_path() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{
            "schema": "fallow-churn/v1",
            "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
            ]
        }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();

    let entry = &result.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 2);
    assert_eq!(entry.lines_added, 13);
    assert_eq!(entry.lines_deleted, 7);
    assert_eq!(entry.authors.len(), 2);

    let pool = &result.author_pool;
    assert!(pool.contains(&"alice@corp".to_string()));
    assert!(pool.contains(&"bob@corp".to_string()));
    assert!(!result.shallow_clone);
}
1652
/// The same history fed through `parse_git_log` and through an imported churn
/// file must agree: identical author pools, identical file sets, and for each
/// file matching commits, line counts, trend, author count, and a weighted
/// commit value within 0.02.
#[test]
fn churn_file_matches_git_parse() {
    let dir = tempfile::tempdir().unwrap();
    let root = Path::new("/project");

    let git_output = "1700000000|alice@corp\n10\t5\tsrc/a.ts\n3\t1\tsrc/b.ts\n\n1700100000|bob@corp\n3\t2\tsrc/a.ts\n";
    let (git_files, git_pool) = parse_git_log(git_output, root);

    let json = r#"{
            "schema": "fallow-churn/v1",
            "events": [
                { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 10, "deleted": 5 },
                { "path": "src/b.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 3, "deleted": 1 },
                { "path": "src/a.ts", "timestamp": 1700100000, "author": "bob@corp", "added": 3, "deleted": 2 }
            ]
        }"#;
    let path = write_churn_file(dir.path(), json);
    let imported = analyze_churn_from_file(&path, root).unwrap();

    assert_eq!(git_pool, imported.author_pool, "author pools diverge");
    assert_eq!(git_files.len(), imported.files.len());

    for (file, from_git) in &git_files {
        let from_file = &imported.files[file];
        assert_eq!(from_git.commits, from_file.commits, "commits for {file:?}");
        assert_eq!(
            from_git.lines_added, from_file.lines_added,
            "added for {file:?}"
        );
        assert_eq!(
            from_git.lines_deleted, from_file.lines_deleted,
            "deleted for {file:?}"
        );
        assert_eq!(from_git.trend, from_file.trend, "trend for {file:?}");
        assert_eq!(
            from_git.authors.len(),
            from_file.authors.len(),
            "authors for {file:?}"
        );

        let drift = (from_git.weighted_commits - from_file.weighted_commits).abs();
        assert!(
            drift < 0.02,
            "weighted_commits for {file:?}: {} vs {}",
            from_git.weighted_commits,
            from_file.weighted_commits
        );
    }
}
1700
/// An explicit empty `events` array is accepted and produces no files or authors.
#[test]
fn churn_file_empty_events_is_valid() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [] }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    assert!(result.files.is_empty());
    assert!(result.author_pool.is_empty());
}
1712
/// A document with only the `schema` key is accepted and produces no files.
#[test]
fn churn_file_missing_events_key_is_valid() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1" }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    assert!(result.files.is_empty());
}
1720
/// An unrecognized schema tag is refused, and the error names the expected one.
#[test]
fn churn_file_bad_schema_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v2", "events": [] }"#;
    let path = write_churn_file(dir.path(), json);

    let outcome = analyze_churn_from_file(&path, Path::new("/project"));
    let err = outcome.unwrap_err();
    assert!(err.contains("expected \"fallow-churn/v1\""), "{err}");
}
1731
/// Content that is not valid JSON results in an error.
#[test]
fn churn_file_malformed_json_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let path = write_churn_file(dir.path(), "{ not json");

    let outcome = analyze_churn_from_file(&path, Path::new("/project"));
    assert!(outcome.is_err());
}
1738
/// Pointing at a file that does not exist yields a "failed to read churn file" error.
#[test]
fn churn_file_missing_file_rejected() {
    let missing = Path::new("/no/such/churn.json");
    let outcome = analyze_churn_from_file(missing, Path::new("/project"));
    let err = outcome.unwrap_err();
    assert!(err.contains("failed to read churn file"), "{err}");
}
1745
/// An event whose `path` is only whitespace is refused with an "empty path" error.
#[test]
fn churn_file_empty_path_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": " ", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let outcome = analyze_churn_from_file(&path, Path::new("/project"));
    let err = outcome.unwrap_err();
    assert!(err.contains("empty path"), "{err}");
}
1756
/// A thirteen-digit timestamp is refused with an error that mentions milliseconds.
#[test]
fn churn_file_millisecond_timestamp_rejected() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000000, "added": 1, "deleted": 0 } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let outcome = analyze_churn_from_file(&path, Path::new("/project"));
    let err = outcome.unwrap_err();
    assert!(err.contains("milliseconds"), "{err}");
}
1768
/// An event without an `author` key still counts as a commit but adds nothing
/// to the file's author map or to the pool.
#[test]
fn churn_file_missing_author_contributes_no_signal() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    let entry = &result.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 1);
    assert!(entry.authors.is_empty());
    assert!(result.author_pool.is_empty());
}
1782
/// A whitespace-only `author` value is not added to the author pool.
#[test]
fn churn_file_empty_author_string_treated_as_absent() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": " ", "added": 1, "deleted": 0 } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    assert!(result.author_pool.is_empty());
}
1793
/// Extra keys at the top level (`extra`) and on an event (`commit`, `tz`) do
/// not stop the event from being counted.
#[test]
fn churn_file_unknown_fields_ignored() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "extra": true, "events": [ { "path": "src/a.ts", "timestamp": 1700000000, "author": "alice@corp", "added": 1, "deleted": 0, "commit": "abc123", "tz": "+0200" } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    let entry = &result.files[&PathBuf::from("/project/src/a.ts")];
    assert_eq!(entry.commits, 1);
}
1806
/// An event path written with a backslash (`src\a.ts`) is stored under the
/// forward-slash key `/project/src/a.ts`.
#[test]
fn churn_file_backslash_paths_normalized() {
    let dir = tempfile::tempdir().unwrap();
    let json = r#"{ "schema": "fallow-churn/v1", "events": [ { "path": "src\\a.ts", "timestamp": 1700000000, "added": 1, "deleted": 0 } ] }"#;
    let path = write_churn_file(dir.path(), json);

    let result = analyze_churn_from_file(&path, Path::new("/project")).unwrap();
    let expected = PathBuf::from("/project/src/a.ts");
    assert!(result.files.contains_key(&expected));
}
1821}