1use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Function-pointer type for a hook that runs a prepared command and returns its output.
///
/// When a hook of this type has been installed with [`set_spawn_hook`], this module's
/// `spawn_output` helper calls it instead of `Command::output`.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;
32
/// Process-wide spawn hook slot; stays empty until [`set_spawn_hook`] stores a hook.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
34
/// Installs `hook` as the process-wide spawn hook.
///
/// The hook is stored in a `OnceLock`, so only the first call takes effect; the result of
/// later calls is discarded and the originally installed hook stays in place.
pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
    // `set` returns `Err` when a hook is already present; that outcome is ignored on purpose.
    let _ = SPAWN_HOOK.set(hook);
}
44
45fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
46 if let Some(hook) = SPAWN_HOOK.get() {
47 hook(command)
48 } else {
49 command.output()
50 }
51}
52
/// Checks that `s` only contains characters this module allows in a git ref.
///
/// Accepted everywhere: ASCII letters and digits plus `.`, `_`, `-`, `/`, `~`, `^`, `@`,
/// `{` and `}`. Between a `{` and the next `}` the characters `:` and space are accepted
/// as well (for example `HEAD@{1 week ago}`).
///
/// # Errors
///
/// Returns a human-readable message when `s` is empty, starts with `-` (so it cannot be
/// mistaken for a command-line option), contains any other character, or ends while a
/// `{` is still open.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    match s.chars().next() {
        None => return Err("git ref cannot be empty".to_string()),
        Some('-') => return Err("git ref cannot start with '-'".to_string()),
        Some(_) => {}
    }

    // Tracks whether the most recent brace seen was an opening one.
    let mut brace_open = false;
    for ch in s.chars() {
        if ch == '{' {
            brace_open = true;
        } else if ch == '}' {
            brace_open = false;
        } else {
            let allowed_anywhere = ch.is_ascii_alphanumeric()
                || matches!(ch, '.' | '_' | '-' | '/' | '~' | '^' | '@');
            let allowed_in_braces = brace_open && matches!(ch, ':' | ' ');
            if !(allowed_anywhere || allowed_in_braces) {
                return Err(format!("git ref contains disallowed character: '{ch}'"));
            }
        }
    }

    if brace_open {
        Err("git ref has unclosed '{'".to_string())
    } else {
        Ok(s)
    }
}
88
/// Reasons the changed-file lookup in this module can fail.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// The supplied ref was rejected by `validate_git_ref`; carries the validation message.
    InvalidRef(String),
    /// The git process could not be run; carries the text of the underlying I/O error.
    GitMissing(String),
    /// Git's stderr contained "not a git repository".
    NotARepository,
    /// Git failed for another reason; carries its trimmed stderr, or a message produced by
    /// this module when git's output was unusable.
    GitFailed(String),
}
102
103impl ChangedFilesError {
104 pub fn describe(&self) -> String {
108 match self {
109 Self::InvalidRef(e) => format!("invalid git ref: {e}"),
110 Self::GitMissing(e) => format!("failed to run git: {e}"),
111 Self::NotARepository => "not a git repository".to_owned(),
112 Self::GitFailed(stderr) => augment_git_failed(stderr),
113 }
114 }
115}
116
/// Returns `stderr`, with a shallow-clone hint appended when git could not resolve the ref.
///
/// The hint is added when the message (compared case-insensitively for ASCII) contains
/// "not a valid object name", "unknown revision" or "ambiguous argument". Any other
/// message is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    const UNRESOLVED_REF_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let lowered = stderr.to_ascii_lowercase();
    let ref_unresolved = UNRESOLVED_REF_MARKERS
        .iter()
        .any(|&marker| lowered.contains(marker));

    if !ref_unresolved {
        return stderr.to_owned();
    }
    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
135
136pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147 let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150 if !output.status.success() {
151 let stderr = String::from_utf8_lossy(&output.stderr);
152 return Err(if stderr.contains("not a git repository") {
153 ChangedFilesError::NotARepository
154 } else {
155 ChangedFilesError::GitFailed(stderr.trim().to_owned())
156 });
157 }
158
159 let raw = String::from_utf8_lossy(&output.stdout);
160 let trimmed = raw.trim();
161 if trimmed.is_empty() {
162 return Err(ChangedFilesError::GitFailed(
163 "git rev-parse --show-toplevel returned empty output".to_owned(),
164 ));
165 }
166
167 let path = PathBuf::from(trimmed);
168 Ok(dunce::canonicalize(&path).unwrap_or(path))
169}
170
171fn collect_git_paths(
172 cwd: &Path,
173 toplevel: &Path,
174 args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176 let output = spawn_output(&mut git_command(cwd, args))
177 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179 if !output.status.success() {
180 let stderr = String::from_utf8_lossy(&output.stderr);
181 return Err(if stderr.contains("not a git repository") {
182 ChangedFilesError::NotARepository
183 } else {
184 ChangedFilesError::GitFailed(stderr.trim().to_owned())
185 });
186 }
187
188 #[cfg(windows)]
189 let normalise_segment = |line: &str| line.replace('/', "\\");
190 #[cfg(not(windows))]
191 let normalise_segment = |line: &str| line.to_owned();
192
193 let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194 .lines()
195 .filter(|line| !line.is_empty())
196 .map(|line| toplevel.join(normalise_segment(line)))
197 .collect();
198
199 Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203 let mut command = crate::spawn::git();
204 command.args(args).current_dir(cwd);
205 command
206}
207
208pub fn try_get_changed_files(
226 root: &Path,
227 git_ref: &str,
228) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
229 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
230 let toplevel = resolve_git_toplevel(root)?;
231 try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
232}
233
234pub fn try_get_changed_files_with_toplevel(
242 cwd: &Path,
243 toplevel: &Path,
244 git_ref: &str,
245) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
246 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
247
248 let mut files = collect_git_paths(
249 cwd,
250 toplevel,
251 &[
252 "diff",
253 "--name-only",
254 "--end-of-options",
255 &format!("{git_ref}...HEAD"),
256 ],
257 )?;
258 files.extend(collect_git_paths(
259 cwd,
260 toplevel,
261 &["diff", "--name-only", "HEAD"],
262 )?);
263 files.extend(collect_git_paths(
264 cwd,
265 toplevel,
266 &["ls-files", "--full-name", "--others", "--exclude-standard"],
267 )?);
268 Ok(files)
269}
270
271#[expect(
275 clippy::print_stderr,
276 reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
277)]
278pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
279 match try_get_changed_files(root, git_ref) {
280 Ok(files) => Some(files),
281 Err(ChangedFilesError::InvalidRef(e)) => {
282 eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
283 None
284 }
285 Err(ChangedFilesError::GitMissing(e)) => {
286 eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
287 None
288 }
289 Err(ChangedFilesError::NotARepository) => {
290 eprintln!("Warning: --changed-since ignored: not a git repository");
291 None
292 }
293 Err(ChangedFilesError::GitFailed(stderr)) => {
294 eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
295 None
296 }
297 }
298}
299
300#[expect(
308 clippy::implicit_hasher,
309 reason = "fallow standardizes on FxHashSet across the workspace"
310)]
311pub fn filter_results_by_changed_files(
312 results: &mut AnalysisResults,
313 changed_files: &FxHashSet<PathBuf>,
314) {
315 let cf = normalize_changed_files_set(changed_files);
316 results
317 .unused_files
318 .retain(|f| contains_normalized(&cf, &f.file.path));
319 results
320 .unused_exports
321 .retain(|e| contains_normalized(&cf, &e.export.path));
322 results
323 .unused_types
324 .retain(|e| contains_normalized(&cf, &e.export.path));
325 results
326 .private_type_leaks
327 .retain(|e| contains_normalized(&cf, &e.leak.path));
328 results
329 .unused_enum_members
330 .retain(|m| contains_normalized(&cf, &m.member.path));
331 results
332 .unused_class_members
333 .retain(|m| contains_normalized(&cf, &m.member.path));
334 results
335 .unresolved_imports
336 .retain(|i| contains_normalized(&cf, &i.import.path));
337
338 results.unlisted_dependencies.retain(|d| {
339 d.dep
340 .imported_from
341 .iter()
342 .any(|s| contains_normalized(&cf, &s.path))
343 });
344
345 for dup in &mut results.duplicate_exports {
346 dup.export
347 .locations
348 .retain(|loc| contains_normalized(&cf, &loc.path));
349 }
350 results
351 .duplicate_exports
352 .retain(|d| d.export.locations.len() >= 2);
353
354 results
355 .circular_dependencies
356 .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
357
358 results
359 .re_export_cycles
360 .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
361
362 results
363 .boundary_violations
364 .retain(|v| contains_normalized(&cf, &v.violation.from_path));
365
366 results
367 .stale_suppressions
368 .retain(|s| contains_normalized(&cf, &s.path));
369
370 results.security_findings.retain(|f| {
371 contains_normalized(&cf, &f.path)
372 || f.trace
373 .iter()
374 .any(|hop| contains_normalized(&cf, &hop.path))
375 });
376
377 results
378 .unresolved_catalog_references
379 .retain(|r| contains_normalized(&cf, &r.reference.path));
380 results
381 .empty_catalog_groups
382 .retain(|g| normalized_set_contains_path(&cf, &g.group.path));
383
384 results
385 .unused_dependency_overrides
386 .retain(|o| contains_normalized(&cf, &o.entry.path));
387 results
388 .misconfigured_dependency_overrides
389 .retain(|o| contains_normalized(&cf, &o.entry.path));
390}
391
392fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
405 changed_files
406 .iter()
407 .map(|p| dunce::simplified(p).to_path_buf())
408 .collect()
409}
410
411fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
412 normalized.contains(dunce::simplified(path))
413}
414
415fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
416 contains_normalized(normalized, path)
417 || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
418}
419
420fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
426 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
427 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
428 let mut duplicated_tokens = 0_usize;
429 let mut clone_instances = 0_usize;
430
431 for group in &report.clone_groups {
432 for instance in &group.instances {
433 files_with_clones.insert(&instance.file);
434 clone_instances += 1;
435 let lines = file_dup_lines.entry(&instance.file).or_default();
436 for line in instance.start_line..=instance.end_line {
437 lines.insert(line);
438 }
439 }
440 duplicated_tokens += group.token_count * group.instances.len();
441 }
442
443 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
444
445 DuplicationStats {
446 total_files: report.stats.total_files,
447 files_with_clones: files_with_clones.len(),
448 total_lines: report.stats.total_lines,
449 duplicated_lines,
450 total_tokens: report.stats.total_tokens,
451 duplicated_tokens,
452 clone_groups: report.clone_groups.len(),
453 clone_instances,
454 #[expect(
455 clippy::cast_precision_loss,
456 reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
457 )]
458 duplication_percentage: if report.stats.total_lines > 0 {
459 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
460 } else {
461 0.0
462 },
463 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
464 }
465}
466
467#[expect(
472 clippy::implicit_hasher,
473 reason = "fallow standardizes on FxHashSet across the workspace"
474)]
475pub fn filter_duplication_by_changed_files(
476 report: &mut DuplicationReport,
477 changed_files: &FxHashSet<PathBuf>,
478 root: &Path,
479) {
480 let cf = normalize_changed_files_set(changed_files);
481 report.clone_groups.retain(|g| {
482 g.instances
483 .iter()
484 .any(|i| contains_normalized(&cf, &i.file))
485 });
486 report.clone_families = families::group_into_families(&report.clone_groups, root);
487 report.mirrored_directories =
488 families::detect_mirrored_directories(&report.clone_families, root);
489 report.stats = recompute_duplication_stats(report);
490}
491
492#[cfg(test)]
493mod tests {
494 use super::*;
495 use crate::duplicates::{CloneGroup, CloneInstance};
496 use crate::results::{
497 BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
498 SecurityFindingKind, TraceHop, TraceHopRole, UnusedExport, UnusedFile,
499 };
500 use fallow_types::output_dead_code::{
501 BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
502 UnusedExportFinding, UnusedFileFinding,
503 };
504
    /// `describe` yields a message for every variant that carries the variant's payload
    /// (or the fixed wording, for `NotARepository`).
    #[test]
    fn changed_files_error_describe_variants() {
        assert!(
            ChangedFilesError::InvalidRef("bad".to_owned())
                .describe()
                .contains("invalid git ref")
        );
        assert!(
            ChangedFilesError::GitMissing("oops".to_owned())
                .describe()
                .contains("oops")
        );
        assert_eq!(
            ChangedFilesError::NotARepository.describe(),
            "not a git repository"
        );
        assert!(
            ChangedFilesError::GitFailed("bad ref".to_owned())
                .describe()
                .contains("bad ref")
        );
    }
527
    /// An "unknown revision" failure keeps git's original stderr and gains an actionable
    /// shallow-clone hint.
    #[test]
    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
        assert!(described.contains(stderr), "original stderr preserved");
        assert!(
            described.contains("shallow clone"),
            "hint surfaced: {described}"
        );
        assert!(
            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
            "hint actionable: {described}"
        );
    }
542
    /// Failures unrelated to an unresolved ref are described verbatim, without any hint.
    #[test]
    fn augment_git_failed_passthrough_for_other_errors() {
        let stderr = "fatal: refusing to merge unrelated histories";
        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
        assert_eq!(described, stderr);
    }
549
    /// Refs that begin with `-` are rejected so they cannot be read as git options.
    #[test]
    fn validate_git_ref_rejects_leading_dash() {
        assert!(validate_git_ref("--upload-pack=evil").is_err());
        assert!(validate_git_ref("-flag").is_err());
    }
555
    /// A plain tag name containing a hyphen in the middle is accepted and returned unchanged.
    #[test]
    fn validate_git_ref_accepts_baseline_tag() {
        assert_eq!(
            validate_git_ref("fallow-baseline").unwrap(),
            "fallow-baseline"
        );
    }
563
    /// An invalid ref surfaces as `InvalidRef`, and its description carries the
    /// validation message.
    #[test]
    fn try_get_changed_files_rejects_invalid_ref() {
        let err = try_get_changed_files(Path::new("/"), "--evil")
            .expect_err("leading-dash ref must be rejected");
        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
        assert!(err.describe().contains("cannot start with"));
    }
571
    /// A ref shaped like `--output=<path>` is rejected by validation.
    #[test]
    fn validate_git_ref_rejects_option_like_ref() {
        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
    }
576
    /// Spaces are allowed inside `@{...}`, so reflog-style relative dates validate.
    #[test]
    fn validate_git_ref_allows_reflog_relative_date() {
        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
    }
581
    /// An `--output=<path>` ref must be rejected up front: the call fails with
    /// `InvalidRef` and the file named in the would-be option is never created.
    #[test]
    fn try_get_changed_files_rejects_option_like_ref_before_git() {
        let root = tempfile::tempdir().expect("create temp dir");
        let proof_path = root.path().join("proof");

        let result = try_get_changed_files(
            root.path(),
            &format!("--output={}", proof_path.to_string_lossy()),
        );

        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
        assert!(
            !proof_path.exists(),
            "invalid changedSince ref must not be passed through to git as an option"
        );
    }
598
    /// Every variable listed in `AMBIENT_GIT_ENV_VARS` must appear on the built command
    /// as an explicit removal (an env override whose value is `None`).
    #[test]
    fn git_command_clears_parent_git_environment() {
        let command = git_command(Path::new("."), &["status", "--short"]);
        let overrides: Vec<_> = command.get_envs().collect();

        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
            assert!(
                overrides
                    .iter()
                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
                "git helper must clear inherited {var}",
            );
        }
    }
613
    /// With only `/a.ts` changed, findings for `/a.ts` survive and the unused-file
    /// finding for `/b.ts` is dropped.
    #[test]
    fn filter_results_keeps_only_changed_files() {
        let mut results = AnalysisResults::default();
        results
            .unused_files
            .push(UnusedFileFinding::with_actions(UnusedFile {
                path: "/a.ts".into(),
            }));
        results
            .unused_files
            .push(UnusedFileFinding::with_actions(UnusedFile {
                path: "/b.ts".into(),
            }));
        results
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                path: "/a.ts".into(),
                export_name: "foo".into(),
                is_type_only: false,
                line: 1,
                col: 0,
                span_start: 0,
                is_re_export: false,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.unused_files.len(), 1);
        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
        assert_eq!(results.unused_exports.len(), 1);
    }
648
    /// Unused-dependency findings are not filtered: one survives even when the changed
    /// set is empty.
    #[test]
    fn filter_results_preserves_dependency_level_issues() {
        let mut results = AnalysisResults::default();
        results.unused_dependencies.push(
            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
                crate::results::UnusedDependency {
                    package_name: "lodash".into(),
                    location: crate::results::DependencyLocation::Dependencies,
                    path: "/pkg.json".into(),
                    line: 3,
                    used_in_workspaces: Vec::new(),
                },
            ),
        );

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.unused_dependencies.len(), 1);
    }
669
    /// A cycle is kept when any one of its files (here the second) is in the changed set.
    #[test]
    fn filter_results_keeps_circular_dep_when_any_file_changed() {
        let mut results = AnalysisResults::default();
        results
            .circular_dependencies
            .push(CircularDependencyFinding::with_actions(
                CircularDependency {
                    files: vec!["/a.ts".into(), "/b.ts".into()],
                    length: 2,
                    line: 1,
                    col: 0,
                    is_cross_package: false,
                },
            ));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert_eq!(results.circular_dependencies.len(), 1);
    }
691
    /// A cycle is dropped when none of its files is in the changed set.
    #[test]
    fn filter_results_drops_circular_dep_when_no_file_changed() {
        let mut results = AnalysisResults::default();
        results
            .circular_dependencies
            .push(CircularDependencyFinding::with_actions(
                CircularDependency {
                    files: vec!["/a.ts".into(), "/b.ts".into()],
                    length: 2,
                    line: 1,
                    col: 0,
                    is_cross_package: false,
                },
            ));

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.circular_dependencies.is_empty());
    }
711
    /// Boundary violations follow the importing file: a change to the imported file
    /// (`to_path`) alone does not keep the violation.
    #[test]
    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
        let mut results = AnalysisResults::default();
        results
            .boundary_violations
            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
                from_path: "/a.ts".into(),
                to_path: "/b.ts".into(),
                from_zone: "ui".into(),
                to_zone: "data".into(),
                import_specifier: "../data/db".into(),
                line: 1,
                col: 0,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.boundary_violations.is_empty());
    }
733
    /// A security finding is kept when a file on its trace changed, even though the
    /// finding's own file did not.
    #[test]
    fn filter_results_keeps_security_finding_when_trace_file_changed() {
        let mut results = AnalysisResults::default();
        results.security_findings.push(SecurityFinding {
            kind: SecurityFindingKind::ClientServerLeak,
            category: None,
            cwe: None,
            path: "/project/src/client.tsx".into(),
            line: 2,
            col: 0,
            evidence: "candidate".into(),
            source_backed: false,
            trace: vec![
                TraceHop {
                    path: "/project/src/client.tsx".into(),
                    line: 2,
                    col: 0,
                    role: TraceHopRole::ClientBoundary,
                },
                TraceHop {
                    path: "/project/src/server.ts".into(),
                    line: 1,
                    col: 0,
                    role: TraceHopRole::SecretSource,
                },
            ],
            actions: Vec::new(),
            reachability: None,
        });

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        // Only the trace's second hop is marked as changed.
        changed.insert("/project/src/server.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.security_findings.len(), 1);
    }
771
    /// An empty-catalog-group finding with a relative path is kept when an absolute
    /// changed path ends with that relative path.
    #[test]
    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
        let mut results = AnalysisResults::default();
        results
            .empty_catalog_groups
            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
                catalog_name: "legacy".into(),
                path: PathBuf::from("pnpm-workspace.yaml"),
                line: 4,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.empty_catalog_groups.len(), 1);
        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
    }
791
    /// A clone group with one changed instance is kept whole: both instances remain and
    /// the recomputed stats still count one group with two instances.
    #[test]
    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![
                    CloneInstance {
                        file: "/a.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                    CloneInstance {
                        file: "/b.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                ],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 2,
                files_with_clones: 2,
                total_lines: 100,
                duplicated_lines: 10,
                total_tokens: 200,
                duplicated_tokens: 40,
                clone_groups: 1,
                clone_instances: 2,
                duplication_percentage: 10.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert_eq!(report.clone_groups.len(), 1);
        assert_eq!(report.stats.clone_groups, 1);
        assert_eq!(report.stats.clone_instances, 2);
    }
841
    /// Windows only: clone instance paths carrying the verbatim `\\?\` prefix must still
    /// match a changed-file entry written without that prefix.
    #[cfg(windows)]
    #[test]
    fn filter_duplication_normalises_verbatim_prefix_mismatch() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![
                    CloneInstance {
                        file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                    CloneInstance {
                        file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                ],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 2,
                files_with_clones: 2,
                total_lines: 100,
                duplicated_lines: 10,
                total_tokens: 200,
                duplicated_tokens: 40,
                clone_groups: 1,
                clone_instances: 2,
                duplication_percentage: 10.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));

        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert_eq!(
            report.clone_groups.len(),
            1,
            "verbatim instance path must match non-verbatim changed-file entry"
        );
    }
901
    /// Windows only: a finding path carrying the verbatim `\\?\` prefix must still match
    /// a changed-file entry written without that prefix.
    #[cfg(windows)]
    #[test]
    fn filter_results_normalises_verbatim_prefix_mismatch() {
        let mut results = AnalysisResults::default();
        results
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
                export_name: "foo".into(),
                is_type_only: false,
                line: 1,
                col: 0,
                span_start: 0,
                is_re_export: false,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));

        filter_results_by_changed_files(&mut results, &changed);
        assert_eq!(
            results.unused_exports.len(),
            1,
            "verbatim finding path must match non-verbatim changed-file entry"
        );
    }
928
    /// Test fixture: turns `repo` into a git repository on branch `main` with one commit.
    ///
    /// Sets a local user identity and disables commit signing, commits `seed.txt`, and
    /// tags that commit `fallow-baseline`. Returns the canonicalized repository path so
    /// callers can build expected paths in the same form the code under test produces.
    fn init_repo(repo: &Path) -> PathBuf {
        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
        run_git(repo, &["config", "user.email", "test@example.com"]);
        run_git(repo, &["config", "user.name", "test"]);
        run_git(repo, &["config", "commit.gpgsign", "false"]);
        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
        run_git(repo, &["add", "seed.txt"]);
        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
        run_git(repo, &["tag", "fallow-baseline"]);
        dunce::canonicalize(repo).unwrap()
    }
951
    /// Test helper: runs `git <args>` in `cwd` and panics, quoting git's stderr, if git
    /// cannot be started or exits unsuccessfully.
    fn run_git(cwd: &Path, args: &[&str]) {
        let output = std::process::Command::new("git")
            .args(args)
            .current_dir(cwd)
            .output()
            .expect("git available");
        assert!(
            output.status.success(),
            "git {args:?} failed: {}",
            String::from_utf8_lossy(&output.stderr)
        );
    }
964
    /// With the workspace at the repository root, an untracked new file is reported
    /// under its path joined onto the repository root.
    #[test]
    fn try_get_changed_files_workspace_at_repo_root() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        std::fs::create_dir_all(repo.join("src")).unwrap();
        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();

        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();

        let expected = repo.join("src/new.ts");
        assert!(
            changed.contains(&expected),
            "changed set should contain {expected:?}; actual: {changed:?}"
        );
    }
982
    /// With the workspace in a subdirectory, reported paths are still anchored at the
    /// repository root: `frontend/src/new.ts` appears once, never with the subdirectory
    /// segment doubled.
    #[test]
    fn try_get_changed_files_workspace_in_subdirectory() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(frontend.join("src")).unwrap();
        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();

        let expected = repo.join("frontend/src/new.ts");
        assert!(
            changed.contains(&expected),
            "changed set should contain canonical {expected:?}; actual: {changed:?}"
        );
        let bogus = frontend.join("frontend/src/new.ts");
        assert!(
            !changed.contains(&bogus),
            "changed set must not contain double-frontend path {bogus:?}"
        );
    }
1011
    /// Committed changes outside the workspace directory are included: a file committed
    /// under `backend/` is reported when the lookup runs from `frontend/`.
    #[test]
    fn try_get_changed_files_includes_committed_sibling_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let backend = repo.join("backend");
        std::fs::create_dir_all(&backend).unwrap();
        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
        run_git(&repo, &["add", "."]);
        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);

        let frontend = repo.join("frontend");
        std::fs::create_dir_all(&frontend).unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();

        let expected = repo.join("backend/server.py");
        assert!(
            changed.contains(&expected),
            "committed sibling backend/server.py should be in the set: {changed:?}"
        );
    }
1047
    /// An uncommitted edit to a tracked file is reported even though nothing was
    /// committed after the baseline tag.
    #[test]
    fn try_get_changed_files_includes_modified_tracked_file() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(frontend.join("src")).unwrap();
        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
        run_git(&repo, &["add", "."]);
        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
        run_git(&repo, &["tag", "fallow-baseline-v2"]);
        // Modify the tracked file after tagging, without committing.
        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();

        let expected = repo.join("frontend/src/old.ts");
        assert!(
            changed.contains(&expected),
            "modified tracked file {expected:?} missing from set: {changed:?}"
        );
    }
1071
    /// Resolving from a subdirectory yields the repository root, already in canonical form.
    #[test]
    fn resolve_git_toplevel_returns_canonical_path() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(&frontend).unwrap();

        let toplevel = resolve_git_toplevel(&frontend).unwrap();
        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
        assert_eq!(
            toplevel,
            dunce::canonicalize(&toplevel).unwrap(),
            "resolved toplevel should already be canonical"
        );
    }
1092
    /// A directory without a git repository resolves to `NotARepository`.
    #[test]
    fn resolve_git_toplevel_not_a_repository() {
        let tmp = tempfile::tempdir().unwrap();
        let result = resolve_git_toplevel(tmp.path());
        assert!(
            matches!(result, Err(ChangedFilesError::NotARepository)),
            "expected NotARepository, got {result:?}"
        );
    }
1105
    /// The changed-file lookup reports `NotARepository` for a directory outside any
    /// git repository.
    #[test]
    fn try_get_changed_files_not_a_repository() {
        let tmp = tempfile::tempdir().unwrap();
        let result = try_get_changed_files(tmp.path(), "main");
        assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
    }
1114
    /// With an empty changed set every clone group is dropped and the recomputed stats
    /// fall to zero groups, zero instances and 0% duplication.
    #[test]
    fn filter_duplication_drops_groups_with_no_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![CloneInstance {
                    file: "/a.ts".into(),
                    start_line: 1,
                    end_line: 5,
                    start_col: 0,
                    end_col: 10,
                    fragment: "code".into(),
                }],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 1,
                files_with_clones: 1,
                total_lines: 100,
                duplicated_lines: 5,
                total_tokens: 100,
                duplicated_tokens: 20,
                clone_groups: 1,
                clone_instances: 1,
                duplication_percentage: 5.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert!(report.clone_groups.is_empty());
        assert_eq!(report.stats.clone_groups, 0);
        assert_eq!(report.stats.clone_instances, 0);
        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
    }
1153}