1use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Signature of a replacement for running a prepared git `Command` to completion.
///
/// A hook receives the command that would otherwise be executed with
/// `Command::output` and returns the same `io::Result<Output>`.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;

/// Process-wide hook slot consulted by `spawn_output`; empty until `set_spawn_hook` fills it.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
34
35pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
42 let _ = SPAWN_HOOK.set(hook);
43}
44
45fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
46 if let Some(hook) = SPAWN_HOOK.get() {
47 hook(command)
48 } else {
49 command.output()
50 }
51}
52
/// Checks that `s` is safe to hand to git as a revision and returns it unchanged.
///
/// Accepted characters are ASCII letters and digits plus `.`, `_`, `-`, `/`,
/// `~`, `^`, `@`, `{` and `}`. A `:` or a space is additionally accepted
/// while a `{` is open (for example `HEAD@{1 week ago}`).
///
/// # Errors
///
/// Returns a human-readable reason when the ref is empty, starts with `-`
/// (which git would parse as an option), contains a disallowed character, or
/// ends with a `{` still open.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    if s.is_empty() {
        return Err(String::from("git ref cannot be empty"));
    }
    if s.chars().next() == Some('-') {
        return Err(String::from("git ref cannot start with '-'"));
    }

    let mut brace_open = false;
    for ch in s.chars() {
        if ch == '{' {
            brace_open = true;
        } else if ch == '}' {
            brace_open = false;
        } else {
            let allowed_in_braces = brace_open && (ch == ':' || ch == ' ');
            let allowed_anywhere = ch.is_ascii_alphanumeric() || "._-/~^@".contains(ch);
            if !(allowed_in_braces || allowed_anywhere) {
                return Err(format!("git ref contains disallowed character: '{ch}'"));
            }
        }
    }

    if brace_open {
        Err(String::from("git ref has unclosed '{'"))
    } else {
        Ok(s)
    }
}
88
/// Reasons a changed-files lookup can fail.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// The supplied ref was rejected by `validate_git_ref`; carries the reason.
    InvalidRef(String),
    /// The git process could not be run; carries the I/O error text.
    GitMissing(String),
    /// git's stderr contained "not a git repository".
    NotARepository,
    /// git failed for another reason; carries its trimmed stderr or an explanatory message.
    GitFailed(String),
}
102
103impl ChangedFilesError {
104 pub fn describe(&self) -> String {
108 match self {
109 Self::InvalidRef(e) => format!("invalid git ref: {e}"),
110 Self::GitMissing(e) => format!("failed to run git: {e}"),
111 Self::NotARepository => "not a git repository".to_owned(),
112 Self::GitFailed(stderr) => augment_git_failed(stderr),
113 }
114 }
115}
116
/// Returns `stderr`, with a shallow-clone hint appended when git's message
/// looks like the requested revision could not be found.
///
/// Matching is ASCII case-insensitive. Any other message is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    // Phrases git prints when it cannot resolve the requested revision.
    const MISSING_REVISION_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let haystack = stderr.to_ascii_lowercase();
    let revision_missing = MISSING_REVISION_MARKERS
        .iter()
        .any(|marker| haystack.contains(*marker));

    if !revision_missing {
        return stderr.to_owned();
    }

    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
135
136pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147 let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150 if !output.status.success() {
151 let stderr = String::from_utf8_lossy(&output.stderr);
152 return Err(if stderr.contains("not a git repository") {
153 ChangedFilesError::NotARepository
154 } else {
155 ChangedFilesError::GitFailed(stderr.trim().to_owned())
156 });
157 }
158
159 let raw = String::from_utf8_lossy(&output.stdout);
160 let trimmed = raw.trim();
161 if trimmed.is_empty() {
162 return Err(ChangedFilesError::GitFailed(
163 "git rev-parse --show-toplevel returned empty output".to_owned(),
164 ));
165 }
166
167 let path = PathBuf::from(trimmed);
168 Ok(dunce::canonicalize(&path).unwrap_or(path))
169}
170
171fn collect_git_paths(
172 cwd: &Path,
173 toplevel: &Path,
174 args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176 let output = spawn_output(&mut git_command(cwd, args))
177 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179 if !output.status.success() {
180 let stderr = String::from_utf8_lossy(&output.stderr);
181 return Err(if stderr.contains("not a git repository") {
182 ChangedFilesError::NotARepository
183 } else {
184 ChangedFilesError::GitFailed(stderr.trim().to_owned())
185 });
186 }
187
188 #[cfg(windows)]
189 let normalise_segment = |line: &str| line.replace('/', "\\");
190 #[cfg(not(windows))]
191 let normalise_segment = |line: &str| line.to_owned();
192
193 let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194 .lines()
195 .filter(|line| !line.is_empty())
196 .map(|line| toplevel.join(normalise_segment(line)))
197 .collect();
198
199 Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203 let mut command = crate::spawn::git();
204 command.args(args).current_dir(cwd);
205 command
206}
207
208pub fn try_get_changed_files(
226 root: &Path,
227 git_ref: &str,
228) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
229 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
230 let toplevel = resolve_git_toplevel(root)?;
231 try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
232}
233
234pub fn try_get_changed_files_with_toplevel(
242 cwd: &Path,
243 toplevel: &Path,
244 git_ref: &str,
245) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
246 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
247
248 let mut files = collect_git_paths(
249 cwd,
250 toplevel,
251 &[
252 "diff",
253 "--name-only",
254 "--end-of-options",
255 &format!("{git_ref}...HEAD"),
256 ],
257 )?;
258 files.extend(collect_git_paths(
259 cwd,
260 toplevel,
261 &["diff", "--name-only", "HEAD"],
262 )?);
263 files.extend(collect_git_paths(
264 cwd,
265 toplevel,
266 &["ls-files", "--full-name", "--others", "--exclude-standard"],
267 )?);
268 Ok(files)
269}
270
271pub fn try_get_changed_diff(root: &Path, git_ref: &str) -> Result<String, ChangedFilesError> {
287 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
288 let output = spawn_output(&mut git_command(
289 root,
290 &[
291 "diff",
292 "--relative",
293 "--unified=0",
294 "--end-of-options",
295 &format!("{git_ref}...HEAD"),
296 ],
297 ))
298 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
299
300 if !output.status.success() {
301 let stderr = String::from_utf8_lossy(&output.stderr);
302 return Err(if stderr.contains("not a git repository") {
303 ChangedFilesError::NotARepository
304 } else {
305 ChangedFilesError::GitFailed(stderr.trim().to_owned())
306 });
307 }
308
309 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
310}
311
312#[expect(
316 clippy::print_stderr,
317 reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
318)]
319pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
320 match try_get_changed_files(root, git_ref) {
321 Ok(files) => Some(files),
322 Err(ChangedFilesError::InvalidRef(e)) => {
323 eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
324 None
325 }
326 Err(ChangedFilesError::GitMissing(e)) => {
327 eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
328 None
329 }
330 Err(ChangedFilesError::NotARepository) => {
331 eprintln!("Warning: --changed-since ignored: not a git repository");
332 None
333 }
334 Err(ChangedFilesError::GitFailed(stderr)) => {
335 eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
336 None
337 }
338 }
339}
340
/// Narrows `results` in place to findings that touch a file in `changed_files`.
///
/// Paths on both sides are compared after `dunce::simplified` normalisation.
/// Only the result collections named in the body are filtered; every other
/// field of `results` is left as it was.
#[expect(
    clippy::implicit_hasher,
    reason = "fallow standardizes on FxHashSet across the workspace"
)]
pub fn filter_results_by_changed_files(
    results: &mut AnalysisResults,
    changed_files: &FxHashSet<PathBuf>,
) {
    // Normalise the changed set once so every lookup below is a plain hash probe.
    let cf = normalize_changed_files_set(changed_files);

    // Findings anchored to a single file: keep when that file changed.
    results
        .unused_files
        .retain(|f| contains_normalized(&cf, &f.file.path));
    results
        .unused_exports
        .retain(|e| contains_normalized(&cf, &e.export.path));
    results
        .unused_types
        .retain(|e| contains_normalized(&cf, &e.export.path));
    results
        .private_type_leaks
        .retain(|e| contains_normalized(&cf, &e.leak.path));
    results
        .unused_enum_members
        .retain(|m| contains_normalized(&cf, &m.member.path));
    results
        .unused_class_members
        .retain(|m| contains_normalized(&cf, &m.member.path));
    results
        .unresolved_imports
        .retain(|i| contains_normalized(&cf, &i.import.path));

    // Unlisted dependencies: keep when any importing file changed.
    results.unlisted_dependencies.retain(|d| {
        d.dep
            .imported_from
            .iter()
            .any(|s| contains_normalized(&cf, &s.path))
    });

    // Duplicate exports: drop unchanged locations first, then drop groups
    // left with fewer than two locations.
    for dup in &mut results.duplicate_exports {
        dup.export
            .locations
            .retain(|loc| contains_normalized(&cf, &loc.path));
    }
    results
        .duplicate_exports
        .retain(|d| d.export.locations.len() >= 2);

    // Cycles: keep when at least one participating file changed.
    results
        .circular_dependencies
        .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));

    results
        .re_export_cycles
        .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));

    // Boundary violations: only the importing side (`from_path`) is checked.
    results
        .boundary_violations
        .retain(|v| contains_normalized(&cf, &v.violation.from_path));

    results
        .stale_suppressions
        .retain(|s| contains_normalized(&cf, &s.path));

    // Security findings: keep when the finding's own file, any trace hop, or
    // any hop of the untrusted-source trace (when present) changed.
    results.security_findings.retain(|f| {
        contains_normalized(&cf, &f.path)
            || f.trace
                .iter()
                .any(|hop| contains_normalized(&cf, &hop.path))
            || f.reachability.as_ref().is_some_and(|reachability| {
                reachability
                    .untrusted_source_trace
                    .iter()
                    .any(|hop| contains_normalized(&cf, &hop.path))
            })
    });

    results
        .unresolved_catalog_references
        .retain(|r| contains_normalized(&cf, &r.reference.path));
    // Empty catalog groups use the relative-path-tolerant lookup, which also
    // matches a relative path against the tail of a changed path.
    results
        .empty_catalog_groups
        .retain(|g| normalized_set_contains_path(&cf, &g.group.path));

    results
        .unused_dependency_overrides
        .retain(|o| contains_normalized(&cf, &o.entry.path));
    results
        .misconfigured_dependency_overrides
        .retain(|o| contains_normalized(&cf, &o.entry.path));
}
438
439fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
452 changed_files
453 .iter()
454 .map(|p| dunce::simplified(p).to_path_buf())
455 .collect()
456}
457
458fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
459 normalized.contains(dunce::simplified(path))
460}
461
462fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
463 contains_normalized(normalized, path)
464 || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
465}
466
467fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
473 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
474 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
475 let mut duplicated_tokens = 0_usize;
476 let mut clone_instances = 0_usize;
477
478 for group in &report.clone_groups {
479 for instance in &group.instances {
480 files_with_clones.insert(&instance.file);
481 clone_instances += 1;
482 let lines = file_dup_lines.entry(&instance.file).or_default();
483 for line in instance.start_line..=instance.end_line {
484 lines.insert(line);
485 }
486 }
487 duplicated_tokens += group.token_count * group.instances.len();
488 }
489
490 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
491
492 DuplicationStats {
493 total_files: report.stats.total_files,
494 files_with_clones: files_with_clones.len(),
495 total_lines: report.stats.total_lines,
496 duplicated_lines,
497 total_tokens: report.stats.total_tokens,
498 duplicated_tokens,
499 clone_groups: report.clone_groups.len(),
500 clone_instances,
501 #[expect(
502 clippy::cast_precision_loss,
503 reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
504 )]
505 duplication_percentage: if report.stats.total_lines > 0 {
506 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
507 } else {
508 0.0
509 },
510 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
511 }
512}
513
514#[expect(
519 clippy::implicit_hasher,
520 reason = "fallow standardizes on FxHashSet across the workspace"
521)]
522pub fn filter_duplication_by_changed_files(
523 report: &mut DuplicationReport,
524 changed_files: &FxHashSet<PathBuf>,
525 root: &Path,
526) {
527 let cf = normalize_changed_files_set(changed_files);
528 report.clone_groups.retain(|g| {
529 g.instances
530 .iter()
531 .any(|i| contains_normalized(&cf, &i.file))
532 });
533 report.clone_families = families::group_into_families(&report.clone_groups, root);
534 report.mirrored_directories =
535 families::detect_mirrored_directories(&report.clone_families, root);
536 report.stats = recompute_duplication_stats(report);
537}
538
539#[cfg(test)]
540mod tests {
541 use super::*;
542 use crate::duplicates::{CloneGroup, CloneInstance};
543 use crate::results::{
544 BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
545 SecurityFindingKind, TraceHop, TraceHopRole, UnusedExport, UnusedFile,
546 };
547 use fallow_types::output_dead_code::{
548 BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
549 UnusedExportFinding, UnusedFileFinding,
550 };
551 use fallow_types::results::SecurityReachability;
552
    /// Each error variant's `describe()` output carries its payload or fixed text.
    #[test]
    fn changed_files_error_describe_variants() {
        assert!(
            ChangedFilesError::InvalidRef("bad".to_owned())
                .describe()
                .contains("invalid git ref")
        );
        assert!(
            ChangedFilesError::GitMissing("oops".to_owned())
                .describe()
                .contains("oops")
        );
        assert_eq!(
            ChangedFilesError::NotARepository.describe(),
            "not a git repository"
        );
        assert!(
            ChangedFilesError::GitFailed("bad ref".to_owned())
                .describe()
                .contains("bad ref")
        );
    }
575
    /// An "unknown revision" failure keeps git's stderr and gains the shallow-clone hint.
    #[test]
    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
        assert!(described.contains(stderr), "original stderr preserved");
        assert!(
            described.contains("shallow clone"),
            "hint surfaced: {described}"
        );
        assert!(
            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
            "hint actionable: {described}"
        );
    }
590
591 #[test]
592 fn augment_git_failed_passthrough_for_other_errors() {
593 let stderr = "fatal: refusing to merge unrelated histories";
594 let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
595 assert_eq!(described, stderr);
596 }
597
598 #[test]
599 fn validate_git_ref_rejects_leading_dash() {
600 assert!(validate_git_ref("--upload-pack=evil").is_err());
601 assert!(validate_git_ref("-flag").is_err());
602 }
603
604 #[test]
605 fn validate_git_ref_accepts_baseline_tag() {
606 assert_eq!(
607 validate_git_ref("fallow-baseline").unwrap(),
608 "fallow-baseline"
609 );
610 }
611
    /// A leading-dash ref surfaces as `InvalidRef` with the validator's reason.
    #[test]
    fn try_get_changed_files_rejects_invalid_ref() {
        let err = try_get_changed_files(Path::new("/"), "--evil")
            .expect_err("leading-dash ref must be rejected");
        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
        assert!(err.describe().contains("cannot start with"));
    }
619
620 #[test]
621 fn validate_git_ref_rejects_option_like_ref() {
622 assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
623 }
624
625 #[test]
626 fn validate_git_ref_allows_reflog_relative_date() {
627 assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
628 }
629
    /// An `--output=<path>` ref is rejected as `InvalidRef`, and the file it
    /// names does not exist afterwards.
    #[test]
    fn try_get_changed_files_rejects_option_like_ref_before_git() {
        let root = tempfile::tempdir().expect("create temp dir");
        let proof_path = root.path().join("proof");

        let result = try_get_changed_files(
            root.path(),
            &format!("--output={}", proof_path.to_string_lossy()),
        );

        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
        assert!(
            !proof_path.exists(),
            "invalid changedSince ref must not be passed through to git as an option"
        );
    }
646
    /// Every variable in `AMBIENT_GIT_ENV_VARS` is explicitly removed from the
    /// environment of commands built by `git_command`.
    #[test]
    fn git_command_clears_parent_git_environment() {
        let command = git_command(Path::new("."), &["status", "--short"]);
        let overrides: Vec<_> = command.get_envs().collect();

        for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
            assert!(
                overrides
                    .iter()
                    // A `None` value in `get_envs` marks a variable that is removed.
                    .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
                "git helper must clear inherited {var}",
            );
        }
    }
661
    /// File-anchored findings survive only when their file is in the changed set.
    #[test]
    fn filter_results_keeps_only_changed_files() {
        let mut results = AnalysisResults::default();
        results
            .unused_files
            .push(UnusedFileFinding::with_actions(UnusedFile {
                path: "/a.ts".into(),
            }));
        results
            .unused_files
            .push(UnusedFileFinding::with_actions(UnusedFile {
                path: "/b.ts".into(),
            }));
        results
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                path: "/a.ts".into(),
                export_name: "foo".into(),
                is_type_only: false,
                line: 1,
                col: 0,
                span_start: 0,
                is_re_export: false,
            }));

        // Only /a.ts changed, so the /b.ts finding must be dropped.
        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.unused_files.len(), 1);
        assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
        assert_eq!(results.unused_exports.len(), 1);
    }
696
    /// `unused_dependencies` is not filtered: the entry survives an empty changed set.
    #[test]
    fn filter_results_preserves_dependency_level_issues() {
        let mut results = AnalysisResults::default();
        results.unused_dependencies.push(
            fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
                crate::results::UnusedDependency {
                    package_name: "lodash".into(),
                    location: crate::results::DependencyLocation::Dependencies,
                    path: "/pkg.json".into(),
                    line: 3,
                    used_in_workspaces: Vec::new(),
                },
            ),
        );

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.unused_dependencies.len(), 1);
    }
717
    /// A cycle is kept when any one of its files is in the changed set.
    #[test]
    fn filter_results_keeps_circular_dep_when_any_file_changed() {
        let mut results = AnalysisResults::default();
        results
            .circular_dependencies
            .push(CircularDependencyFinding::with_actions(
                CircularDependency {
                    files: vec!["/a.ts".into(), "/b.ts".into()],
                    length: 2,
                    line: 1,
                    col: 0,
                    edges: Vec::new(),
                    is_cross_package: false,
                },
            ));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert_eq!(results.circular_dependencies.len(), 1);
    }
740
    /// A cycle is dropped when none of its files is in the changed set.
    #[test]
    fn filter_results_drops_circular_dep_when_no_file_changed() {
        let mut results = AnalysisResults::default();
        results
            .circular_dependencies
            .push(CircularDependencyFinding::with_actions(
                CircularDependency {
                    files: vec!["/a.ts".into(), "/b.ts".into()],
                    length: 2,
                    line: 1,
                    col: 0,
                    edges: Vec::new(),
                    is_cross_package: false,
                },
            ));

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.circular_dependencies.is_empty());
    }
761
    /// A boundary violation is dropped when only its target (`to_path`) changed;
    /// the filter looks at the importing side.
    #[test]
    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
        let mut results = AnalysisResults::default();
        results
            .boundary_violations
            .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
                from_path: "/a.ts".into(),
                to_path: "/b.ts".into(),
                from_zone: "ui".into(),
                to_zone: "data".into(),
                import_specifier: "../data/db".into(),
                line: 1,
                col: 0,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.boundary_violations.is_empty());
    }
783
    /// A security finding is kept when a file in its `trace` changed, even
    /// though the finding's own file did not.
    #[test]
    fn filter_results_keeps_security_finding_when_trace_file_changed() {
        let mut results = AnalysisResults::default();
        results.security_findings.push(SecurityFinding {
            finding_id: String::new(),
            candidate: fallow_types::results::SecurityCandidate::default(),
            taint_flow: None,
            attack_surface: None,
            kind: SecurityFindingKind::ClientServerLeak,
            category: None,
            cwe: None,
            path: "/project/src/client.tsx".into(),
            line: 2,
            col: 0,
            evidence: "candidate".into(),
            source_backed: false,
            trace: vec![
                TraceHop {
                    path: "/project/src/client.tsx".into(),
                    line: 2,
                    col: 0,
                    role: TraceHopRole::ClientBoundary,
                },
                TraceHop {
                    path: "/project/src/server.ts".into(),
                    line: 1,
                    col: 0,
                    role: TraceHopRole::SecretSource,
                },
            ],
            actions: Vec::new(),
            dead_code: None,
            reachability: None,
            runtime: None,
        });

        // Only the second trace hop's file is marked as changed.
        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/project/src/server.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.security_findings.len(), 1);
    }
827
    /// A security finding with an empty `trace` is kept when a file in its
    /// reachability `untrusted_source_trace` changed.
    #[test]
    fn filter_results_keeps_security_finding_when_untrusted_source_trace_file_changed() {
        let mut results = AnalysisResults::default();
        results.security_findings.push(SecurityFinding {
            finding_id: String::new(),
            candidate: fallow_types::results::SecurityCandidate::default(),
            taint_flow: None,
            attack_surface: None,
            kind: SecurityFindingKind::TaintedSink,
            category: Some("command-injection".into()),
            cwe: Some(78),
            path: "/project/src/runner.ts".into(),
            line: 4,
            col: 2,
            evidence: "candidate".into(),
            source_backed: false,
            trace: Vec::new(),
            actions: Vec::new(),
            dead_code: None,
            reachability: Some(SecurityReachability {
                reachable_from_entry: false,
                reachable_from_untrusted_source: true,
                untrusted_source_hop_count: Some(1),
                untrusted_source_trace: vec![
                    TraceHop {
                        path: "/project/src/route.ts".into(),
                        line: 1,
                        col: 0,
                        role: TraceHopRole::UntrustedSource,
                    },
                    TraceHop {
                        path: "/project/src/runner.ts".into(),
                        line: 4,
                        col: 2,
                        role: TraceHopRole::Sink,
                    },
                ],
                blast_radius: 0,
                crosses_boundary: false,
            }),
            runtime: None,
        });

        // Only the untrusted-source hop's file is marked as changed.
        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/project/src/route.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.security_findings.len(), 1);
    }
878
    /// An empty-catalog-group finding with a relative path is kept when an
    /// absolute changed path ends with that relative path.
    #[test]
    fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
        let mut results = AnalysisResults::default();
        results
            .empty_catalog_groups
            .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
                catalog_name: "legacy".into(),
                path: PathBuf::from("pnpm-workspace.yaml"),
                line: 4,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));

        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.empty_catalog_groups.len(), 1);
        assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
    }
898
    /// A clone group with one changed instance is kept whole: both instances
    /// remain and the recomputed stats count them.
    #[test]
    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![
                    CloneInstance {
                        file: "/a.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                    CloneInstance {
                        file: "/b.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                ],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 2,
                files_with_clones: 2,
                total_lines: 100,
                duplicated_lines: 10,
                total_tokens: 200,
                duplicated_tokens: 40,
                clone_groups: 1,
                clone_instances: 2,
                duplication_percentage: 10.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert_eq!(report.clone_groups.len(), 1);
        assert_eq!(report.stats.clone_groups, 1);
        assert_eq!(report.stats.clone_instances, 2);
    }
948
    /// Windows only: a clone instance path with the `\\?\` verbatim prefix
    /// matches a changed-file entry written without it.
    #[cfg(windows)]
    #[test]
    fn filter_duplication_normalises_verbatim_prefix_mismatch() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![
                    CloneInstance {
                        file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                    CloneInstance {
                        file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                ],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 2,
                files_with_clones: 2,
                total_lines: 100,
                duplicated_lines: 10,
                total_tokens: 200,
                duplicated_tokens: 40,
                clone_groups: 1,
                clone_instances: 2,
                duplication_percentage: 10.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        // The changed entry deliberately omits the verbatim prefix.
        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));

        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert_eq!(
            report.clone_groups.len(),
            1,
            "verbatim instance path must match non-verbatim changed-file entry"
        );
    }
1008
    /// Windows only: a finding path with the `\\?\` verbatim prefix matches a
    /// changed-file entry written without it.
    #[cfg(windows)]
    #[test]
    fn filter_results_normalises_verbatim_prefix_mismatch() {
        let mut results = AnalysisResults::default();
        results
            .unused_exports
            .push(UnusedExportFinding::with_actions(UnusedExport {
                path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
                export_name: "foo".into(),
                is_type_only: false,
                line: 1,
                col: 0,
                span_start: 0,
                is_re_export: false,
            }));

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));

        filter_results_by_changed_files(&mut results, &changed);
        assert_eq!(
            results.unused_exports.len(),
            1,
            "verbatim finding path must match non-verbatim changed-file entry"
        );
    }
1035
    /// Initialises a git repository in `repo` on branch `main` with one
    /// commit (`seed.txt`) tagged `fallow-baseline`, and returns the
    /// canonicalized repository path.
    fn init_repo(repo: &Path) -> PathBuf {
        run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
        // Repo-local identity and signing settings used by the commit below.
        run_git(repo, &["config", "user.email", "test@example.com"]);
        run_git(repo, &["config", "user.name", "test"]);
        run_git(repo, &["config", "commit.gpgsign", "false"]);
        std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
        run_git(repo, &["add", "seed.txt"]);
        run_git(repo, &["commit", "--quiet", "-m", "initial"]);
        run_git(repo, &["tag", "fallow-baseline"]);
        dunce::canonicalize(repo).unwrap()
    }
1058
1059 fn run_git(cwd: &Path, args: &[&str]) {
1060 let output = std::process::Command::new("git")
1061 .args(args)
1062 .current_dir(cwd)
1063 .output()
1064 .expect("git available");
1065 assert!(
1066 output.status.success(),
1067 "git {args:?} failed: {}",
1068 String::from_utf8_lossy(&output.stderr)
1069 );
1070 }
1071
    /// With the workspace at the repo root, an untracked new file appears in
    /// the changed set under its canonical absolute path.
    #[test]
    fn try_get_changed_files_workspace_at_repo_root() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        std::fs::create_dir_all(repo.join("src")).unwrap();
        std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();

        let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();

        let expected = repo.join("src/new.ts");
        assert!(
            changed.contains(&expected),
            "changed set should contain {expected:?}; actual: {changed:?}"
        );
    }
1089
    /// With the workspace in a subdirectory, paths are joined onto the repo
    /// top-level rather than the workspace, so the subdirectory name is not
    /// doubled.
    #[test]
    fn try_get_changed_files_workspace_in_subdirectory() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(frontend.join("src")).unwrap();
        std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();

        let expected = repo.join("frontend/src/new.ts");
        assert!(
            changed.contains(&expected),
            "changed set should contain canonical {expected:?}; actual: {changed:?}"
        );
        // Joining onto the workspace instead of the top-level would yield this path.
        let bogus = frontend.join("frontend/src/new.ts");
        assert!(
            !changed.contains(&bogus),
            "changed set must not contain double-frontend path {bogus:?}"
        );
    }
1118
    /// A file committed after the baseline in a sibling directory is reported
    /// even when the lookup runs from a different subdirectory.
    #[test]
    fn try_get_changed_files_includes_committed_sibling_changes() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let backend = repo.join("backend");
        std::fs::create_dir_all(&backend).unwrap();
        std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
        run_git(&repo, &["add", "."]);
        run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);

        let frontend = repo.join("frontend");
        std::fs::create_dir_all(&frontend).unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();

        let expected = repo.join("backend/server.py");
        assert!(
            changed.contains(&expected),
            "committed sibling backend/server.py should be in the set: {changed:?}"
        );
    }
1154
    /// A tracked file modified in the working tree after the baseline tag,
    /// without being committed, is reported as changed.
    #[test]
    fn try_get_changed_files_includes_modified_tracked_file() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(frontend.join("src")).unwrap();
        std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
        run_git(&repo, &["add", "."]);
        run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
        run_git(&repo, &["tag", "fallow-baseline-v2"]);
        // Edit after tagging, without committing.
        std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();

        let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();

        let expected = repo.join("frontend/src/old.ts");
        assert!(
            changed.contains(&expected),
            "modified tracked file {expected:?} missing from set: {changed:?}"
        );
    }
1178
    /// Resolving from a subdirectory yields the repo root, already in canonical form.
    #[test]
    fn resolve_git_toplevel_returns_canonical_path() {
        let tmp = tempfile::tempdir().unwrap();
        let repo = init_repo(tmp.path());
        let frontend = repo.join("frontend");
        std::fs::create_dir_all(&frontend).unwrap();

        let toplevel = resolve_git_toplevel(&frontend).unwrap();
        assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
        assert_eq!(
            toplevel,
            dunce::canonicalize(&toplevel).unwrap(),
            "resolved toplevel should already be canonical"
        );
    }
1199
1200 #[test]
1204 fn resolve_git_toplevel_not_a_repository() {
1205 let tmp = tempfile::tempdir().unwrap();
1206 let result = resolve_git_toplevel(tmp.path());
1207 assert!(
1208 matches!(result, Err(ChangedFilesError::NotARepository)),
1209 "expected NotARepository, got {result:?}"
1210 );
1211 }
1212
1213 #[test]
1216 fn try_get_changed_files_not_a_repository() {
1217 let tmp = tempfile::tempdir().unwrap();
1218 let result = try_get_changed_files(tmp.path(), "main");
1219 assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
1220 }
1221
    /// With an empty changed set the only clone group is dropped and the
    /// recomputed stats fall to zero.
    #[test]
    fn filter_duplication_drops_groups_with_no_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![CloneInstance {
                    file: "/a.ts".into(),
                    start_line: 1,
                    end_line: 5,
                    start_col: 0,
                    end_col: 10,
                    fragment: "code".into(),
                }],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 1,
                files_with_clones: 1,
                total_lines: 100,
                duplicated_lines: 5,
                total_tokens: 100,
                duplicated_tokens: 20,
                clone_groups: 1,
                clone_instances: 1,
                duplication_percentage: 5.0,
                clone_groups_below_min_occurrences: 0,
            },
        };

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        assert!(report.clone_groups.is_empty());
        assert_eq!(report.stats.clone_groups, 0);
        assert_eq!(report.stats.clone_instances, 0);
        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
    }
1260}