1use std::path::{Path, PathBuf};
17use std::process::Output;
18use std::sync::OnceLock;
19
20use rustc_hash::{FxHashMap, FxHashSet};
21
22use crate::duplicates::{DuplicationReport, DuplicationStats, families};
23use crate::results::AnalysisResults;
24
/// Signature of a function that runs a prepared [`std::process::Command`] and
/// returns its captured [`Output`].
///
/// A function of this shape can be registered with [`set_spawn_hook`]; once
/// registered, this module calls it instead of `Command::output` when it
/// spawns git.
pub type ChangedFilesSpawnHook = fn(&mut std::process::Command) -> std::io::Result<Output>;
32
/// Set-once slot for the optional spawn hook. It starts empty, is filled by
/// `set_spawn_hook`, and is read by `spawn_output` before every git spawn.
static SPAWN_HOOK: OnceLock<ChangedFilesSpawnHook> = OnceLock::new();
34
35pub fn set_spawn_hook(hook: ChangedFilesSpawnHook) {
42 let _ = SPAWN_HOOK.set(hook);
43}
44
45fn spawn_output(command: &mut std::process::Command) -> std::io::Result<Output> {
46 if let Some(hook) = SPAWN_HOOK.get() {
47 hook(command)
48 } else {
49 command.output()
50 }
51}
52
/// Checks that `s` is safe to hand to git as a revision argument.
///
/// Accepted input is non-empty, does not start with `-` (so it can never be
/// parsed as a command-line option), and consists only of ASCII
/// alphanumerics, the punctuation `. _ - / ~ ^ @`, and braces. A colon or a
/// space is accepted only after a `{` and before the next `}` (for example
/// `HEAD@{1 week ago}`).
///
/// # Errors
///
/// Returns a human-readable message when the ref is empty, starts with `-`,
/// contains a disallowed character, or ends while a `{` is still open.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    if s.is_empty() {
        return Err("git ref cannot be empty".to_string());
    }
    if s.starts_with('-') {
        return Err("git ref cannot start with '-'".to_string());
    }

    let mut brace_open = false;
    for ch in s.chars() {
        let permitted = match ch {
            '{' => {
                brace_open = true;
                true
            }
            '}' => {
                brace_open = false;
                true
            }
            // Only meaningful inside `@{...}` selectors.
            ':' | ' ' => brace_open,
            other => {
                other.is_ascii_alphanumeric()
                    || matches!(other, '.' | '_' | '-' | '/' | '~' | '^' | '@')
            }
        };
        if !permitted {
            return Err(format!("git ref contains disallowed character: '{ch}'"));
        }
    }

    if brace_open {
        Err("git ref has unclosed '{'".to_string())
    } else {
        Ok(s)
    }
}
88
/// Reasons the changed-file lookups in this module can fail.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// The supplied ref was rejected by `validate_git_ref`; carries the
    /// rejection message.
    InvalidRef(String),
    /// The git process could not be run; carries the I/O error text.
    GitMissing(String),
    /// Git exited unsuccessfully and its stderr contained
    /// "not a git repository".
    NotARepository,
    /// Git failed for any other reason; carries the trimmed stderr, or a
    /// fixed message when `rev-parse --show-toplevel` printed nothing.
    GitFailed(String),
}
102
103impl ChangedFilesError {
104 pub fn describe(&self) -> String {
108 match self {
109 Self::InvalidRef(e) => format!("invalid git ref: {e}"),
110 Self::GitMissing(e) => format!("failed to run git: {e}"),
111 Self::NotARepository => "not a git repository".to_owned(),
112 Self::GitFailed(stderr) => augment_git_failed(stderr),
113 }
114 }
115}
116
/// Returns `stderr`, extended with a shallow-clone hint when the text looks
/// like git could not resolve a revision.
///
/// The check is an ASCII-case-insensitive substring search for a small set
/// of phrases; any other message is returned unchanged.
fn augment_git_failed(stderr: &str) -> String {
    const MISSING_REVISION_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let haystack = stderr.to_ascii_lowercase();
    let revision_missing = MISSING_REVISION_MARKERS
        .iter()
        .any(|&marker| haystack.contains(marker));

    if !revision_missing {
        return stderr.to_owned();
    }

    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
135
136pub fn resolve_git_toplevel(cwd: &Path) -> Result<PathBuf, ChangedFilesError> {
147 let output = spawn_output(&mut git_command(cwd, &["rev-parse", "--show-toplevel"]))
148 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
149
150 if !output.status.success() {
151 let stderr = String::from_utf8_lossy(&output.stderr);
152 return Err(if stderr.contains("not a git repository") {
153 ChangedFilesError::NotARepository
154 } else {
155 ChangedFilesError::GitFailed(stderr.trim().to_owned())
156 });
157 }
158
159 let raw = String::from_utf8_lossy(&output.stdout);
160 let trimmed = raw.trim();
161 if trimmed.is_empty() {
162 return Err(ChangedFilesError::GitFailed(
163 "git rev-parse --show-toplevel returned empty output".to_owned(),
164 ));
165 }
166
167 let path = PathBuf::from(trimmed);
168 Ok(dunce::canonicalize(&path).unwrap_or(path))
169}
170
171fn collect_git_paths(
172 cwd: &Path,
173 toplevel: &Path,
174 args: &[&str],
175) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
176 let output = spawn_output(&mut git_command(cwd, args))
177 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
178
179 if !output.status.success() {
180 let stderr = String::from_utf8_lossy(&output.stderr);
181 return Err(if stderr.contains("not a git repository") {
182 ChangedFilesError::NotARepository
183 } else {
184 ChangedFilesError::GitFailed(stderr.trim().to_owned())
185 });
186 }
187
188 #[cfg(windows)]
189 let normalise_segment = |line: &str| line.replace('/', "\\");
190 #[cfg(not(windows))]
191 let normalise_segment = |line: &str| line.to_owned();
192
193 let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
194 .lines()
195 .filter(|line| !line.is_empty())
196 .map(|line| toplevel.join(normalise_segment(line)))
197 .collect();
198
199 Ok(files)
200}
201
202fn git_command(cwd: &Path, args: &[&str]) -> std::process::Command {
203 let mut command = crate::spawn::git();
204 command.args(args).current_dir(cwd);
205 command
206}
207
208pub fn try_get_changed_files(
226 root: &Path,
227 git_ref: &str,
228) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
229 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
230 let toplevel = resolve_git_toplevel(root)?;
231 try_get_changed_files_with_toplevel(root, &toplevel, git_ref)
232}
233
234pub fn try_get_changed_files_with_toplevel(
242 cwd: &Path,
243 toplevel: &Path,
244 git_ref: &str,
245) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
246 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
247
248 let mut files = collect_git_paths(
249 cwd,
250 toplevel,
251 &[
252 "diff",
253 "--name-only",
254 "--end-of-options",
255 &format!("{git_ref}...HEAD"),
256 ],
257 )?;
258 files.extend(collect_git_paths(
259 cwd,
260 toplevel,
261 &["diff", "--name-only", "HEAD"],
262 )?);
263 files.extend(collect_git_paths(
264 cwd,
265 toplevel,
266 &["ls-files", "--full-name", "--others", "--exclude-standard"],
267 )?);
268 Ok(files)
269}
270
271pub fn try_get_changed_diff(root: &Path, git_ref: &str) -> Result<String, ChangedFilesError> {
287 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
288 let output = spawn_output(&mut git_command(
289 root,
290 &[
291 "diff",
292 "--relative",
293 "--unified=0",
294 "--end-of-options",
295 &format!("{git_ref}...HEAD"),
296 ],
297 ))
298 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
299
300 if !output.status.success() {
301 let stderr = String::from_utf8_lossy(&output.stderr);
302 return Err(if stderr.contains("not a git repository") {
303 ChangedFilesError::NotARepository
304 } else {
305 ChangedFilesError::GitFailed(stderr.trim().to_owned())
306 });
307 }
308
309 Ok(String::from_utf8_lossy(&output.stdout).into_owned())
310}
311
312#[expect(
316 clippy::print_stderr,
317 reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
318)]
319pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
320 match try_get_changed_files(root, git_ref) {
321 Ok(files) => Some(files),
322 Err(ChangedFilesError::InvalidRef(e)) => {
323 eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
324 None
325 }
326 Err(ChangedFilesError::GitMissing(e)) => {
327 eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
328 None
329 }
330 Err(ChangedFilesError::NotARepository) => {
331 eprintln!("Warning: --changed-since ignored: not a git repository");
332 None
333 }
334 Err(ChangedFilesError::GitFailed(stderr)) => {
335 eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
336 None
337 }
338 }
339}
340
341#[expect(
349 clippy::implicit_hasher,
350 reason = "fallow standardizes on FxHashSet across the workspace"
351)]
352pub fn filter_results_by_changed_files(
353 results: &mut AnalysisResults,
354 changed_files: &FxHashSet<PathBuf>,
355) {
356 let cf = normalize_changed_files_set(changed_files);
357 results
358 .unused_files
359 .retain(|f| contains_normalized(&cf, &f.file.path));
360 results
361 .unused_exports
362 .retain(|e| contains_normalized(&cf, &e.export.path));
363 results
364 .unused_types
365 .retain(|e| contains_normalized(&cf, &e.export.path));
366 results
367 .private_type_leaks
368 .retain(|e| contains_normalized(&cf, &e.leak.path));
369 results
370 .unused_enum_members
371 .retain(|m| contains_normalized(&cf, &m.member.path));
372 results
373 .unused_class_members
374 .retain(|m| contains_normalized(&cf, &m.member.path));
375 results
376 .unresolved_imports
377 .retain(|i| contains_normalized(&cf, &i.import.path));
378
379 results.unlisted_dependencies.retain(|d| {
380 d.dep
381 .imported_from
382 .iter()
383 .any(|s| contains_normalized(&cf, &s.path))
384 });
385
386 for dup in &mut results.duplicate_exports {
387 dup.export
388 .locations
389 .retain(|loc| contains_normalized(&cf, &loc.path));
390 }
391 results
392 .duplicate_exports
393 .retain(|d| d.export.locations.len() >= 2);
394
395 results
396 .circular_dependencies
397 .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
398
399 results
400 .re_export_cycles
401 .retain(|c| c.cycle.files.iter().any(|f| contains_normalized(&cf, f)));
402
403 results
404 .boundary_violations
405 .retain(|v| contains_normalized(&cf, &v.violation.from_path));
406
407 results
408 .stale_suppressions
409 .retain(|s| contains_normalized(&cf, &s.path));
410
411 results.security_findings.retain(|f| {
412 contains_normalized(&cf, &f.path)
413 || f.trace
414 .iter()
415 .any(|hop| contains_normalized(&cf, &hop.path))
416 || f.reachability.as_ref().is_some_and(|reachability| {
417 reachability
418 .untrusted_source_trace
419 .iter()
420 .any(|hop| contains_normalized(&cf, &hop.path))
421 })
422 });
423
424 results
425 .unresolved_catalog_references
426 .retain(|r| contains_normalized(&cf, &r.reference.path));
427 results
428 .empty_catalog_groups
429 .retain(|g| normalized_set_contains_path(&cf, &g.group.path));
430
431 results
432 .unused_dependency_overrides
433 .retain(|o| contains_normalized(&cf, &o.entry.path));
434 results
435 .misconfigured_dependency_overrides
436 .retain(|o| contains_normalized(&cf, &o.entry.path));
437}
438
439fn normalize_changed_files_set(changed_files: &FxHashSet<PathBuf>) -> FxHashSet<PathBuf> {
452 changed_files
453 .iter()
454 .map(|p| dunce::simplified(p).to_path_buf())
455 .collect()
456}
457
458fn contains_normalized(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
459 normalized.contains(dunce::simplified(path))
460}
461
462fn normalized_set_contains_path(normalized: &FxHashSet<PathBuf>, path: &Path) -> bool {
463 contains_normalized(normalized, path)
464 || (path.is_relative() && normalized.iter().any(|changed| changed.ends_with(path)))
465}
466
467fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
473 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
474 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
475 let mut duplicated_tokens = 0_usize;
476 let mut clone_instances = 0_usize;
477
478 for group in &report.clone_groups {
479 for instance in &group.instances {
480 files_with_clones.insert(&instance.file);
481 clone_instances += 1;
482 let lines = file_dup_lines.entry(&instance.file).or_default();
483 for line in instance.start_line..=instance.end_line {
484 lines.insert(line);
485 }
486 }
487 duplicated_tokens += group.token_count * group.instances.len();
488 }
489
490 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
491
492 DuplicationStats {
493 total_files: report.stats.total_files,
494 files_with_clones: files_with_clones.len(),
495 total_lines: report.stats.total_lines,
496 duplicated_lines,
497 total_tokens: report.stats.total_tokens,
498 duplicated_tokens,
499 clone_groups: report.clone_groups.len(),
500 clone_instances,
501 #[expect(
502 clippy::cast_precision_loss,
503 reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
504 )]
505 duplication_percentage: if report.stats.total_lines > 0 {
506 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
507 } else {
508 0.0
509 },
510 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
511 }
512}
513
514#[expect(
519 clippy::implicit_hasher,
520 reason = "fallow standardizes on FxHashSet across the workspace"
521)]
522pub fn filter_duplication_by_changed_files(
523 report: &mut DuplicationReport,
524 changed_files: &FxHashSet<PathBuf>,
525 root: &Path,
526) {
527 let cf = normalize_changed_files_set(changed_files);
528 report.clone_groups.retain(|g| {
529 g.instances
530 .iter()
531 .any(|i| contains_normalized(&cf, &i.file))
532 });
533 report.clone_families = families::group_into_families(&report.clone_groups, root);
534 report.mirrored_directories =
535 families::detect_mirrored_directories(&report.clone_families, root);
536 report.stats = recompute_duplication_stats(report);
537}
538
539#[cfg(test)]
540mod tests {
541 use super::*;
542 use crate::duplicates::{CloneGroup, CloneInstance};
543 use crate::results::{
544 BoundaryViolation, CircularDependency, EmptyCatalogGroup, SecurityFinding,
545 SecurityFindingKind, TraceHop, TraceHopRole, UnusedExport, UnusedFile,
546 };
547 use fallow_types::output_dead_code::{
548 BoundaryViolationFinding, CircularDependencyFinding, EmptyCatalogGroupFinding,
549 UnusedExportFinding, UnusedFileFinding,
550 };
551 use fallow_types::results::{SecurityReachability, SecuritySeverity};
552
553 #[test]
554 fn changed_files_error_describe_variants() {
555 assert!(
556 ChangedFilesError::InvalidRef("bad".to_owned())
557 .describe()
558 .contains("invalid git ref")
559 );
560 assert!(
561 ChangedFilesError::GitMissing("oops".to_owned())
562 .describe()
563 .contains("oops")
564 );
565 assert_eq!(
566 ChangedFilesError::NotARepository.describe(),
567 "not a git repository"
568 );
569 assert!(
570 ChangedFilesError::GitFailed("bad ref".to_owned())
571 .describe()
572 .contains("bad ref")
573 );
574 }
575
576 #[test]
577 fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
578 let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
579 let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
580 assert!(described.contains(stderr), "original stderr preserved");
581 assert!(
582 described.contains("shallow clone"),
583 "hint surfaced: {described}"
584 );
585 assert!(
586 described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
587 "hint actionable: {described}"
588 );
589 }
590
591 #[test]
592 fn augment_git_failed_passthrough_for_other_errors() {
593 let stderr = "fatal: refusing to merge unrelated histories";
594 let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
595 assert_eq!(described, stderr);
596 }
597
598 #[test]
599 fn validate_git_ref_rejects_leading_dash() {
600 assert!(validate_git_ref("--upload-pack=evil").is_err());
601 assert!(validate_git_ref("-flag").is_err());
602 }
603
604 #[test]
605 fn validate_git_ref_accepts_baseline_tag() {
606 assert_eq!(
607 validate_git_ref("fallow-baseline").unwrap(),
608 "fallow-baseline"
609 );
610 }
611
612 #[test]
613 fn try_get_changed_files_rejects_invalid_ref() {
614 let err = try_get_changed_files(Path::new("/"), "--evil")
615 .expect_err("leading-dash ref must be rejected");
616 assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
617 assert!(err.describe().contains("cannot start with"));
618 }
619
620 #[test]
621 fn validate_git_ref_rejects_option_like_ref() {
622 assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
623 }
624
625 #[test]
626 fn validate_git_ref_allows_reflog_relative_date() {
627 assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
628 }
629
630 #[test]
631 fn try_get_changed_files_rejects_option_like_ref_before_git() {
632 let root = tempfile::tempdir().expect("create temp dir");
633 let proof_path = root.path().join("proof");
634
635 let result = try_get_changed_files(
636 root.path(),
637 &format!("--output={}", proof_path.to_string_lossy()),
638 );
639
640 assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
641 assert!(
642 !proof_path.exists(),
643 "invalid changedSince ref must not be passed through to git as an option"
644 );
645 }
646
647 #[test]
648 fn git_command_clears_parent_git_environment() {
649 let command = git_command(Path::new("."), &["status", "--short"]);
650 let overrides: Vec<_> = command.get_envs().collect();
651
652 for var in crate::git_env::AMBIENT_GIT_ENV_VARS {
653 assert!(
654 overrides
655 .iter()
656 .any(|(key, value)| key.to_str() == Some(*var) && value.is_none()),
657 "git helper must clear inherited {var}",
658 );
659 }
660 }
661
662 #[test]
663 fn filter_results_keeps_only_changed_files() {
664 let mut results = AnalysisResults::default();
665 results
666 .unused_files
667 .push(UnusedFileFinding::with_actions(UnusedFile {
668 path: "/a.ts".into(),
669 }));
670 results
671 .unused_files
672 .push(UnusedFileFinding::with_actions(UnusedFile {
673 path: "/b.ts".into(),
674 }));
675 results
676 .unused_exports
677 .push(UnusedExportFinding::with_actions(UnusedExport {
678 path: "/a.ts".into(),
679 export_name: "foo".into(),
680 is_type_only: false,
681 line: 1,
682 col: 0,
683 span_start: 0,
684 is_re_export: false,
685 }));
686
687 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
688 changed.insert("/a.ts".into());
689
690 filter_results_by_changed_files(&mut results, &changed);
691
692 assert_eq!(results.unused_files.len(), 1);
693 assert_eq!(results.unused_files[0].file.path, PathBuf::from("/a.ts"));
694 assert_eq!(results.unused_exports.len(), 1);
695 }
696
697 #[test]
698 fn filter_results_preserves_dependency_level_issues() {
699 let mut results = AnalysisResults::default();
700 results.unused_dependencies.push(
701 fallow_types::output_dead_code::UnusedDependencyFinding::with_actions(
702 crate::results::UnusedDependency {
703 package_name: "lodash".into(),
704 location: crate::results::DependencyLocation::Dependencies,
705 path: "/pkg.json".into(),
706 line: 3,
707 used_in_workspaces: Vec::new(),
708 },
709 ),
710 );
711
712 let changed: FxHashSet<PathBuf> = FxHashSet::default();
713 filter_results_by_changed_files(&mut results, &changed);
714
715 assert_eq!(results.unused_dependencies.len(), 1);
716 }
717
718 #[test]
719 fn filter_results_keeps_circular_dep_when_any_file_changed() {
720 let mut results = AnalysisResults::default();
721 results
722 .circular_dependencies
723 .push(CircularDependencyFinding::with_actions(
724 CircularDependency {
725 files: vec!["/a.ts".into(), "/b.ts".into()],
726 length: 2,
727 line: 1,
728 col: 0,
729 edges: Vec::new(),
730 is_cross_package: false,
731 },
732 ));
733
734 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
735 changed.insert("/b.ts".into());
736
737 filter_results_by_changed_files(&mut results, &changed);
738 assert_eq!(results.circular_dependencies.len(), 1);
739 }
740
741 #[test]
742 fn filter_results_drops_circular_dep_when_no_file_changed() {
743 let mut results = AnalysisResults::default();
744 results
745 .circular_dependencies
746 .push(CircularDependencyFinding::with_actions(
747 CircularDependency {
748 files: vec!["/a.ts".into(), "/b.ts".into()],
749 length: 2,
750 line: 1,
751 col: 0,
752 edges: Vec::new(),
753 is_cross_package: false,
754 },
755 ));
756
757 let changed: FxHashSet<PathBuf> = FxHashSet::default();
758 filter_results_by_changed_files(&mut results, &changed);
759 assert!(results.circular_dependencies.is_empty());
760 }
761
762 #[test]
763 fn filter_results_drops_boundary_violation_when_importer_unchanged() {
764 let mut results = AnalysisResults::default();
765 results
766 .boundary_violations
767 .push(BoundaryViolationFinding::with_actions(BoundaryViolation {
768 from_path: "/a.ts".into(),
769 to_path: "/b.ts".into(),
770 from_zone: "ui".into(),
771 to_zone: "data".into(),
772 import_specifier: "../data/db".into(),
773 line: 1,
774 col: 0,
775 }));
776
777 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
778 changed.insert("/b.ts".into());
779
780 filter_results_by_changed_files(&mut results, &changed);
781 assert!(results.boundary_violations.is_empty());
782 }
783
784 #[test]
785 fn filter_results_keeps_security_finding_when_trace_file_changed() {
786 let mut results = AnalysisResults::default();
787 results.security_findings.push(SecurityFinding {
788 finding_id: String::new(),
789 candidate: fallow_types::results::SecurityCandidate::default(),
790 taint_flow: None,
791 attack_surface: None,
792 kind: SecurityFindingKind::ClientServerLeak,
793 category: None,
794 cwe: None,
795 path: "/project/src/client.tsx".into(),
796 line: 2,
797 col: 0,
798 evidence: "candidate".into(),
799 source_backed: false,
800 source_read: None,
801 severity: SecuritySeverity::Low,
802 trace: vec![
803 TraceHop {
804 path: "/project/src/client.tsx".into(),
805 line: 2,
806 col: 0,
807 role: TraceHopRole::ClientBoundary,
808 },
809 TraceHop {
810 path: "/project/src/server.ts".into(),
811 line: 1,
812 col: 0,
813 role: TraceHopRole::SecretSource,
814 },
815 ],
816 actions: Vec::new(),
817 dead_code: None,
818 reachability: None,
819 runtime: None,
820 });
821
822 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
823 changed.insert("/project/src/server.ts".into());
824
825 filter_results_by_changed_files(&mut results, &changed);
826
827 assert_eq!(results.security_findings.len(), 1);
828 }
829
830 #[test]
831 fn filter_results_keeps_security_finding_when_untrusted_source_trace_file_changed() {
832 let mut results = AnalysisResults::default();
833 results.security_findings.push(SecurityFinding {
834 finding_id: String::new(),
835 candidate: fallow_types::results::SecurityCandidate::default(),
836 taint_flow: None,
837 attack_surface: None,
838 kind: SecurityFindingKind::TaintedSink,
839 category: Some("command-injection".into()),
840 cwe: Some(78),
841 path: "/project/src/runner.ts".into(),
842 line: 4,
843 col: 2,
844 evidence: "candidate".into(),
845 source_backed: false,
846 source_read: None,
847 severity: SecuritySeverity::Low,
848 trace: Vec::new(),
849 actions: Vec::new(),
850 dead_code: None,
851 reachability: Some(SecurityReachability {
852 reachable_from_entry: false,
853 reachable_from_untrusted_source: true,
854 taint_confidence: Some(fallow_types::results::TaintConfidence::ModuleLevel),
855 untrusted_source_hop_count: Some(1),
856 untrusted_source_trace: vec![
857 TraceHop {
858 path: "/project/src/route.ts".into(),
859 line: 1,
860 col: 0,
861 role: TraceHopRole::UntrustedSource,
862 },
863 TraceHop {
864 path: "/project/src/runner.ts".into(),
865 line: 4,
866 col: 2,
867 role: TraceHopRole::Sink,
868 },
869 ],
870 blast_radius: 0,
871 crosses_boundary: false,
872 }),
873 runtime: None,
874 });
875
876 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
877 changed.insert("/project/src/route.ts".into());
878
879 filter_results_by_changed_files(&mut results, &changed);
880
881 assert_eq!(results.security_findings.len(), 1);
882 }
883
884 #[test]
885 fn filter_results_keeps_relative_empty_catalog_group_when_manifest_changed() {
886 let mut results = AnalysisResults::default();
887 results
888 .empty_catalog_groups
889 .push(EmptyCatalogGroupFinding::with_actions(EmptyCatalogGroup {
890 catalog_name: "legacy".into(),
891 path: PathBuf::from("pnpm-workspace.yaml"),
892 line: 4,
893 }));
894
895 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
896 changed.insert(PathBuf::from("/repo/pnpm-workspace.yaml"));
897
898 filter_results_by_changed_files(&mut results, &changed);
899
900 assert_eq!(results.empty_catalog_groups.len(), 1);
901 assert_eq!(results.empty_catalog_groups[0].group.catalog_name, "legacy");
902 }
903
904 #[test]
905 fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
906 let mut report = DuplicationReport {
907 clone_groups: vec![CloneGroup {
908 instances: vec![
909 CloneInstance {
910 file: "/a.ts".into(),
911 start_line: 1,
912 end_line: 5,
913 start_col: 0,
914 end_col: 10,
915 fragment: "code".into(),
916 },
917 CloneInstance {
918 file: "/b.ts".into(),
919 start_line: 1,
920 end_line: 5,
921 start_col: 0,
922 end_col: 10,
923 fragment: "code".into(),
924 },
925 ],
926 token_count: 20,
927 line_count: 5,
928 }],
929 clone_families: vec![],
930 mirrored_directories: vec![],
931 stats: DuplicationStats {
932 total_files: 2,
933 files_with_clones: 2,
934 total_lines: 100,
935 duplicated_lines: 10,
936 total_tokens: 200,
937 duplicated_tokens: 40,
938 clone_groups: 1,
939 clone_instances: 2,
940 duplication_percentage: 10.0,
941 clone_groups_below_min_occurrences: 0,
942 },
943 };
944
945 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
946 changed.insert("/a.ts".into());
947
948 filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
949 assert_eq!(report.clone_groups.len(), 1);
950 assert_eq!(report.stats.clone_groups, 1);
951 assert_eq!(report.stats.clone_instances, 2);
952 }
953
954 #[cfg(windows)]
962 #[test]
963 fn filter_duplication_normalises_verbatim_prefix_mismatch() {
964 let mut report = DuplicationReport {
965 clone_groups: vec![CloneGroup {
966 instances: vec![
967 CloneInstance {
968 file: PathBuf::from(r"\\?\C:\repo\src\changed.ts"),
969 start_line: 1,
970 end_line: 5,
971 start_col: 0,
972 end_col: 10,
973 fragment: "code".into(),
974 },
975 CloneInstance {
976 file: PathBuf::from(r"\\?\C:\repo\src\focused-copy.ts"),
977 start_line: 1,
978 end_line: 5,
979 start_col: 0,
980 end_col: 10,
981 fragment: "code".into(),
982 },
983 ],
984 token_count: 20,
985 line_count: 5,
986 }],
987 clone_families: vec![],
988 mirrored_directories: vec![],
989 stats: DuplicationStats {
990 total_files: 2,
991 files_with_clones: 2,
992 total_lines: 100,
993 duplicated_lines: 10,
994 total_tokens: 200,
995 duplicated_tokens: 40,
996 clone_groups: 1,
997 clone_instances: 2,
998 duplication_percentage: 10.0,
999 clone_groups_below_min_occurrences: 0,
1000 },
1001 };
1002
1003 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1004 changed.insert(PathBuf::from(r"C:\repo\src\changed.ts"));
1005
1006 filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1007 assert_eq!(
1008 report.clone_groups.len(),
1009 1,
1010 "verbatim instance path must match non-verbatim changed-file entry"
1011 );
1012 }
1013
1014 #[cfg(windows)]
1015 #[test]
1016 fn filter_results_normalises_verbatim_prefix_mismatch() {
1017 let mut results = AnalysisResults::default();
1018 results
1019 .unused_exports
1020 .push(UnusedExportFinding::with_actions(UnusedExport {
1021 path: PathBuf::from(r"\\?\C:\repo\src\a.ts"),
1022 export_name: "foo".into(),
1023 is_type_only: false,
1024 line: 1,
1025 col: 0,
1026 span_start: 0,
1027 is_re_export: false,
1028 }));
1029
1030 let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
1031 changed.insert(PathBuf::from(r"C:\repo\src\a.ts"));
1032
1033 filter_results_by_changed_files(&mut results, &changed);
1034 assert_eq!(
1035 results.unused_exports.len(),
1036 1,
1037 "verbatim finding path must match non-verbatim changed-file entry"
1038 );
1039 }
1040
1041 fn init_repo(repo: &Path) -> PathBuf {
1053 run_git(repo, &["init", "--quiet", "--initial-branch=main"]);
1054 run_git(repo, &["config", "user.email", "test@example.com"]);
1055 run_git(repo, &["config", "user.name", "test"]);
1056 run_git(repo, &["config", "commit.gpgsign", "false"]);
1057 std::fs::write(repo.join("seed.txt"), "seed\n").unwrap();
1058 run_git(repo, &["add", "seed.txt"]);
1059 run_git(repo, &["commit", "--quiet", "-m", "initial"]);
1060 run_git(repo, &["tag", "fallow-baseline"]);
1061 dunce::canonicalize(repo).unwrap()
1062 }
1063
1064 fn run_git(cwd: &Path, args: &[&str]) {
1065 let output = std::process::Command::new("git")
1066 .args(args)
1067 .current_dir(cwd)
1068 .output()
1069 .expect("git available");
1070 assert!(
1071 output.status.success(),
1072 "git {args:?} failed: {}",
1073 String::from_utf8_lossy(&output.stderr)
1074 );
1075 }
1076
1077 #[test]
1080 fn try_get_changed_files_workspace_at_repo_root() {
1081 let tmp = tempfile::tempdir().unwrap();
1082 let repo = init_repo(tmp.path());
1083 std::fs::create_dir_all(repo.join("src")).unwrap();
1084 std::fs::write(repo.join("src/new.ts"), "export const x = 1;\n").unwrap();
1085
1086 let changed = try_get_changed_files(&repo, "fallow-baseline").unwrap();
1087
1088 let expected = repo.join("src/new.ts");
1089 assert!(
1090 changed.contains(&expected),
1091 "changed set should contain {expected:?}; actual: {changed:?}"
1092 );
1093 }
1094
1095 #[test]
1103 fn try_get_changed_files_workspace_in_subdirectory() {
1104 let tmp = tempfile::tempdir().unwrap();
1105 let repo = init_repo(tmp.path());
1106 let frontend = repo.join("frontend");
1107 std::fs::create_dir_all(frontend.join("src")).unwrap();
1108 std::fs::write(frontend.join("src/new.ts"), "export const x = 1;\n").unwrap();
1109
1110 let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1111
1112 let expected = repo.join("frontend/src/new.ts");
1113 assert!(
1114 changed.contains(&expected),
1115 "changed set should contain canonical {expected:?}; actual: {changed:?}"
1116 );
1117 let bogus = frontend.join("frontend/src/new.ts");
1118 assert!(
1119 !changed.contains(&bogus),
1120 "changed set must not contain double-frontend path {bogus:?}"
1121 );
1122 }
1123
1124 #[test]
1139 fn try_get_changed_files_includes_committed_sibling_changes() {
1140 let tmp = tempfile::tempdir().unwrap();
1141 let repo = init_repo(tmp.path());
1142 let backend = repo.join("backend");
1143 std::fs::create_dir_all(&backend).unwrap();
1144 std::fs::write(backend.join("server.py"), "print('hi')\n").unwrap();
1145 run_git(&repo, &["add", "."]);
1146 run_git(&repo, &["commit", "--quiet", "-m", "add backend"]);
1147
1148 let frontend = repo.join("frontend");
1149 std::fs::create_dir_all(&frontend).unwrap();
1150
1151 let changed = try_get_changed_files(&frontend, "fallow-baseline").unwrap();
1152
1153 let expected = repo.join("backend/server.py");
1154 assert!(
1155 changed.contains(&expected),
1156 "committed sibling backend/server.py should be in the set: {changed:?}"
1157 );
1158 }
1159
1160 #[test]
1164 fn try_get_changed_files_includes_modified_tracked_file() {
1165 let tmp = tempfile::tempdir().unwrap();
1166 let repo = init_repo(tmp.path());
1167 let frontend = repo.join("frontend");
1168 std::fs::create_dir_all(frontend.join("src")).unwrap();
1169 std::fs::write(frontend.join("src/old.ts"), "export const x = 1;\n").unwrap();
1170 run_git(&repo, &["add", "."]);
1171 run_git(&repo, &["commit", "--quiet", "-m", "add old"]);
1172 run_git(&repo, &["tag", "fallow-baseline-v2"]);
1173 std::fs::write(frontend.join("src/old.ts"), "export const x = 2;\n").unwrap();
1174
1175 let changed = try_get_changed_files(&frontend, "fallow-baseline-v2").unwrap();
1176
1177 let expected = repo.join("frontend/src/old.ts");
1178 assert!(
1179 changed.contains(&expected),
1180 "modified tracked file {expected:?} missing from set: {changed:?}"
1181 );
1182 }
1183
1184 #[test]
1190 fn resolve_git_toplevel_returns_canonical_path() {
1191 let tmp = tempfile::tempdir().unwrap();
1192 let repo = init_repo(tmp.path());
1193 let frontend = repo.join("frontend");
1194 std::fs::create_dir_all(&frontend).unwrap();
1195
1196 let toplevel = resolve_git_toplevel(&frontend).unwrap();
1197 assert_eq!(toplevel, repo, "toplevel should equal canonical repo root");
1198 assert_eq!(
1199 toplevel,
1200 dunce::canonicalize(&toplevel).unwrap(),
1201 "resolved toplevel should already be canonical"
1202 );
1203 }
1204
1205 #[test]
1209 fn resolve_git_toplevel_not_a_repository() {
1210 let tmp = tempfile::tempdir().unwrap();
1211 let result = resolve_git_toplevel(tmp.path());
1212 assert!(
1213 matches!(result, Err(ChangedFilesError::NotARepository)),
1214 "expected NotARepository, got {result:?}"
1215 );
1216 }
1217
1218 #[test]
1221 fn try_get_changed_files_not_a_repository() {
1222 let tmp = tempfile::tempdir().unwrap();
1223 let result = try_get_changed_files(tmp.path(), "main");
1224 assert!(matches!(result, Err(ChangedFilesError::NotARepository)));
1225 }
1226
1227 #[test]
1228 fn filter_duplication_drops_groups_with_no_changed_instance() {
1229 let mut report = DuplicationReport {
1230 clone_groups: vec![CloneGroup {
1231 instances: vec![CloneInstance {
1232 file: "/a.ts".into(),
1233 start_line: 1,
1234 end_line: 5,
1235 start_col: 0,
1236 end_col: 10,
1237 fragment: "code".into(),
1238 }],
1239 token_count: 20,
1240 line_count: 5,
1241 }],
1242 clone_families: vec![],
1243 mirrored_directories: vec![],
1244 stats: DuplicationStats {
1245 total_files: 1,
1246 files_with_clones: 1,
1247 total_lines: 100,
1248 duplicated_lines: 5,
1249 total_tokens: 100,
1250 duplicated_tokens: 20,
1251 clone_groups: 1,
1252 clone_instances: 1,
1253 duplication_percentage: 5.0,
1254 clone_groups_below_min_occurrences: 0,
1255 },
1256 };
1257
1258 let changed: FxHashSet<PathBuf> = FxHashSet::default();
1259 filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
1260 assert!(report.clone_groups.is_empty());
1261 assert_eq!(report.stats.clone_groups, 0);
1262 assert_eq!(report.stats.clone_instances, 0);
1263 assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
1264 }
1265}