1use std::path::{Path, PathBuf};
17
18use rustc_hash::{FxHashMap, FxHashSet};
19
20use crate::duplicates::{DuplicationReport, DuplicationStats, families};
21use crate::results::AnalysisResults;
22
/// Validates that `s` is safe to hand to git as a revision argument.
///
/// Rejects empty refs, anything starting with `-` (which git would parse as
/// an option — argument-injection risk), and any character outside a small
/// allowlist. `:` and ` ` are permitted only inside `{...}` so reflog syntax
/// like `HEAD@{1 week ago}` still works; an unclosed `{` is an error.
///
/// Returns the input unchanged on success, or a human-readable reason on
/// failure.
pub fn validate_git_ref(s: &str) -> Result<&str, String> {
    if s.is_empty() {
        return Err("git ref cannot be empty".to_string());
    }
    if s.starts_with('-') {
        return Err("git ref cannot start with '-'".to_string());
    }

    let mut brace_open = false;
    for ch in s.chars() {
        let ok = match ch {
            '{' => {
                brace_open = true;
                true
            }
            '}' => {
                brace_open = false;
                true
            }
            // Reflog relative-date syntax needs these, but only inside braces.
            ':' | ' ' => brace_open,
            _ => {
                ch.is_ascii_alphanumeric()
                    || matches!(ch, '.' | '_' | '-' | '/' | '~' | '^' | '@')
            }
        };
        if !ok {
            return Err(format!("git ref contains disallowed character: '{ch}'"));
        }
    }

    if brace_open {
        return Err("git ref has unclosed '{'".to_string());
    }
    Ok(s)
}
58
/// Failure modes when resolving the set of changed files via git.
#[derive(Debug)]
pub enum ChangedFilesError {
    /// The user-supplied ref was rejected by `validate_git_ref`; payload is
    /// the validation message.
    InvalidRef(String),
    /// The `git` binary could not be spawned; payload is the spawn error text.
    GitMissing(String),
    /// git reported that the target directory is not inside a repository.
    NotARepository,
    /// git exited non-zero for another reason; payload is the trimmed stderr.
    GitFailed(String),
}
72
73impl ChangedFilesError {
74 pub fn describe(&self) -> String {
78 match self {
79 Self::InvalidRef(e) => format!("invalid git ref: {e}"),
80 Self::GitMissing(e) => format!("failed to run git: {e}"),
81 Self::NotARepository => "not a git repository".to_owned(),
82 Self::GitFailed(stderr) => augment_git_failed(stderr),
83 }
84 }
85}
86
/// Returns `stderr`, appending a shallow-clone remediation hint when the
/// message looks like a missing/unresolvable ref (the typical symptom of a
/// shallow CI checkout that never fetched the baseline).
///
/// Matching is case-insensitive; unrelated failures pass through untouched.
fn augment_git_failed(stderr: &str) -> String {
    const MISSING_REF_MARKERS: [&str; 3] = [
        "not a valid object name",
        "unknown revision",
        "ambiguous argument",
    ];

    let lower = stderr.to_ascii_lowercase();
    let looks_like_missing_ref = MISSING_REF_MARKERS
        .iter()
        .any(|marker| lower.contains(marker));

    if !looks_like_missing_ref {
        return stderr.to_owned();
    }
    format!(
        "{stderr} (shallow clone? try `git fetch --unshallow`, or set `fetch-depth: 0` on actions/checkout / `GIT_DEPTH: 0` in GitLab CI)"
    )
}
105
106fn collect_git_paths(root: &Path, args: &[&str]) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
107 let output = std::process::Command::new("git")
108 .args(args)
109 .current_dir(root)
110 .output()
111 .map_err(|e| ChangedFilesError::GitMissing(e.to_string()))?;
112
113 if !output.status.success() {
114 let stderr = String::from_utf8_lossy(&output.stderr);
115 return Err(if stderr.contains("not a git repository") {
116 ChangedFilesError::NotARepository
117 } else {
118 ChangedFilesError::GitFailed(stderr.trim().to_owned())
119 });
120 }
121
122 let files: FxHashSet<PathBuf> = String::from_utf8_lossy(&output.stdout)
123 .lines()
124 .map(|line| root.join(line))
125 .collect();
126
127 Ok(files)
128}
129
130pub fn try_get_changed_files(
142 root: &Path,
143 git_ref: &str,
144) -> Result<FxHashSet<PathBuf>, ChangedFilesError> {
145 validate_git_ref(git_ref).map_err(ChangedFilesError::InvalidRef)?;
146
147 let mut files = collect_git_paths(
148 root,
149 &[
150 "diff",
151 "--name-only",
152 "--end-of-options",
153 &format!("{git_ref}...HEAD"),
154 ],
155 )?;
156 files.extend(collect_git_paths(root, &["diff", "--name-only", "HEAD"])?);
157 files.extend(collect_git_paths(
158 root,
159 &["ls-files", "--others", "--exclude-standard"],
160 )?);
161 Ok(files)
162}
163
164#[expect(
168 clippy::print_stderr,
169 reason = "intentional user-facing warning for the CLI's --changed-since fallback path; LSP callers use try_get_changed_files instead"
170)]
171pub fn get_changed_files(root: &Path, git_ref: &str) -> Option<FxHashSet<PathBuf>> {
172 match try_get_changed_files(root, git_ref) {
173 Ok(files) => Some(files),
174 Err(ChangedFilesError::InvalidRef(e)) => {
175 eprintln!("Warning: --changed-since ignored: invalid git ref: {e}");
176 None
177 }
178 Err(ChangedFilesError::GitMissing(e)) => {
179 eprintln!("Warning: --changed-since ignored: failed to run git: {e}");
180 None
181 }
182 Err(ChangedFilesError::NotARepository) => {
183 eprintln!("Warning: --changed-since ignored: not a git repository");
184 None
185 }
186 Err(ChangedFilesError::GitFailed(stderr)) => {
187 eprintln!("Warning: --changed-since failed for ref '{git_ref}': {stderr}");
188 None
189 }
190 }
191}
192
193#[expect(
201 clippy::implicit_hasher,
202 reason = "fallow standardizes on FxHashSet across the workspace"
203)]
204pub fn filter_results_by_changed_files(
205 results: &mut AnalysisResults,
206 changed_files: &FxHashSet<PathBuf>,
207) {
208 results
209 .unused_files
210 .retain(|f| changed_files.contains(&f.path));
211 results
212 .unused_exports
213 .retain(|e| changed_files.contains(&e.path));
214 results
215 .unused_types
216 .retain(|e| changed_files.contains(&e.path));
217 results
218 .unused_enum_members
219 .retain(|m| changed_files.contains(&m.path));
220 results
221 .unused_class_members
222 .retain(|m| changed_files.contains(&m.path));
223 results
224 .unresolved_imports
225 .retain(|i| changed_files.contains(&i.path));
226
227 results.unlisted_dependencies.retain(|d| {
229 d.imported_from
230 .iter()
231 .any(|s| changed_files.contains(&s.path))
232 });
233
234 for dup in &mut results.duplicate_exports {
236 dup.locations
237 .retain(|loc| changed_files.contains(&loc.path));
238 }
239 results.duplicate_exports.retain(|d| d.locations.len() >= 2);
240
241 results
243 .circular_dependencies
244 .retain(|c| c.files.iter().any(|f| changed_files.contains(f)));
245
246 results
248 .boundary_violations
249 .retain(|v| changed_files.contains(&v.from_path));
250
251 results
253 .stale_suppressions
254 .retain(|s| changed_files.contains(&s.path));
255}
256
257fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
263 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
264 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
265 let mut duplicated_tokens = 0_usize;
266 let mut clone_instances = 0_usize;
267
268 for group in &report.clone_groups {
269 for instance in &group.instances {
270 files_with_clones.insert(&instance.file);
271 clone_instances += 1;
272 let lines = file_dup_lines.entry(&instance.file).or_default();
273 for line in instance.start_line..=instance.end_line {
274 lines.insert(line);
275 }
276 }
277 duplicated_tokens += group.token_count * group.instances.len();
278 }
279
280 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
281
282 DuplicationStats {
283 total_files: report.stats.total_files,
284 files_with_clones: files_with_clones.len(),
285 total_lines: report.stats.total_lines,
286 duplicated_lines,
287 total_tokens: report.stats.total_tokens,
288 duplicated_tokens,
289 clone_groups: report.clone_groups.len(),
290 clone_instances,
291 #[expect(
292 clippy::cast_precision_loss,
293 reason = "stat percentages are display-only; precision loss at usize::MAX line counts is acceptable"
294 )]
295 duplication_percentage: if report.stats.total_lines > 0 {
296 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
297 } else {
298 0.0
299 },
300 }
301}
302
303#[expect(
308 clippy::implicit_hasher,
309 reason = "fallow standardizes on FxHashSet across the workspace"
310)]
311pub fn filter_duplication_by_changed_files(
312 report: &mut DuplicationReport,
313 changed_files: &FxHashSet<PathBuf>,
314 root: &Path,
315) {
316 report
317 .clone_groups
318 .retain(|g| g.instances.iter().any(|i| changed_files.contains(&i.file)));
319 report.clone_families = families::group_into_families(&report.clone_groups, root);
320 report.mirrored_directories =
321 families::detect_mirrored_directories(&report.clone_families, root);
322 report.stats = recompute_duplication_stats(report);
323}
324
#[cfg(test)]
mod tests {
    use super::*;
    use crate::duplicates::{CloneGroup, CloneInstance};
    use crate::results::{BoundaryViolation, CircularDependency, UnusedExport, UnusedFile};

    #[test]
    fn changed_files_error_describe_variants() {
        // Every variant renders a recognizable, human-readable message.
        assert!(
            ChangedFilesError::InvalidRef("bad".to_owned())
                .describe()
                .contains("invalid git ref")
        );
        assert!(
            ChangedFilesError::GitMissing("oops".to_owned())
                .describe()
                .contains("oops")
        );
        assert_eq!(
            ChangedFilesError::NotARepository.describe(),
            "not a git repository"
        );
        assert!(
            ChangedFilesError::GitFailed("bad ref".to_owned())
                .describe()
                .contains("bad ref")
        );
    }

    #[test]
    fn augment_git_failed_appends_shallow_clone_hint_for_unknown_revision() {
        // A missing baseline ref is the typical symptom of a shallow CI
        // checkout; describe() must surface an actionable remediation hint.
        let stderr = "fatal: ambiguous argument 'fallow-baseline...HEAD': unknown revision or path not in the working tree.";
        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
        assert!(described.contains(stderr), "original stderr preserved");
        assert!(
            described.contains("shallow clone"),
            "hint surfaced: {described}"
        );
        assert!(
            described.contains("fetch-depth: 0") || described.contains("git fetch --unshallow"),
            "hint actionable: {described}"
        );
    }

    #[test]
    fn augment_git_failed_passthrough_for_other_errors() {
        // Unrelated git failures must not get the shallow-clone hint appended.
        let stderr = "fatal: refusing to merge unrelated histories";
        let described = ChangedFilesError::GitFailed(stderr.to_owned()).describe();
        assert_eq!(described, stderr);
    }

    #[test]
    fn validate_git_ref_rejects_leading_dash() {
        // Leading '-' would let a ref be parsed as a git option.
        assert!(validate_git_ref("--upload-pack=evil").is_err());
        assert!(validate_git_ref("-flag").is_err());
    }

    #[test]
    fn validate_git_ref_accepts_baseline_tag() {
        assert_eq!(
            validate_git_ref("fallow-baseline").unwrap(),
            "fallow-baseline"
        );
    }

    #[test]
    fn try_get_changed_files_rejects_invalid_ref() {
        // Validation happens before any git invocation, so the root path is
        // never actually touched here.
        let err = try_get_changed_files(Path::new("/"), "--evil")
            .expect_err("leading-dash ref must be rejected");
        assert!(matches!(err, ChangedFilesError::InvalidRef(_)));
        assert!(err.describe().contains("cannot start with"));
    }

    #[test]
    fn validate_git_ref_rejects_option_like_ref() {
        assert!(validate_git_ref("--output=/tmp/fallow-proof").is_err());
    }

    #[test]
    fn validate_git_ref_allows_reflog_relative_date() {
        // ':' and ' ' are only legal inside '{...}' (reflog @{...} syntax).
        assert!(validate_git_ref("HEAD@{1 week ago}").is_ok());
    }

    #[test]
    fn try_get_changed_files_rejects_option_like_ref_before_git() {
        // If the ref were forwarded, `git diff --output=<path>` would create
        // the proof file; its absence shows validation short-circuited.
        let root = tempfile::tempdir().expect("create temp dir");
        let proof_path = root.path().join("proof");

        let result = try_get_changed_files(
            root.path(),
            &format!("--output={}", proof_path.to_string_lossy()),
        );

        assert!(matches!(result, Err(ChangedFilesError::InvalidRef(_))));
        assert!(
            !proof_path.exists(),
            "invalid changedSince ref must not be passed through to git as an option"
        );
    }

    #[test]
    fn filter_results_keeps_only_changed_files() {
        let mut results = AnalysisResults::default();
        results.unused_files.push(UnusedFile {
            path: "/a.ts".into(),
        });
        results.unused_files.push(UnusedFile {
            path: "/b.ts".into(),
        });
        results.unused_exports.push(UnusedExport {
            path: "/a.ts".into(),
            export_name: "foo".into(),
            is_type_only: false,
            line: 1,
            col: 0,
            span_start: 0,
            is_re_export: false,
        });

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_results_by_changed_files(&mut results, &changed);

        // Only findings in /a.ts survive.
        assert_eq!(results.unused_files.len(), 1);
        assert_eq!(results.unused_files[0].path, PathBuf::from("/a.ts"));
        assert_eq!(results.unused_exports.len(), 1);
    }

    #[test]
    fn filter_results_preserves_dependency_level_issues() {
        // unused_dependencies is package-level, not file-level: the filter
        // has no retain call for it, so it must survive untouched.
        let mut results = AnalysisResults::default();
        results
            .unused_dependencies
            .push(crate::results::UnusedDependency {
                package_name: "lodash".into(),
                location: crate::results::DependencyLocation::Dependencies,
                path: "/pkg.json".into(),
                line: 3,
            });

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);

        assert_eq!(results.unused_dependencies.len(), 1);
    }

    #[test]
    fn filter_results_keeps_circular_dep_when_any_file_changed() {
        let mut results = AnalysisResults::default();
        results.circular_dependencies.push(CircularDependency {
            files: vec!["/a.ts".into(), "/b.ts".into()],
            length: 2,
            line: 1,
            col: 0,
            is_cross_package: false,
        });

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert_eq!(results.circular_dependencies.len(), 1);
    }

    #[test]
    fn filter_results_drops_circular_dep_when_no_file_changed() {
        let mut results = AnalysisResults::default();
        results.circular_dependencies.push(CircularDependency {
            files: vec!["/a.ts".into(), "/b.ts".into()],
            length: 2,
            line: 1,
            col: 0,
            is_cross_package: false,
        });

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.circular_dependencies.is_empty());
    }

    #[test]
    fn filter_results_drops_boundary_violation_when_importer_unchanged() {
        // Boundary violations are keyed on the importing file (from_path);
        // a change to the imported file alone does not keep them.
        let mut results = AnalysisResults::default();
        results.boundary_violations.push(BoundaryViolation {
            from_path: "/a.ts".into(),
            to_path: "/b.ts".into(),
            from_zone: "ui".into(),
            to_zone: "data".into(),
            import_specifier: "../data/db".into(),
            line: 1,
            col: 0,
        });

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/b.ts".into());

        filter_results_by_changed_files(&mut results, &changed);
        assert!(results.boundary_violations.is_empty());
    }

    #[test]
    fn filter_duplication_keeps_groups_with_at_least_one_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![
                    CloneInstance {
                        file: "/a.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                    CloneInstance {
                        file: "/b.ts".into(),
                        start_line: 1,
                        end_line: 5,
                        start_col: 0,
                        end_col: 10,
                        fragment: "code".into(),
                    },
                ],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 2,
                files_with_clones: 2,
                total_lines: 100,
                duplicated_lines: 10,
                total_tokens: 200,
                duplicated_tokens: 40,
                clone_groups: 1,
                clone_instances: 2,
                duplication_percentage: 10.0,
            },
        };

        let mut changed: FxHashSet<PathBuf> = FxHashSet::default();
        changed.insert("/a.ts".into());

        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        // One changed instance is enough to keep the whole group, and the
        // recomputed stats still count both instances.
        assert_eq!(report.clone_groups.len(), 1);
        assert_eq!(report.stats.clone_groups, 1);
        assert_eq!(report.stats.clone_instances, 2);
    }

    #[test]
    fn filter_duplication_drops_groups_with_no_changed_instance() {
        let mut report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![CloneInstance {
                    file: "/a.ts".into(),
                    start_line: 1,
                    end_line: 5,
                    start_col: 0,
                    end_col: 10,
                    fragment: "code".into(),
                }],
                token_count: 20,
                line_count: 5,
            }],
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 1,
                files_with_clones: 1,
                total_lines: 100,
                duplicated_lines: 5,
                total_tokens: 100,
                duplicated_tokens: 20,
                clone_groups: 1,
                clone_instances: 1,
                duplication_percentage: 5.0,
            },
        };

        let changed: FxHashSet<PathBuf> = FxHashSet::default();
        filter_duplication_by_changed_files(&mut report, &changed, Path::new(""));
        // All groups dropped; stats must be recomputed down to zero.
        assert!(report.clone_groups.is_empty());
        assert_eq!(report.stats.clone_groups, 0);
        assert_eq!(report.stats.clone_instances, 0);
        assert!((report.stats.duplication_percentage - 0.0).abs() < f64::EPSILON);
    }
}