mod cache;
pub mod detect;
pub mod families;
pub mod normalize;
mod shingle_filter;
pub mod token_types;
mod token_visitor;
pub mod tokenize;
pub(crate) mod types;

use rustc_hash::FxHashMap;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicUsize, Ordering};

use globset::{Glob, GlobMatcher, GlobSet, GlobSetBuilder};
use rayon::prelude::*;
use rustc_hash::FxHashSet;

use cache::{TokenCache, TokenCacheEntry, TokenCacheMode};
use detect::CloneDetector;
use normalize::normalize_and_hash_resolved;
use tokenize::{tokenize_file, tokenize_file_cross_language};
pub use types::{
    CloneFamily, CloneGroup, CloneInstance, DefaultIgnoreSkipCount, DefaultIgnoreSkips,
    DetectionMode, DuplicatesConfig, DuplicationReport, DuplicationStats, MirroredDirectory,
    RefactoringKind, RefactoringSuggestion,
};

use crate::discover::{self, DiscoveredFile};
use crate::suppress::{self, IssueKind, Suppression};

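/// Glob patterns ignored by default during duplication analysis: framework
/// build output and tool caches whose generated files would otherwise flood
/// the report.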
pub const DUPES_DEFAULT_IGNORES: &[&str] = &[
    "**/.next/**",
    "**/.nuxt/**",
    "**/.svelte-kit/**",
    "**/.turbo/**",
    "**/.parcel-cache/**",
    "**/.vite/**",
    "**/.cache/**",
    "**/out/**",
    "**/storybook-static/**",
];

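/// Per-file output of the parallel tokenization pass, carried into clone
/// detection along with any suppression comments found in the source.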
#[derive(Clone)]
pub(super) struct TokenizedFile {
    path: PathBuf,
    hashed_tokens: Vec<normalize::HashedToken>,
    file_tokens: tokenize::FileTokens,
    metadata: Option<std::fs::Metadata>,
    cache_hit: bool,
    suppressions: Vec<Suppression>,
}

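/// Compiled ignore globs. `all` matches every pattern (defaults plus user
/// config); `defaults` keeps one matcher per default pattern so a skip can
/// be attributed to the pattern that caused it.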
struct IgnoreSet {
    all: GlobSet,
    defaults: Vec<(&'static str, GlobMatcher)>,
}

impl IgnoreSet {
    fn is_match(&self, path: &Path) -> bool {
        self.all.is_match(path)
    }

    fn default_match_index(&self, path: &Path) -> Option<usize> {
        self.defaults
            .iter()
            .position(|(_, matcher)| matcher.is_match(path))
    }
}

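/// Outcome of one duplication pass: the report plus counts of files skipped
/// by the default ignore patterns.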
struct DuplicationRun {
    report: DuplicationReport,
    default_ignore_skips: DefaultIgnoreSkips,
}

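/// Detects duplicated code across `files` and returns the full report.
///
/// Ignore globs are matched against paths relative to `root`.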
pub fn find_duplicates(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
) -> DuplicationReport {
    find_duplicates_inner(root, files, config, None, None).report
}

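/// Like [`find_duplicates`], but also returns how many files each default
/// ignore pattern skipped.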
pub fn find_duplicates_with_default_ignore_skips(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
) -> (DuplicationReport, DefaultIgnoreSkips) {
    let run = find_duplicates_inner(root, files, config, None, None);
    (run.report, run.default_ignore_skips)
}

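/// Like [`find_duplicates`], but persists tokenization results under
/// `cache_root` so unchanged files skip re-tokenization on later runs. The
/// cache is only engaged once the corpus reaches
/// `min_corpus_size_for_token_cache`.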
pub fn find_duplicates_cached(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    cache_root: &Path,
) -> DuplicationReport {
    find_duplicates_inner(root, files, config, None, Some(cache_root)).report
}

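/// Cached variant of [`find_duplicates_with_default_ignore_skips`].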
pub fn find_duplicates_cached_with_default_ignore_skips(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    cache_root: &Path,
) -> (DuplicationReport, DefaultIgnoreSkips) {
    let run = find_duplicates_inner(root, files, config, None, Some(cache_root));
    (run.report, run.default_ignore_skips)
}

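/// Restricts the report to clone groups touching at least one file in
/// `focus_files`; untouched files still participate as match candidates.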
#[expect(
    clippy::implicit_hasher,
    reason = "fallow uses FxHashSet for changed-file sets throughout analysis"
)]
pub fn find_duplicates_touching_files(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    focus_files: &FxHashSet<PathBuf>,
) -> DuplicationReport {
    find_duplicates_inner(root, files, config, Some(focus_files), None).report
}

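/// Focused variant of [`find_duplicates_with_default_ignore_skips`].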
#[expect(
    clippy::implicit_hasher,
    reason = "fallow uses FxHashSet for changed-file sets throughout analysis"
)]
pub fn find_duplicates_touching_files_with_default_ignore_skips(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    focus_files: &FxHashSet<PathBuf>,
) -> (DuplicationReport, DefaultIgnoreSkips) {
    let run = find_duplicates_inner(root, files, config, Some(focus_files), None);
    (run.report, run.default_ignore_skips)
}

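/// Focused variant of [`find_duplicates_cached`].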
#[expect(
    clippy::implicit_hasher,
    reason = "fallow uses FxHashSet for changed-file sets throughout analysis"
)]
pub fn find_duplicates_touching_files_cached(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    focus_files: &FxHashSet<PathBuf>,
    cache_root: &Path,
) -> DuplicationReport {
    find_duplicates_inner(root, files, config, Some(focus_files), Some(cache_root)).report
}

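/// Focused, cached variant that also returns default-ignore skip counts.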
#[expect(
    clippy::implicit_hasher,
    reason = "fallow uses FxHashSet for changed-file sets throughout analysis"
)]
pub fn find_duplicates_touching_files_cached_with_default_ignore_skips(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    focus_files: &FxHashSet<PathBuf>,
    cache_root: &Path,
) -> (DuplicationReport, DefaultIgnoreSkips) {
    let run = find_duplicates_inner(root, files, config, Some(focus_files), Some(cache_root));
    (run.report, run.default_ignore_skips)
}

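/// Shared implementation behind every public entry point: tokenize (with
/// optional caching), optionally prune to focus candidates, detect clones,
/// then apply suppressions and attach family groupings.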
fn find_duplicates_inner(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    focus_files: Option<&FxHashSet<PathBuf>>,
    cache_root: Option<&Path>,
) -> DuplicationRun {
    let _span = tracing::info_span!("find_duplicates").entered();

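    // Compile ignore globs once and allocate one atomic skip counter per
    // default pattern; the parallel pass below bumps a counter whenever a
    // default glob filters out a file.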
    let extra_ignores = build_ignore_set(config);
    let default_skip_counts = extra_ignores
        .as_ref()
        .map(|ignores| {
            std::iter::repeat_with(|| AtomicUsize::new(0))
                .take(ignores.defaults.len())
                .collect::<Vec<_>>()
        })
        .unwrap_or_default();

    let normalization =
        fallow_config::ResolvedNormalization::resolve(config.mode, &config.normalization);

    let strip_types = config.cross_language;
    let skip_imports = config.ignore_imports;

    tracing::debug!(
        ignore_imports = skip_imports,
        "duplication tokenization config"
    );

    let token_cache_mode = TokenCacheMode::new(normalization, strip_types, skip_imports);
    let cache_root = cache_root.filter(|_| files.len() >= config.min_corpus_size_for_token_cache);
    let token_cache = cache_root.map(TokenCache::load);

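    // Tokenize every file in parallel: honor ignore globs and suppression
    // comments, reuse cached tokens when the cache has a fresh entry, and
    // drop files below the minimum token threshold.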
    let mut file_data: Vec<TokenizedFile> = files
        .par_iter()
        .filter_map(|file| {
            let relative = file.path.strip_prefix(root).unwrap_or(&file.path);
            if let Some(ref ignores) = extra_ignores {
                if let Some(index) = ignores.default_match_index(relative) {
                    default_skip_counts[index].fetch_add(1, Ordering::Relaxed);
                    return None;
                }
                if ignores.is_match(relative) {
                    return None;
                }
            }

            let metadata = std::fs::metadata(&file.path).ok()?;

            let cached_entry = token_cache
                .as_ref()
                .and_then(|cache| cache.get(&file.path, &metadata, token_cache_mode));
            let cache_hit = cached_entry.is_some();

            let (mut entry, suppressions) = if let Some(entry) = cached_entry {
                let suppressions =
                    suppress::parse_suppressions_from_source(&entry.file_tokens.source);
                if suppress::is_file_suppressed(&suppressions, IssueKind::CodeDuplication) {
                    return None;
                }
                (entry, suppressions)
            } else {
                let source = std::fs::read_to_string(&file.path).ok()?;
                let suppressions = suppress::parse_suppressions_from_source(&source);
                if suppress::is_file_suppressed(&suppressions, IssueKind::CodeDuplication) {
                    return None;
                }

                let file_tokens = if strip_types {
                    tokenize_file_cross_language(&file.path, &source, true, skip_imports)
                } else {
                    tokenize_file(&file.path, &source, skip_imports)
                };
                if file_tokens.tokens.is_empty() {
                    return None;
                }

                let hashed = normalize_and_hash_resolved(&file_tokens.tokens, normalization);
                let entry = TokenCacheEntry {
                    hashed_tokens: hashed,
                    file_tokens,
                };
                (entry, suppressions)
            };
            if entry.file_tokens.tokens.is_empty() {
                return None;
            }
            if entry.hashed_tokens.len() < config.min_tokens {
                return None;
            }

            Some(TokenizedFile {
                path: file.path.clone(),
                hashed_tokens: std::mem::take(&mut entry.hashed_tokens),
                file_tokens: entry.file_tokens,
                metadata: Some(metadata),
                cache_hit,
                suppressions,
            })
        })
        .collect();

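    // Write tokens for cache misses back into the on-disk cache, evict
    // entries for files no longer in the corpus, and save only if dirty.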
    if let (Some(cache_root), Some(mut cache)) = (cache_root, token_cache) {
        for file in &file_data {
            if !file.cache_hit
                && let Some(metadata) = &file.metadata
            {
                cache.insert(
                    &file.path,
                    metadata,
                    token_cache_mode,
                    &file.hashed_tokens,
                    &file.file_tokens,
                );
            }
        }
        cache.retain_paths(files);
        match cache.save_if_dirty() {
            Ok(true) => {
                tracing::debug!(cache_root = %cache_root.display(), "saved duplication token cache");
            }
            Ok(false) => {
                tracing::debug!(cache_root = %cache_root.display(), "duplication token cache unchanged");
            }
            Err(err) => {
                tracing::warn!("Failed to save duplication token cache: {err}");
            }
        }
    }

    tracing::info!(
        files = file_data.len(),
        "tokenized files for duplication analysis"
    );

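    // Focused runs on large corpora first prune files that cannot share a
    // clone with any focus file, so full detection sees fewer candidates.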
    if let Some(focus_files) = focus_files
        && file_data.len() >= config.min_corpus_size_for_shingle_filter
    {
        shingle_filter::filter_to_focus_candidates(&mut file_data, focus_files, config.min_tokens);
    }

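    // Detection consumes `file_data` below, so capture each file's
    // suppression comments first; they are applied to the report afterwards.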
    let suppressions_by_file: FxHashMap<PathBuf, Vec<Suppression>> = file_data
        .iter()
        .filter(|file| !file.suppressions.is_empty())
        .map(|file| (file.path.clone(), file.suppressions.clone()))
        .collect();

    let detector_data: Vec<(PathBuf, Vec<normalize::HashedToken>, tokenize::FileTokens)> =
        file_data
            .into_iter()
            .map(|file| (file.path, file.hashed_tokens, file.file_tokens))
            .collect();

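    // Run clone detection, strip suppressed instances, and enrich the report
    // with family and mirrored-directory groupings before sorting.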
    let detector = CloneDetector::new(config.min_tokens, config.min_lines, config.skip_local);
    let mut report = if let Some(focus_files) = focus_files {
        detector.detect_touching_files(detector_data, focus_files)
    } else {
        detector.detect(detector_data)
    };

    if !suppressions_by_file.is_empty() {
        apply_line_suppressions(&mut report, &suppressions_by_file);
    }

    let default_ignore_skips =
        build_default_ignore_skips(extra_ignores.as_ref(), &default_skip_counts);

    report.clone_families = families::group_into_families(&report.clone_groups, root);

    report.mirrored_directories =
        families::detect_mirrored_directories(&report.clone_families, root);

    report.sort();

    DuplicationRun {
        report,
        default_ignore_skips,
    }
}

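/// Drops clone instances whose line range hits a code-duplication
/// suppression, then discards groups left with fewer than two instances.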
#[expect(
    clippy::cast_possible_truncation,
    reason = "line numbers are bounded by source size"
)]
fn apply_line_suppressions(
    report: &mut DuplicationReport,
    suppressions_by_file: &FxHashMap<PathBuf, Vec<Suppression>>,
) {
    report.clone_groups.retain_mut(|group| {
        group.instances.retain(|instance| {
            if let Some(supps) = suppressions_by_file.get(&instance.file) {
                for line in instance.start_line..=instance.end_line {
                    if suppress::is_suppressed(supps, line as u32, IssueKind::CodeDuplication) {
                        return false;
                    }
                }
            }
            true
        });
        group.instances.len() >= 2
    });
}

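/// Discovers files under `root` using the default configuration and runs
/// [`find_duplicates`] over them.
///
/// Minimal usage sketch (the `fallow::dupes` path is an assumption; adjust
/// it to wherever this module is actually re-exported):
///
/// ```ignore
/// use std::path::Path;
///
/// let report = fallow::dupes::find_duplicates_in_project(
///     Path::new("."),
///     &fallow::dupes::DuplicatesConfig::default(),
/// );
/// println!("{} clone groups found", report.clone_groups.len());
/// ```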
#[must_use]
pub fn find_duplicates_in_project(root: &Path, config: &DuplicatesConfig) -> DuplicationReport {
    let resolved = crate::default_config(root);
    let files = discover::discover_files(&resolved);
    find_duplicates(root, &files, config)
}

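/// Compiles default and user-configured ignore globs into an [`IgnoreSet`].
/// Returns `None` when there is nothing to ignore; invalid patterns are
/// logged and skipped rather than aborting the run.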
fn build_ignore_set(config: &DuplicatesConfig) -> Option<IgnoreSet> {
    if !config.ignore_defaults && config.ignore.is_empty() {
        return None;
    }

    let mut builder = GlobSetBuilder::new();
    let mut defaults = Vec::new();

    if config.ignore_defaults {
        for pattern in DUPES_DEFAULT_IGNORES {
            match Glob::new(pattern) {
                Ok(glob) => {
                    defaults.push((*pattern, glob.compile_matcher()));
                    builder.add(glob);
                }
                Err(e) => {
                    tracing::warn!("Invalid default duplication ignore pattern '{pattern}': {e}");
                }
            }
        }
    }

    for pattern in &config.ignore {
        match Glob::new(pattern) {
            Ok(glob) => {
                builder.add(glob);
            }
            Err(e) => {
                tracing::warn!("Invalid duplication ignore pattern '{pattern}': {e}");
            }
        }
    }

    builder.build().ok().map(|all| IgnoreSet { all, defaults })
}

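/// Folds the per-pattern atomic counters into a [`DefaultIgnoreSkips`]
/// summary, keeping only patterns that skipped at least one file.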
fn build_default_ignore_skips(
    ignores: Option<&IgnoreSet>,
    counts: &[AtomicUsize],
) -> DefaultIgnoreSkips {
    let Some(ignores) = ignores else {
        return DefaultIgnoreSkips::default();
    };

    let by_pattern = ignores
        .defaults
        .iter()
        .zip(counts)
        .filter_map(|((pattern, _), count)| {
            let count = count.load(Ordering::Relaxed);
            (count > 0).then_some(DefaultIgnoreSkipCount { pattern, count })
        })
        .collect::<Vec<_>>();
    let total = by_pattern.iter().map(|entry| entry.count).sum();

    DefaultIgnoreSkips { total, by_pattern }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::discover::FileId;

    #[test]
    fn find_duplicates_empty_files() {
        let config = DuplicatesConfig::default();
        let report = find_duplicates(Path::new("/tmp"), &[], &config);
        assert!(report.clone_groups.is_empty());
        assert!(report.clone_families.is_empty());
        assert_eq!(report.stats.total_files, 0);
    }

    #[test]
    fn build_ignore_set_empty() {
        let config = DuplicatesConfig {
            ignore_defaults: false,
            ..DuplicatesConfig::default()
        };
        assert!(build_ignore_set(&config).is_none());
    }

    #[test]
    fn build_ignore_set_valid_patterns() {
        let config = DuplicatesConfig {
            ignore_defaults: false,
            ignore: vec!["**/*.test.ts".to_string(), "**/*.spec.ts".to_string()],
            ..DuplicatesConfig::default()
        };
        let set = build_ignore_set(&config);
        assert!(set.is_some());
        let set = set.unwrap();
        assert!(set.is_match(Path::new("src/foo.test.ts")));
        assert!(set.is_match(Path::new("src/bar.spec.ts")));
        assert!(!set.is_match(Path::new("src/baz.ts")));
    }

    #[test]
    fn build_ignore_set_merges_defaults_with_user_patterns() {
        let config = DuplicatesConfig {
            ignore: vec!["**/foo/**".to_string()],
            ..DuplicatesConfig::default()
        };
        let set = build_ignore_set(&config).expect("ignore set");
        assert!(set.is_match(Path::new(".next/static/chunks/app.js")));
        assert!(set.is_match(Path::new("src/foo/generated.js")));
    }

    #[test]
    fn build_ignore_set_ignore_defaults_false_uses_only_user_patterns() {
        let config = DuplicatesConfig {
            ignore_defaults: false,
            ignore: vec!["**/foo/**".to_string()],
            ..DuplicatesConfig::default()
        };
        let set = build_ignore_set(&config).expect("ignore set");
        assert!(!set.is_match(Path::new(".next/static/chunks/app.js")));
        assert!(set.is_match(Path::new("src/foo/generated.js")));
    }

    #[test]
    fn find_duplicates_with_real_files() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let src_dir = dir.path().join("src");
        std::fs::create_dir_all(&src_dir).expect("create src dir");

        let code = r#"
export function processData(input: string): string {
  const trimmed = input.trim();
  if (trimmed.length === 0) {
    return "";
  }
  const parts = trimmed.split(",");
  const filtered = parts.filter(p => p.length > 0);
  const mapped = filtered.map(p => p.toUpperCase());
  return mapped.join(", ");
}

export function validateInput(data: string): boolean {
  if (data === null || data === undefined) {
    return false;
  }
  const cleaned = data.trim();
  if (cleaned.length < 3) {
    return false;
  }
  return true;
}
"#;

        std::fs::write(src_dir.join("original.ts"), code).expect("write original");
        std::fs::write(src_dir.join("copy.ts"), code).expect("write copy");
        std::fs::write(dir.path().join("package.json"), r#"{"name": "test"}"#)
            .expect("write package.json");

        let files = vec![
            DiscoveredFile {
                id: FileId(0),
                path: src_dir.join("original.ts"),
                size_bytes: code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(1),
                path: src_dir.join("copy.ts"),
                size_bytes: code.len() as u64,
            },
        ];

        let config = DuplicatesConfig {
            min_tokens: 10,
            min_lines: 2,
            ..DuplicatesConfig::default()
        };

        let report = find_duplicates(dir.path(), &files, &config);
        assert!(
            !report.clone_groups.is_empty(),
            "Should detect clones in identical files"
        );
        assert!(report.stats.files_with_clones >= 2);

        assert!(
            !report.clone_families.is_empty(),
            "Should group clones into families"
        );
    }

    #[test]
    fn find_duplicates_cached_skips_token_cache_for_small_corpus() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let src_dir = dir.path().join("src");
        std::fs::create_dir_all(&src_dir).expect("create src dir");

        let code = "export function same(input: number): number {\n const doubled = input * 2;\n return doubled + 1;\n}\n";
        let first = src_dir.join("first.ts");
        let second = src_dir.join("second.ts");
        std::fs::write(&first, code).expect("write first");
        std::fs::write(&second, code).expect("write second");

        let files = vec![
            DiscoveredFile {
                id: FileId(0),
                path: first,
                size_bytes: code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(1),
                path: second,
                size_bytes: code.len() as u64,
            },
        ];
        let config = DuplicatesConfig {
            min_tokens: 5,
            min_lines: 2,
            ..DuplicatesConfig::default()
        };
        let cache_root = dir.path().join(".fallow");

        let report = find_duplicates_cached(dir.path(), &files, &config, &cache_root);

        assert!(!report.clone_groups.is_empty());
        assert!(
            !cache_root.exists(),
            "small projects should avoid token-cache IO overhead"
        );
    }

    #[test]
    fn find_duplicates_touching_files_keeps_cross_corpus_matches_only_for_focus() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let src_dir = dir.path().join("src");
        std::fs::create_dir_all(&src_dir).expect("create src dir");

        let focused_code = r"
export function focused(input: number): number {
  const doubled = input * 2;
  const shifted = doubled + 10;
  return shifted / 2;
}
";
        let untouched_code = r#"
export function untouched(input: string): string {
  const lowered = input.toLowerCase();
  const padded = lowered.padStart(10, "x");
  return padded.slice(0, 8);
}
"#;

        let changed_path = src_dir.join("changed.ts");
        let focused_copy_path = src_dir.join("focused-copy.ts");
        let untouched_a_path = src_dir.join("untouched-a.ts");
        let untouched_b_path = src_dir.join("untouched-b.ts");
        std::fs::write(&changed_path, focused_code).expect("write changed");
        std::fs::write(&focused_copy_path, focused_code).expect("write focused copy");
        std::fs::write(&untouched_a_path, untouched_code).expect("write untouched a");
        std::fs::write(&untouched_b_path, untouched_code).expect("write untouched b");

        let files = vec![
            DiscoveredFile {
                id: FileId(0),
                path: changed_path.clone(),
                size_bytes: focused_code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(1),
                path: focused_copy_path,
                size_bytes: focused_code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(2),
                path: untouched_a_path,
                size_bytes: untouched_code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(3),
                path: untouched_b_path,
                size_bytes: untouched_code.len() as u64,
            },
        ];

        let config = DuplicatesConfig {
            mode: DetectionMode::Strict,
            min_tokens: 5,
            min_lines: 2,
            min_corpus_size_for_shingle_filter: 1,
            ..DuplicatesConfig::default()
        };
        let mut focus = FxHashSet::default();
        focus.insert(changed_path.clone());

        let full_report = find_duplicates(dir.path(), &files, &config);
        let report = find_duplicates_touching_files(dir.path(), &files, &config, &focus);
        let expected_touching = full_report
            .clone_groups
            .iter()
            .filter(|group| {
                group
                    .instances
                    .iter()
                    .any(|instance| instance.file == changed_path)
            })
            .count();

        assert!(
            !report.clone_groups.is_empty(),
            "focused file should still match an unchanged duplicate"
        );
        assert_eq!(
            report.clone_groups.len(),
            expected_touching,
            "focused shingle filtering must not drop clone groups touching the focused file"
        );
        assert!(report.clone_groups.iter().all(|group| {
            group
                .instances
                .iter()
                .any(|instance| instance.file == changed_path)
        }));
    }

    #[test]
    fn file_wide_suppression_excludes_file() {
        let dir = tempfile::tempdir().expect("create temp dir");
        let src_dir = dir.path().join("src");
        std::fs::create_dir_all(&src_dir).expect("create src dir");

        let code = r#"
export function processData(input: string): string {
  const trimmed = input.trim();
  if (trimmed.length === 0) {
    return "";
  }
  const parts = trimmed.split(",");
  const filtered = parts.filter(p => p.length > 0);
  const mapped = filtered.map(p => p.toUpperCase());
  return mapped.join(", ");
}
"#;
        let suppressed_code = format!("// fallow-ignore-file code-duplication\n{code}");

        std::fs::write(src_dir.join("original.ts"), code).expect("write original");
        std::fs::write(src_dir.join("suppressed.ts"), &suppressed_code).expect("write suppressed");
        std::fs::write(dir.path().join("package.json"), r#"{"name": "test"}"#)
            .expect("write package.json");

        let files = vec![
            DiscoveredFile {
                id: FileId(0),
                path: src_dir.join("original.ts"),
                size_bytes: code.len() as u64,
            },
            DiscoveredFile {
                id: FileId(1),
                path: src_dir.join("suppressed.ts"),
                size_bytes: suppressed_code.len() as u64,
            },
        ];

        let config = DuplicatesConfig {
            min_tokens: 10,
            min_lines: 2,
            ..DuplicatesConfig::default()
        };

        let report = find_duplicates(dir.path(), &files, &config);
        assert!(
            report.clone_groups.is_empty(),
            "File-wide suppression should exclude file from duplication analysis"
        );
    }
}