1use std::path::{Path, PathBuf};
4
5use fallow_config::DuplicatesConfig;
6use fallow_types::discover::DiscoveredFile;
7use rustc_hash::{FxHashMap, FxHashSet};
8
9use crate::{core_backend, results::DuplicationAnalysis};
10
/// The literal prefix `"dup:"`.
///
/// The unit test in this file asserts that the string returned by
/// `CloneFingerprintSet::fingerprint_for_group` starts with this prefix.
pub const FINGERPRINT_PREFIX: &str = "dup:";
12
// Aliases for the duplicate-detection data types declared in
// `fallow_types::duplicates`, so they can be named through this module.

/// Alias for `fallow_types::duplicates::CloneGroup`.
pub type CloneGroup = fallow_types::duplicates::CloneGroup;
/// Alias for `fallow_types::duplicates::CloneInstance`.
pub type CloneInstance = fallow_types::duplicates::CloneInstance;
/// Alias for `fallow_types::duplicates::DefaultIgnoreSkips`.
pub type DefaultIgnoreSkips = fallow_types::duplicates::DefaultIgnoreSkips;
/// Alias for `fallow_types::duplicates::DuplicationReport`.
pub type DuplicationReport = fallow_types::duplicates::DuplicationReport;
/// Alias for `fallow_types::duplicates::DuplicationStats`.
pub type DuplicationStats = fallow_types::duplicates::DuplicationStats;
18
/// Wrapper around `core_backend::BackendCloneFingerprintSet`.
///
/// The backend value is held in a private field, so the backend type does not
/// appear among this type's public fields.
#[derive(Debug, Clone)]
pub struct CloneFingerprintSet {
    // The wrapped backend fingerprint set.
    inner: core_backend::BackendCloneFingerprintSet,
}
24
// Every method below forwards directly to the wrapped backend value.
impl CloneFingerprintSet {
    /// Builds a fingerprint set for `groups` by calling
    /// `core_backend::BackendCloneFingerprintSet::from_groups` and wrapping the result.
    #[must_use]
    pub fn from_groups(groups: &[CloneGroup]) -> Self {
        Self {
            inner: core_backend::BackendCloneFingerprintSet::from_groups(groups),
        }
    }

    /// Returns the fingerprint string the backend produces for `group`.
    ///
    /// NOTE(review): the fingerprint format is defined by the backend and is not
    /// visible here; the unit test in this file only checks that it starts with
    /// [`FINGERPRINT_PREFIX`].
    #[must_use]
    pub fn fingerprint_for_group(&self, group: &CloneGroup) -> String {
        self.inner.fingerprint_for_group(group)
    }

    /// Returns the fingerprint string the backend produces for the given
    /// `instances`, `token_count` and `line_count`.
    ///
    /// NOTE(review): presumably equivalent to [`Self::fingerprint_for_group`] for a
    /// group made of these parts — confirm against the backend.
    #[must_use]
    pub fn fingerprint_for_parts(
        &self,
        instances: &[CloneInstance],
        token_count: usize,
        line_count: usize,
    ) -> String {
        self.inner
            .fingerprint_for_parts(instances, token_count, line_count)
    }

    /// Asks the backend to look up `fingerprint` among `groups`.
    ///
    /// The returned reference, when present, borrows from `groups` (lifetime `'a`),
    /// not from `self`.
    ///
    /// NOTE(review): presumably `None` means no group in `groups` has this
    /// fingerprint — confirm against the backend.
    #[must_use]
    pub fn find_group<'a>(
        &self,
        groups: &'a [CloneGroup],
        fingerprint: &str,
    ) -> Option<&'a CloneGroup> {
        self.inner.find_group(groups, fingerprint)
    }
}
62
/// Returns the string produced by `core_backend::clone_fingerprint` for `instances`.
///
/// Thin wrapper: the fingerprint format is defined entirely by the backend.
#[must_use]
pub fn clone_fingerprint(instances: &[CloneInstance]) -> String {
    core_backend::clone_fingerprint(instances)
}
68
/// Returns the string produced by `core_backend::fingerprint_for_fragment` for
/// the text `fragment`.
///
/// Thin wrapper: the fingerprint format is defined entirely by the backend.
#[must_use]
pub fn fingerprint_for_fragment(fragment: &str) -> String {
    core_backend::fingerprint_for_fragment(fragment)
}
74
/// Returns the result of `core_backend::dominant_identifier` for `group`.
///
/// NOTE(review): judging by the name, presumably the most prominent identifier in
/// the group's cloned code, with `None` when there is none — the selection rule
/// lives in the backend; confirm there.
#[must_use]
pub fn dominant_identifier(group: &CloneGroup) -> Option<String> {
    core_backend::dominant_identifier(group)
}
80
/// Forwards `report` (mutably) and `root` to `core_backend::refresh_clone_families`.
///
/// NOTE(review): presumably rebuilds the report's clone-family data from its
/// current clone groups, using `root` as the project root — confirm against the
/// backend.
pub fn refresh_clone_families(report: &mut DuplicationReport, root: &Path) {
    core_backend::refresh_clone_families(report, root);
}
85
86#[must_use]
91pub fn recompute_stats(report: &DuplicationReport) -> DuplicationStats {
92 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
93 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
94 let mut duplicated_tokens = 0usize;
95 let mut clone_instances = 0usize;
96
97 for group in &report.clone_groups {
98 for instance in &group.instances {
99 files_with_clones.insert(&instance.file);
100 clone_instances += 1;
101 let lines = file_dup_lines.entry(&instance.file).or_default();
102 for line in instance.start_line..=instance.end_line {
103 lines.insert(line);
104 }
105 }
106 duplicated_tokens += group.token_count * group.instances.len();
107 }
108
109 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
110
111 DuplicationStats {
112 total_files: report.stats.total_files,
113 files_with_clones: files_with_clones.len(),
114 total_lines: report.stats.total_lines,
115 duplicated_lines,
116 total_tokens: report.stats.total_tokens,
117 duplicated_tokens,
118 clone_groups: report.clone_groups.len(),
119 clone_instances,
120 duplication_percentage: if report.stats.total_lines > 0 {
121 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
122 } else {
123 0.0
124 },
125 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
126 }
127}
128
/// Returns the boolean produced by `core_backend::source_token_kinds_equivalent`
/// for the given arguments, which are passed through unchanged.
///
/// NOTE(review): judging by the names, presumably reports whether the source
/// texts `current` and `base` (both treated as the contents of `path`) have
/// equivalent token-kind sequences, with `cross_language` selecting the
/// backend's cross-language mode — confirm against the backend.
#[must_use]
pub fn source_token_kinds_equivalent(
    path: &Path,
    current: &str,
    base: &str,
    cross_language: bool,
) -> bool {
    core_backend::source_token_kinds_equivalent(path, current, base, cross_language)
}
142
/// Runs `core_backend::find_duplicates` on `files` with `config` and returns its
/// [`DuplicationReport`].
///
/// `root` is forwarded unchanged. NOTE(review): presumably the project root the
/// discovered files belong to — confirm against the backend.
#[must_use]
pub fn find_duplicates(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
) -> DuplicationReport {
    core_backend::find_duplicates(root, files, config)
}
152
/// Runs `core_backend::find_duplicates_cached` and returns its
/// [`DuplicationReport`].
///
/// Takes the same arguments as [`find_duplicates`] plus `cache_dir`, all forwarded
/// unchanged. NOTE(review): presumably `cache_dir` is where the backend stores
/// and reads cached intermediate results — confirm against the backend.
#[must_use]
pub fn find_duplicates_cached(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    cache_dir: &Path,
) -> DuplicationReport {
    core_backend::find_duplicates_cached(root, files, config, cache_dir)
}
163
/// Runs `core_backend::find_duplicates_with_defaults` and returns its
/// [`DuplicationAnalysis`].
///
/// All arguments are forwarded unchanged; `cache_dir` is optional here.
/// NOTE(review): which "defaults" are applied, and whether `None` disables
/// caching, is decided by the backend — confirm there.
#[must_use]
pub fn find_duplicates_with_defaults(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    cache_dir: Option<&Path>,
) -> DuplicationAnalysis {
    core_backend::find_duplicates_with_defaults(root, files, config, cache_dir)
}
174
/// Runs `core_backend::find_duplicates_touching_files_with_defaults` and returns
/// its [`DuplicationAnalysis`].
///
/// Takes the same arguments as [`find_duplicates_with_defaults`] plus
/// `changed_files`, all forwarded unchanged. NOTE(review): presumably the result
/// is limited to duplication involving at least one of `changed_files` — confirm
/// against the backend.
#[must_use]
pub fn find_duplicates_touching_files_with_defaults(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    changed_files: &[PathBuf],
    cache_dir: Option<&Path>,
) -> DuplicationAnalysis {
    core_backend::find_duplicates_touching_files_with_defaults(
        root,
        files,
        config,
        changed_files,
        cache_dir,
    )
}
192
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Builds an instance spanning `start_line..=end_line` of `file`, with both
    /// columns set to zero and an empty fragment.
    fn instance(file: &str, start_line: usize, end_line: usize) -> CloneInstance {
        let file = PathBuf::from(file);
        CloneInstance {
            file,
            start_line,
            end_line,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Builds a one-line instance at `line` of `file` carrying `fragment`.
    fn single_line_instance(file: &str, line: usize, fragment: &str) -> CloneInstance {
        let mut built = instance(file, line, line);
        built.fragment = fragment.to_string();
        built
    }

    /// Wraps `clone_groups` in a report whose incoming stats hold fixed totals
    /// (3 files, 100 lines, 1000 tokens, 4 groups below the occurrence minimum)
    /// and defaults everywhere else.
    fn report(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        let stats = DuplicationStats {
            total_files: 3,
            total_lines: 100,
            total_tokens: 1_000,
            clone_groups_below_min_occurrences: 4,
            ..DuplicationStats::default()
        };
        DuplicationReport {
            clone_groups,
            clone_families: Vec::new(),
            mirrored_directories: Vec::new(),
            stats,
        }
    }

    #[test]
    fn recompute_stats_deduplicates_overlapping_lines_per_file() {
        // `src/a.ts` appears in both groups with overlapping ranges 1..=10 and
        // 5..=12, which together cover 12 distinct lines.
        let first = CloneGroup {
            instances: vec![instance("src/a.ts", 1, 10), instance("src/b.ts", 20, 24)],
            token_count: 30,
            line_count: 10,
        };
        let second = CloneGroup {
            instances: vec![instance("src/a.ts", 5, 12), instance("src/c.ts", 40, 44)],
            token_count: 20,
            line_count: 8,
        };
        let input = report(vec![first, second]);

        let recomputed = recompute_stats(&input);

        // Totals are carried over from the incoming stats.
        assert_eq!(recomputed.total_files, 3);
        assert_eq!(recomputed.total_lines, 100);
        assert_eq!(recomputed.total_tokens, 1_000);
        assert_eq!(recomputed.clone_groups_below_min_occurrences, 4);

        // Counts are derived from the groups: 12 (a.ts) + 5 (b.ts) + 5 (c.ts)
        // lines, and 30 * 2 + 20 * 2 tokens.
        assert_eq!(recomputed.files_with_clones, 3);
        assert_eq!(recomputed.duplicated_lines, 22);
        assert_eq!(recomputed.duplicated_tokens, 100);
        assert_eq!(recomputed.clone_groups, 2);
        assert_eq!(recomputed.clone_instances, 4);
        assert!((recomputed.duplication_percentage - 22.0).abs() < f64::EPSILON);
    }

    #[test]
    fn recompute_stats_handles_zero_total_lines() {
        let only_group = CloneGroup {
            instances: vec![instance("src/a.ts", 1, 1)],
            token_count: 5,
            line_count: 1,
        };
        let mut input = report(vec![only_group]);
        input.stats.total_lines = 0;

        let recomputed = recompute_stats(&input);

        // The duplicated line is still counted, but the percentage falls back
        // to zero instead of dividing by zero.
        assert_eq!(recomputed.duplicated_lines, 1);
        assert!(recomputed.duplication_percentage.abs() < f64::EPSILON);
    }

    #[test]
    fn clone_fingerprint_set_delegates_without_leaking_core_type() {
        let snippet = "const value = 1;";
        let groups = vec![CloneGroup {
            instances: vec![
                single_line_instance("src/a.ts", 1, snippet),
                single_line_instance("src/b.ts", 2, snippet),
            ],
            token_count: 5,
            line_count: 1,
        }];

        let set = CloneFingerprintSet::from_groups(&groups);
        let produced = set.fingerprint_for_group(&groups[0]);

        assert!(produced.starts_with(FINGERPRINT_PREFIX));
        assert!(set.find_group(&groups, &produced).is_some());
    }
}