1use std::path::{Path, PathBuf};
4
5use fallow_config::DuplicatesConfig;
6use fallow_types::discover::DiscoveredFile;
7use rustc_hash::{FxHashMap, FxHashSet};
8
9use crate::DuplicationAnalysis;
10
/// Fingerprint prefix, re-exported unchanged from `fallow_core::duplicates::FINGERPRINT_PREFIX`.
///
/// The unit tests in this file assert that the string returned by
/// `CloneFingerprintSet::fingerprint_for_group` starts with this prefix.
pub const FINGERPRINT_PREFIX: &str = fallow_core::duplicates::FINGERPRINT_PREFIX;
12
/// Alias for `fallow_types::duplicates::CloneGroup`.
pub type CloneGroup = fallow_types::duplicates::CloneGroup;
/// Alias for `fallow_types::duplicates::CloneInstance`.
pub type CloneInstance = fallow_types::duplicates::CloneInstance;
/// Alias for `fallow_types::duplicates::DefaultIgnoreSkips`.
pub type DefaultIgnoreSkips = fallow_types::duplicates::DefaultIgnoreSkips;
/// Alias for `fallow_types::duplicates::DuplicationReport`.
pub type DuplicationReport = fallow_types::duplicates::DuplicationReport;
/// Alias for `fallow_types::duplicates::DuplicationStats`.
pub type DuplicationStats = fallow_types::duplicates::DuplicationStats;
18
/// Wrapper around `fallow_core::duplicates::CloneFingerprintSet`.
///
/// The core value lives in a private field, so code outside this module can only reach it
/// through this type's methods.
#[derive(Debug, Clone)]
pub struct CloneFingerprintSet {
    /// The wrapped core fingerprint set.
    inner: fallow_core::duplicates::CloneFingerprintSet,
}
24
impl CloneFingerprintSet {
    /// Builds a fingerprint set by handing `groups` to the core `from_groups` constructor.
    #[must_use]
    pub fn from_groups(groups: &[CloneGroup]) -> Self {
        Self {
            inner: fallow_core::duplicates::CloneFingerprintSet::from_groups(groups),
        }
    }

    /// Returns the fingerprint string the wrapped set produces for `group`.
    #[must_use]
    pub fn fingerprint_for_group(&self, group: &CloneGroup) -> String {
        self.inner.fingerprint_for_group(group)
    }

    /// Returns the fingerprint string the wrapped set produces for the given parts.
    ///
    /// NOTE(review): the three arguments mirror the `instances`, `token_count` and `line_count`
    /// fields of a [`CloneGroup`]; presumably this is the field-wise counterpart of
    /// [`Self::fingerprint_for_group`] — confirm against the core implementation.
    #[must_use]
    pub fn fingerprint_for_parts(
        &self,
        instances: &[CloneInstance],
        token_count: usize,
        line_count: usize,
    ) -> String {
        self.inner
            .fingerprint_for_parts(instances, token_count, line_count)
    }

    /// Asks the wrapped set for the entry of `groups` identified by `fingerprint`.
    ///
    /// The returned reference borrows from `groups`, not from `self`. `None` is whatever the
    /// core lookup reports when it has no result.
    #[must_use]
    pub fn find_group<'a>(
        &self,
        groups: &'a [CloneGroup],
        fingerprint: &str,
    ) -> Option<&'a CloneGroup> {
        self.inner.find_group(groups, fingerprint)
    }
}
62
/// Returns the fingerprint string for `instances`.
///
/// Pure delegation to `fallow_core::duplicates::clone_fingerprint`; the fingerprint format is
/// defined there.
#[must_use]
pub fn clone_fingerprint(instances: &[CloneInstance]) -> String {
    fallow_core::duplicates::clone_fingerprint(instances)
}
68
/// Returns the fingerprint string for a single source `fragment`.
///
/// Pure delegation to `fallow_core::duplicates::fingerprint_for_fragment`.
#[must_use]
pub fn fingerprint_for_fragment(fragment: &str) -> String {
    fallow_core::duplicates::fingerprint_for_fragment(fragment)
}
74
/// Returns the dominant identifier of `group`, or `None` when the core routine yields none.
///
/// Pure delegation to `fallow_core::duplicates::dominant_identifier`.
/// NOTE(review): the rule for what makes an identifier "dominant" lives in `fallow_core` and is
/// not visible from this file.
#[must_use]
pub fn dominant_identifier(group: &CloneGroup) -> Option<String> {
    fallow_core::duplicates::dominant_identifier(group)
}
80
81pub fn refresh_clone_families(report: &mut DuplicationReport, root: &Path) {
83 report.clone_families =
84 fallow_core::duplicates::families::group_into_families(&report.clone_groups, root);
85 report.mirrored_directories = fallow_core::duplicates::families::detect_mirrored_directories(
86 &report.clone_families,
87 root,
88 );
89}
90
91#[must_use]
96pub fn recompute_stats(report: &DuplicationReport) -> DuplicationStats {
97 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
98 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
99 let mut duplicated_tokens = 0usize;
100 let mut clone_instances = 0usize;
101
102 for group in &report.clone_groups {
103 for instance in &group.instances {
104 files_with_clones.insert(&instance.file);
105 clone_instances += 1;
106 let lines = file_dup_lines.entry(&instance.file).or_default();
107 for line in instance.start_line..=instance.end_line {
108 lines.insert(line);
109 }
110 }
111 duplicated_tokens += group.token_count * group.instances.len();
112 }
113
114 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
115
116 DuplicationStats {
117 total_files: report.stats.total_files,
118 files_with_clones: files_with_clones.len(),
119 total_lines: report.stats.total_lines,
120 duplicated_lines,
121 total_tokens: report.stats.total_tokens,
122 duplicated_tokens,
123 clone_groups: report.clone_groups.len(),
124 clone_instances,
125 duplication_percentage: if report.stats.total_lines > 0 {
126 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
127 } else {
128 0.0
129 },
130 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
131 }
132}
133
134#[must_use]
139pub fn source_token_kinds_equivalent(
140 path: &Path,
141 current: &str,
142 base: &str,
143 cross_language: bool,
144) -> bool {
145 let current_tokens =
146 fallow_core::duplicates::tokenize::tokenize_file(path, current, cross_language);
147 let base_tokens = fallow_core::duplicates::tokenize::tokenize_file(path, base, cross_language);
148 current_tokens
149 .tokens
150 .iter()
151 .map(|token| &token.kind)
152 .eq(base_tokens.tokens.iter().map(|token| &token.kind))
153}
154
/// Runs duplicate detection over `files` for the project at `root` using `config`.
///
/// Pure delegation to `fallow_core::duplicates::find_duplicates`; the resulting report is
/// returned unchanged.
#[must_use]
pub fn find_duplicates(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
) -> DuplicationReport {
    fallow_core::duplicates::find_duplicates(root, files, config)
}
164
/// Runs duplicate detection like [`find_duplicates`], additionally passing `cache_dir` through.
///
/// Pure delegation to `fallow_core::duplicates::find_duplicates_cached`.
/// NOTE(review): what is stored under `cache_dir` and when it is invalidated is decided by
/// `fallow_core`, not by this wrapper.
#[must_use]
pub fn find_duplicates_cached(
    root: &Path,
    files: &[DiscoveredFile],
    config: &DuplicatesConfig,
    cache_dir: &Path,
) -> DuplicationReport {
    fallow_core::duplicates::find_duplicates_cached(root, files, config, cache_dir)
}
175
176#[must_use]
178pub fn find_duplicates_with_defaults(
179 root: &Path,
180 files: &[DiscoveredFile],
181 config: &DuplicatesConfig,
182 cache_dir: Option<&Path>,
183) -> DuplicationAnalysis {
184 let (report, default_ignore_skips) = if let Some(cache_dir) = cache_dir {
185 fallow_core::duplicates::find_duplicates_cached_with_default_ignore_skips(
186 root, files, config, cache_dir,
187 )
188 } else {
189 fallow_core::duplicates::find_duplicates_with_default_ignore_skips(root, files, config)
190 };
191 DuplicationAnalysis {
192 report,
193 default_ignore_skips,
194 }
195}
196
197#[must_use]
199pub fn find_duplicates_touching_files_with_defaults(
200 root: &Path,
201 files: &[DiscoveredFile],
202 config: &DuplicatesConfig,
203 changed_files: &[PathBuf],
204 cache_dir: Option<&Path>,
205) -> DuplicationAnalysis {
206 let changed_files = changed_files.iter().cloned().collect::<FxHashSet<_>>();
207 let (report, default_ignore_skips) = if let Some(cache_dir) = cache_dir {
208 fallow_core::duplicates::find_duplicates_touching_files_cached_with_default_ignore_skips(
209 root,
210 files,
211 config,
212 &changed_files,
213 cache_dir,
214 )
215 } else {
216 fallow_core::duplicates::find_duplicates_touching_files_with_default_ignore_skips(
217 root,
218 files,
219 config,
220 &changed_files,
221 )
222 };
223 DuplicationAnalysis {
224 report,
225 default_ignore_skips,
226 }
227}
228
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;

    /// Clone instance spanning `start_line..=end_line` of `file`, with zeroed columns and an
    /// empty fragment.
    fn instance_at(file: &str, start_line: usize, end_line: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(file),
            start_line,
            end_line,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Same as [`instance_at`], but carrying the given source `fragment`.
    fn instance_with_fragment(
        file: &str,
        start_line: usize,
        end_line: usize,
        fragment: &str,
    ) -> CloneInstance {
        CloneInstance {
            fragment: fragment.to_string(),
            ..instance_at(file, start_line, end_line)
        }
    }

    /// Clone group built from its three fields.
    fn group_of(
        instances: Vec<CloneInstance>,
        token_count: usize,
        line_count: usize,
    ) -> CloneGroup {
        CloneGroup {
            instances,
            token_count,
            line_count,
        }
    }

    /// Report whose project-wide totals are fixed; the clone-derived counters keep their
    /// defaults so that `recompute_stats` has to fill them in.
    fn report_with(clone_groups: Vec<CloneGroup>) -> DuplicationReport {
        let stats = DuplicationStats {
            total_files: 3,
            total_lines: 100,
            total_tokens: 1_000,
            clone_groups_below_min_occurrences: 4,
            ..DuplicationStats::default()
        };
        DuplicationReport {
            clone_groups,
            clone_families: Vec::new(),
            mirrored_directories: Vec::new(),
            stats,
        }
    }

    #[test]
    fn recompute_stats_deduplicates_overlapping_lines_per_file() {
        // src/a.ts appears in both groups with overlapping ranges (1..=10 and 5..=12).
        let first = group_of(
            vec![instance_at("src/a.ts", 1, 10), instance_at("src/b.ts", 20, 24)],
            30,
            10,
        );
        let second = group_of(
            vec![instance_at("src/a.ts", 5, 12), instance_at("src/c.ts", 40, 44)],
            20,
            8,
        );
        let input = report_with(vec![first, second]);

        let stats = recompute_stats(&input);

        assert_eq!(stats.total_files, 3);
        assert_eq!(stats.files_with_clones, 3);
        assert_eq!(stats.total_lines, 100);
        // 12 distinct lines in a.ts + 5 in b.ts + 5 in c.ts.
        assert_eq!(stats.duplicated_lines, 22);
        assert_eq!(stats.total_tokens, 1_000);
        // 30 * 2 + 20 * 2.
        assert_eq!(stats.duplicated_tokens, 100);
        assert_eq!(stats.clone_groups, 2);
        assert_eq!(stats.clone_instances, 4);
        assert!((stats.duplication_percentage - 22.0).abs() < f64::EPSILON);
        assert_eq!(stats.clone_groups_below_min_occurrences, 4);
    }

    #[test]
    fn recompute_stats_handles_zero_total_lines() {
        let mut input = report_with(vec![group_of(vec![instance_at("src/a.ts", 1, 1)], 5, 1)]);
        input.stats.total_lines = 0;

        let stats = recompute_stats(&input);

        assert_eq!(stats.duplicated_lines, 1);
        assert!(stats.duplication_percentage.abs() < f64::EPSILON);
    }

    #[test]
    fn clone_fingerprint_set_delegates_without_leaking_core_type() {
        let groups = vec![group_of(
            vec![
                instance_with_fragment("src/a.ts", 1, 1, "const value = 1;"),
                instance_with_fragment("src/b.ts", 2, 2, "const value = 1;"),
            ],
            5,
            1,
        )];

        let fingerprints = CloneFingerprintSet::from_groups(&groups);
        let fingerprint = fingerprints.fingerprint_for_group(&groups[0]);

        assert!(fingerprint.starts_with(FINGERPRINT_PREFIX));
        assert!(fingerprints.find_group(&groups, &fingerprint).is_some());
    }
}