1use std::collections::BTreeMap;
15use std::path::Path;
16
17use fallow_core::duplicates::{
18 CloneFamily, CloneGroup, CloneInstance, DuplicationReport, DuplicationStats,
19};
20use rustc_hash::FxHashSet;
21use serde::Serialize;
22
23use super::grouping::OwnershipResolver;
24use super::relative_path;
25use crate::baseline::recompute_stats;
26use crate::codeowners::UNOWNED_LABEL;
27
28fn key_for_instance(instance: &CloneInstance, root: &Path, resolver: &OwnershipResolver) -> String {
30 resolver.resolve(relative_path(&instance.file, root))
31}
32
33pub fn largest_owner(group: &CloneGroup, root: &Path, resolver: &OwnershipResolver) -> String {
40 let mut counts: BTreeMap<String, u32> = BTreeMap::new();
41 for instance in &group.instances {
42 let key = key_for_instance(instance, root, resolver);
43 *counts.entry(key).or_insert(0) += 1;
44 }
45 if counts.is_empty() {
46 return UNOWNED_LABEL.to_string();
47 }
48 let mut best_key: Option<String> = None;
49 let mut best_count: u32 = 0;
50 for (key, count) in counts {
51 if best_key.is_none() || count > best_count {
52 best_count = count;
53 best_key = Some(key);
54 }
55 }
56 best_key.unwrap_or_else(|| UNOWNED_LABEL.to_string())
57}
58
59#[derive(Debug, Clone, Serialize)]
62pub struct AttributedInstance {
63 #[serde(flatten)]
65 pub instance: CloneInstance,
66 pub owner: String,
68}
69
70#[derive(Debug, Clone, Serialize)]
72pub struct AttributedCloneGroup {
73 pub primary_owner: String,
76 pub token_count: usize,
78 pub line_count: usize,
80 pub instances: Vec<AttributedInstance>,
82}
83
84impl AttributedCloneGroup {
85 fn from_group(group: &CloneGroup, root: &Path, resolver: &OwnershipResolver) -> Self {
86 let primary_owner = largest_owner(group, root, resolver);
87 let instances = group
88 .instances
89 .iter()
90 .map(|instance| AttributedInstance {
91 owner: key_for_instance(instance, root, resolver),
92 instance: instance.clone(),
93 })
94 .collect();
95 Self {
96 primary_owner,
97 token_count: group.token_count,
98 line_count: group.line_count,
99 instances,
100 }
101 }
102}
103
104#[derive(Debug, Clone, Serialize)]
106pub struct DuplicationGroup {
107 pub key: String,
109 pub stats: DuplicationStats,
111 pub clone_groups: Vec<AttributedCloneGroup>,
113 pub clone_families: Vec<CloneFamily>,
115}
116
117#[derive(Debug, Clone, Serialize)]
119pub struct DuplicationGrouping {
120 pub mode: &'static str,
122 pub groups: Vec<DuplicationGroup>,
125}
126
127pub fn build_duplication_grouping(
132 report: &DuplicationReport,
133 root: &Path,
134 resolver: &OwnershipResolver,
135) -> DuplicationGrouping {
136 let mut buckets: BTreeMap<String, Vec<AttributedCloneGroup>> = BTreeMap::new();
138 for group in &report.clone_groups {
139 let attributed = AttributedCloneGroup::from_group(group, root, resolver);
140 buckets
141 .entry(attributed.primary_owner.clone())
142 .or_default()
143 .push(attributed);
144 }
145
146 let mut groups: Vec<DuplicationGroup> = buckets
150 .into_iter()
151 .map(|(key, attributed_groups)| {
152 let original_groups: Vec<CloneGroup> = attributed_groups
154 .iter()
155 .map(|ag| CloneGroup {
156 instances: ag.instances.iter().map(|i| i.instance.clone()).collect(),
157 token_count: ag.token_count,
158 line_count: ag.line_count,
159 })
160 .collect();
161 let mut subset = DuplicationReport {
162 clone_groups: original_groups,
163 clone_families: Vec::new(),
164 mirrored_directories: Vec::new(),
165 stats: DuplicationStats {
166 total_files: report.stats.total_files,
167 files_with_clones: 0,
168 total_lines: report.stats.total_lines,
169 duplicated_lines: 0,
170 total_tokens: report.stats.total_tokens,
171 duplicated_tokens: 0,
172 clone_groups: 0,
173 clone_instances: 0,
174 duplication_percentage: 0.0,
175 clone_groups_below_min_occurrences: report
176 .stats
177 .clone_groups_below_min_occurrences,
178 },
179 };
180 subset.stats = recompute_stats(&subset);
181
182 let bucket_files: FxHashSet<&Path> = attributed_groups
187 .iter()
188 .flat_map(|ag| ag.instances.iter().map(|i| i.instance.file.as_path()))
189 .collect();
190 let clone_families: Vec<CloneFamily> = report
191 .clone_families
192 .iter()
193 .filter(|f| f.files.iter().any(|fp| bucket_files.contains(fp.as_path())))
194 .cloned()
195 .collect();
196
197 DuplicationGroup {
198 key,
199 stats: subset.stats,
200 clone_groups: attributed_groups,
201 clone_families,
202 }
203 })
204 .collect();
205
206 groups.sort_by(|a, b| {
208 let a_unowned = a.key == UNOWNED_LABEL;
209 let b_unowned = b.key == UNOWNED_LABEL;
210 match (a_unowned, b_unowned) {
211 (true, false) => std::cmp::Ordering::Greater,
212 (false, true) => std::cmp::Ordering::Less,
213 _ => b
214 .clone_groups
215 .len()
216 .cmp(&a.clone_groups.len())
217 .then_with(|| a.key.cmp(&b.key)),
218 }
219 });
220
221 DuplicationGrouping {
222 mode: resolver.mode_label(),
223 groups,
224 }
225}
226
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use fallow_core::duplicates::{CloneInstance, DuplicationStats};

    use super::*;
    use crate::codeowners::CodeOwners;

    /// Project root shared by every fixture path in this module.
    fn root() -> &'static Path {
        Path::new("/root")
    }

    /// Clone instance at `path` spanning `start..=end`, with zeroed columns
    /// and an empty fragment.
    fn make_instance(path: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(path),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Clone group over `instances` with fixed token/line counts.
    fn make_group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Report holding `groups`, no families or mirrored directories, and
    /// default stats apart from the file and line totals.
    fn make_report(groups: Vec<CloneGroup>) -> DuplicationReport {
        DuplicationReport {
            clone_groups: groups,
            clone_families: vec![],
            mirrored_directories: vec![],
            stats: DuplicationStats {
                total_files: 10,
                total_lines: 1000,
                ..Default::default()
            },
        }
    }

    #[test]
    fn largest_owner_majority_wins() {
        let clone_group = make_group(vec![
            make_instance("/root/src/a.ts", 1, 10),
            make_instance("/root/src/b.ts", 1, 10),
            make_instance("/root/lib/c.ts", 1, 10),
        ]);
        let owner = largest_owner(&clone_group, root(), &OwnershipResolver::Directory);
        assert_eq!(owner, "src", "src has 2 instances vs lib's 1");
    }

    #[test]
    fn largest_owner_alphabetical_tiebreak() {
        let clone_group = make_group(vec![
            make_instance("/root/src/a.ts", 1, 10),
            make_instance("/root/lib/b.ts", 1, 10),
        ]);
        let owner = largest_owner(&clone_group, root(), &OwnershipResolver::Directory);
        // One instance each: the lexicographically smaller key wins.
        assert_eq!(owner, "lib");
    }

    #[test]
    fn largest_owner_three_way_tie_alphabetical() {
        let clone_group = make_group(vec![
            make_instance("/root/zeta/a.ts", 1, 10),
            make_instance("/root/alpha/b.ts", 1, 10),
            make_instance("/root/beta/c.ts", 1, 10),
        ]);
        let owner = largest_owner(&clone_group, root(), &OwnershipResolver::Directory);
        assert_eq!(owner, "alpha");
    }

    #[test]
    fn build_grouping_partitions_clone_groups() {
        let src_group = make_group(vec![
            make_instance("/root/src/a.ts", 1, 10),
            make_instance("/root/src/b.ts", 1, 10),
        ]);
        let lib_group = make_group(vec![
            make_instance("/root/lib/x.ts", 1, 10),
            make_instance("/root/lib/y.ts", 1, 10),
        ]);
        let duplication = make_report(vec![src_group, lib_group]);
        let grouping =
            build_duplication_grouping(&duplication, root(), &OwnershipResolver::Directory);

        assert_eq!(grouping.groups.len(), 2);
        // Each directory gets its own bucket holding exactly one clone group.
        for key in ["lib", "src"] {
            let bucket = grouping.groups.iter().find(|g| g.key == key).unwrap();
            assert_eq!(bucket.clone_groups.len(), 1);
        }
    }

    #[test]
    fn build_grouping_unowned_pinned_last() {
        let codeowners = CodeOwners::parse("/src/ @frontend\n").unwrap();
        let resolver = OwnershipResolver::Owner(codeowners);
        let owned_group = make_group(vec![
            make_instance("/root/src/a.ts", 1, 10),
            make_instance("/root/src/b.ts", 1, 10),
        ]);
        // The final assertion relies on these docs/ files resolving to the
        // unowned label under the single `/src/` rule above.
        let unowned_group = make_group(vec![
            make_instance("/root/docs/a.md", 1, 10),
            make_instance("/root/docs/b.md", 1, 10),
        ]);
        let duplication = make_report(vec![owned_group, unowned_group]);
        let grouping = build_duplication_grouping(&duplication, root(), &resolver);

        assert_eq!(grouping.groups.len(), 2);
        let last_bucket = grouping.groups.last().unwrap();
        assert_eq!(last_bucket.key, UNOWNED_LABEL);
    }

    #[test]
    fn build_grouping_per_instance_owner_inline() {
        let mixed_group = make_group(vec![
            make_instance("/root/src/a.ts", 1, 10),
            make_instance("/root/src/b.ts", 1, 10),
            make_instance("/root/lib/c.ts", 1, 10),
        ]);
        let duplication = make_report(vec![mixed_group]);
        let grouping =
            build_duplication_grouping(&duplication, root(), &OwnershipResolver::Directory);

        // The whole group lands in the majority owner's bucket...
        assert_eq!(grouping.groups.len(), 1);
        let bucket = &grouping.groups[0];
        assert_eq!(bucket.key, "src");
        assert_eq!(bucket.clone_groups.len(), 1);

        // ...while each instance still reports its own owner.
        let attributed = &bucket.clone_groups[0];
        assert_eq!(attributed.primary_owner, "src");
        assert_eq!(attributed.instances.len(), 3);
        assert!(attributed.instances.iter().any(|i| i.owner == "src"));
        assert!(attributed.instances.iter().any(|i| i.owner == "lib"));
    }

    #[test]
    fn empty_report_produces_empty_grouping() {
        let duplication = DuplicationReport::default();
        let grouping =
            build_duplication_grouping(&duplication, root(), &OwnershipResolver::Directory);
        assert!(grouping.groups.is_empty());
    }
}