1use std::collections::BTreeMap;
15use std::path::Path;
16
17use fallow_core::duplicates::{
18 CloneFamily, CloneGroup, CloneInstance, DuplicationReport, DuplicationStats,
19};
20use rustc_hash::FxHashSet;
21use serde::Serialize;
22
23use super::grouping::OwnershipResolver;
24use super::relative_path;
25use crate::baseline::recompute_stats;
26use crate::codeowners::UNOWNED_LABEL;
27
28fn key_for_instance(instance: &CloneInstance, root: &Path, resolver: &OwnershipResolver) -> String {
30 resolver.resolve(relative_path(&instance.file, root))
31}
32
33pub fn largest_owner(group: &CloneGroup, root: &Path, resolver: &OwnershipResolver) -> String {
40 let mut counts: BTreeMap<String, u32> = BTreeMap::new();
41 for instance in &group.instances {
42 let key = key_for_instance(instance, root, resolver);
43 *counts.entry(key).or_insert(0) += 1;
44 }
45 if counts.is_empty() {
46 return UNOWNED_LABEL.to_string();
47 }
48 let mut best_key: Option<String> = None;
49 let mut best_count: u32 = 0;
50 for (key, count) in counts {
51 if best_key.is_none() || count > best_count {
52 best_count = count;
53 best_key = Some(key);
54 }
55 }
56 best_key.unwrap_or_else(|| UNOWNED_LABEL.to_string())
57}
58
/// A clone instance paired with the owner key resolved for its file.
#[derive(Debug, Clone, Serialize)]
pub struct AttributedInstance {
    /// The underlying clone instance. Flattened by serde, so its fields are serialized
    /// inline next to `owner` instead of being nested under an `instance` key.
    #[serde(flatten)]
    pub instance: CloneInstance,
    /// Grouping key resolved for this instance's file.
    pub owner: String,
}
69
/// A clone group annotated with ownership information, both for the group as a whole and
/// for each of its instances.
#[derive(Debug, Clone, Serialize)]
pub struct AttributedCloneGroup {
    /// Owner key the whole group is attributed to, as computed by `largest_owner`.
    pub primary_owner: String,
    /// Token count copied from the source `CloneGroup`.
    pub token_count: usize,
    /// Line count copied from the source `CloneGroup`.
    pub line_count: usize,
    /// The group's instances, each carrying its own resolved owner key.
    pub instances: Vec<AttributedInstance>,
}
83
84impl AttributedCloneGroup {
85 fn from_group(group: &CloneGroup, root: &Path, resolver: &OwnershipResolver) -> Self {
86 let primary_owner = largest_owner(group, root, resolver);
87 let instances = group
88 .instances
89 .iter()
90 .map(|instance| AttributedInstance {
91 owner: key_for_instance(instance, root, resolver),
92 instance: instance.clone(),
93 })
94 .collect();
95 Self {
96 primary_owner,
97 token_count: group.token_count,
98 line_count: group.line_count,
99 instances,
100 }
101 }
102}
103
/// One bucket of the grouped duplication output: everything attributed to a single key.
#[derive(Debug, Clone, Serialize)]
pub struct DuplicationGroup {
    /// The grouping key (the `primary_owner` shared by every clone group in this bucket).
    pub key: String,
    /// Statistics returned by `recompute_stats` for a report restricted to this bucket's
    /// clone groups.
    pub stats: DuplicationStats,
    /// Clone groups whose primary owner equals `key`.
    pub clone_groups: Vec<AttributedCloneGroup>,
    /// Clone families from the full report that include at least one file appearing in
    /// this bucket's clone groups.
    pub clone_families: Vec<CloneFamily>,
}
116
/// Result of grouping a duplication report by owner key.
#[derive(Debug, Clone, Serialize)]
pub struct DuplicationGrouping {
    /// Label of the grouping mode, taken from `OwnershipResolver::mode_label`.
    pub mode: &'static str,
    /// The buckets, in the order established by `build_duplication_grouping`.
    pub groups: Vec<DuplicationGroup>,
}
126
127pub fn build_duplication_grouping(
132 report: &DuplicationReport,
133 root: &Path,
134 resolver: &OwnershipResolver,
135) -> DuplicationGrouping {
136 let mut buckets: BTreeMap<String, Vec<AttributedCloneGroup>> = BTreeMap::new();
138 for group in &report.clone_groups {
139 let attributed = AttributedCloneGroup::from_group(group, root, resolver);
140 buckets
141 .entry(attributed.primary_owner.clone())
142 .or_default()
143 .push(attributed);
144 }
145
146 let mut groups: Vec<DuplicationGroup> = buckets
150 .into_iter()
151 .map(|(key, attributed_groups)| {
152 let original_groups: Vec<CloneGroup> = attributed_groups
154 .iter()
155 .map(|ag| CloneGroup {
156 instances: ag.instances.iter().map(|i| i.instance.clone()).collect(),
157 token_count: ag.token_count,
158 line_count: ag.line_count,
159 })
160 .collect();
161 let mut subset = DuplicationReport {
162 clone_groups: original_groups,
163 clone_families: Vec::new(),
164 mirrored_directories: Vec::new(),
165 stats: DuplicationStats {
166 total_files: report.stats.total_files,
167 files_with_clones: 0,
168 total_lines: report.stats.total_lines,
169 duplicated_lines: 0,
170 total_tokens: report.stats.total_tokens,
171 duplicated_tokens: 0,
172 clone_groups: 0,
173 clone_instances: 0,
174 duplication_percentage: 0.0,
175 },
176 };
177 subset.stats = recompute_stats(&subset);
178
179 let bucket_files: FxHashSet<&Path> = attributed_groups
184 .iter()
185 .flat_map(|ag| ag.instances.iter().map(|i| i.instance.file.as_path()))
186 .collect();
187 let clone_families: Vec<CloneFamily> = report
188 .clone_families
189 .iter()
190 .filter(|f| f.files.iter().any(|fp| bucket_files.contains(fp.as_path())))
191 .cloned()
192 .collect();
193
194 DuplicationGroup {
195 key,
196 stats: subset.stats,
197 clone_groups: attributed_groups,
198 clone_families,
199 }
200 })
201 .collect();
202
203 groups.sort_by(|a, b| {
205 let a_unowned = a.key == UNOWNED_LABEL;
206 let b_unowned = b.key == UNOWNED_LABEL;
207 match (a_unowned, b_unowned) {
208 (true, false) => std::cmp::Ordering::Greater,
209 (false, true) => std::cmp::Ordering::Less,
210 _ => b
211 .clone_groups
212 .len()
213 .cmp(&a.clone_groups.len())
214 .then_with(|| a.key.cmp(&b.key)),
215 }
216 });
217
218 DuplicationGrouping {
219 mode: resolver.mode_label(),
220 groups,
221 }
222}
223
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use fallow_core::duplicates::{CloneInstance, DuplicationStats};

    use super::*;
    use crate::codeowners::CodeOwners;

    /// Project root that every fixture path lives under.
    const ROOT: &str = "/root";

    /// Clone instance spanning `start..=end` of `path`, with zeroed columns and no fragment.
    fn instance(path: &str, start: usize, end: usize) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(path),
            start_line: start,
            end_line: end,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Clone group over `instances` with fixed token and line counts.
    fn group(instances: Vec<CloneInstance>) -> CloneGroup {
        CloneGroup {
            instances,
            token_count: 50,
            line_count: 10,
        }
    }

    /// Clone group with one instance per path, each covering lines 1 through 10.
    fn group_of(paths: &[&str]) -> CloneGroup {
        group(paths.iter().map(|path| instance(path, 1, 10)).collect())
    }

    /// Report holding `groups`, with no families or mirrored directories.
    fn report(groups: Vec<CloneGroup>) -> DuplicationReport {
        let stats = DuplicationStats {
            total_files: 10,
            total_lines: 1000,
            ..Default::default()
        };
        DuplicationReport {
            clone_groups: groups,
            clone_families: vec![],
            mirrored_directories: vec![],
            stats,
        }
    }

    #[test]
    fn largest_owner_majority_wins() {
        let clones = group_of(&["/root/src/a.ts", "/root/src/b.ts", "/root/lib/c.ts"]);
        let owner = largest_owner(&clones, Path::new(ROOT), &OwnershipResolver::Directory);
        assert_eq!(owner, "src", "src has 2 instances vs lib's 1");
    }

    #[test]
    fn largest_owner_alphabetical_tiebreak() {
        // One instance each: the tie goes to the key that sorts first.
        let clones = group_of(&["/root/src/a.ts", "/root/lib/b.ts"]);
        let owner = largest_owner(&clones, Path::new(ROOT), &OwnershipResolver::Directory);
        assert_eq!(owner, "lib");
    }

    #[test]
    fn largest_owner_three_way_tie_alphabetical() {
        let clones = group_of(&["/root/zeta/a.ts", "/root/alpha/b.ts", "/root/beta/c.ts"]);
        let owner = largest_owner(&clones, Path::new(ROOT), &OwnershipResolver::Directory);
        assert_eq!(owner, "alpha");
    }

    #[test]
    fn build_grouping_partitions_clone_groups() {
        let input = report(vec![
            group_of(&["/root/src/a.ts", "/root/src/b.ts"]),
            group_of(&["/root/lib/x.ts", "/root/lib/y.ts"]),
        ]);
        let grouping =
            build_duplication_grouping(&input, Path::new(ROOT), &OwnershipResolver::Directory);

        assert_eq!(grouping.groups.len(), 2);
        // Each directory gets its own bucket holding exactly its one clone group.
        for key in &["lib", "src"] {
            let bucket = grouping.groups.iter().find(|g| g.key == *key).unwrap();
            assert_eq!(bucket.clone_groups.len(), 1);
        }
    }

    #[test]
    fn build_grouping_unowned_pinned_last() {
        let owners = CodeOwners::parse("/src/ @frontend\n").unwrap();
        let resolver = OwnershipResolver::Owner(owners);

        // `src` matches the CODEOWNERS rule above; `docs` matches nothing.
        let owned = group_of(&["/root/src/a.ts", "/root/src/b.ts"]);
        let unowned = group_of(&["/root/docs/a.md", "/root/docs/b.md"]);
        let input = report(vec![owned, unowned]);

        let grouping = build_duplication_grouping(&input, Path::new(ROOT), &resolver);
        assert_eq!(grouping.groups.len(), 2);
        let last = grouping.groups.last().unwrap();
        assert_eq!(last.key, UNOWNED_LABEL);
    }

    #[test]
    fn build_grouping_per_instance_owner_inline() {
        let input = report(vec![group_of(&[
            "/root/src/a.ts",
            "/root/src/b.ts",
            "/root/lib/c.ts",
        ])]);
        let grouping =
            build_duplication_grouping(&input, Path::new(ROOT), &OwnershipResolver::Directory);

        // The single clone group lands in the bucket of its majority owner ...
        assert_eq!(grouping.groups.len(), 1);
        let bucket = &grouping.groups[0];
        assert_eq!(bucket.key, "src");
        assert_eq!(bucket.clone_groups.len(), 1);

        // ... while every instance keeps the owner resolved for its own file.
        let attributed = &bucket.clone_groups[0];
        assert_eq!(attributed.primary_owner, "src");
        assert_eq!(attributed.instances.len(), 3);
        assert!(attributed.instances.iter().any(|i| i.owner == "src"));
        assert!(attributed.instances.iter().any(|i| i.owner == "lib"));
    }

    #[test]
    fn empty_report_produces_empty_grouping() {
        let input = DuplicationReport::default();
        let grouping =
            build_duplication_grouping(&input, Path::new(ROOT), &OwnershipResolver::Directory);
        assert!(grouping.groups.is_empty());
    }
}