1use std::collections::BTreeMap;
4use std::path::Path;
5
6use fallow_engine::duplicates::{
7 CloneFingerprintSet, CloneGroup, DuplicationReport, DuplicationStats,
8};
9use fallow_types::results::AnalysisResults;
10use rustc_hash::{FxHashMap, FxHashSet};
11
12use crate::{
13 AttributedCloneGroup, AttributedCloneGroupFinding, AttributedInstance, CloneFamilyFinding,
14 DuplicationGroup, DuplicationGrouping,
15};
16
/// Group key used when an issue or clone group has no path to attribute it to.
///
/// Groups carrying this key are always ordered after every other group.
pub const UNOWNED_GROUP_LABEL: &str = "(unowned)";
19
20pub struct ResultGroup {
22 pub key: String,
24 pub owners: Option<Vec<String>>,
29 pub results: AnalysisResults,
31}
32
33#[must_use]
38pub fn group_analysis_results_with<F, O>(
39 results: &AnalysisResults,
40 mut key_for_path: F,
41 mut owners_for_path: O,
42 include_owners: bool,
43) -> Vec<ResultGroup>
44where
45 F: FnMut(&Path) -> String,
46 O: FnMut(&Path) -> Option<Vec<String>>,
47{
48 let mut group_owners: FxHashMap<String, Vec<String>> = FxHashMap::default();
49 let mut builder = GroupingBuilder::new(|path: &Path| {
50 let key = key_for_path(path);
51 if include_owners && !group_owners.contains_key(&key) {
52 let owners = owners_for_path(path).unwrap_or_default();
53 group_owners.insert(key.clone(), owners);
54 }
55 key
56 });
57 builder.group_symbol_issues(results);
58 builder.group_dependency_issues(results);
59 builder.group_relationship_issues(results);
60 builder.group_workspace_config_issues(results);
61
62 finalize_groups(builder.into_groups(), group_owners, include_owners)
63}
64
65struct GroupingBuilder<F> {
66 groups: FxHashMap<String, AnalysisResults>,
67 key_for: F,
68}
69
70impl<F> GroupingBuilder<F>
71where
72 F: FnMut(&Path) -> String,
73{
74 fn new(key_for: F) -> Self {
75 Self {
76 groups: FxHashMap::default(),
77 key_for,
78 }
79 }
80
81 fn entry_for_path(&mut self, path: &Path) -> &mut AnalysisResults {
82 let key = (self.key_for)(path);
83 self.groups.entry(key).or_default()
84 }
85
86 fn entry_for_key(&mut self, key: String) -> &mut AnalysisResults {
87 self.groups.entry(key).or_default()
88 }
89
90 fn into_groups(self) -> FxHashMap<String, AnalysisResults> {
91 self.groups
92 }
93
94 fn group_symbol_issues(&mut self, results: &AnalysisResults) {
95 for item in &results.unused_files {
96 self.entry_for_path(&item.file.path)
97 .unused_files
98 .push(item.clone());
99 }
100 for item in &results.unused_exports {
101 self.entry_for_path(&item.export.path)
102 .unused_exports
103 .push(item.clone());
104 }
105 for item in &results.unused_types {
106 self.entry_for_path(&item.export.path)
107 .unused_types
108 .push(item.clone());
109 }
110 for item in &results.private_type_leaks {
111 self.entry_for_path(&item.leak.path)
112 .private_type_leaks
113 .push(item.clone());
114 }
115 for item in &results.unused_enum_members {
116 self.entry_for_path(&item.member.path)
117 .unused_enum_members
118 .push(item.clone());
119 }
120 for item in &results.unused_class_members {
121 self.entry_for_path(&item.member.path)
122 .unused_class_members
123 .push(item.clone());
124 }
125 for item in &results.unused_store_members {
126 self.entry_for_path(&item.member.path)
127 .unused_store_members
128 .push(item.clone());
129 }
130 for item in &results.unresolved_imports {
131 self.entry_for_path(&item.import.path)
132 .unresolved_imports
133 .push(item.clone());
134 }
135 }
136
137 fn group_dependency_issues(&mut self, results: &AnalysisResults) {
138 for item in &results.unused_dependencies {
139 self.entry_for_path(&item.dep.path)
140 .unused_dependencies
141 .push(item.clone());
142 }
143 for item in &results.unused_dev_dependencies {
144 self.entry_for_path(&item.dep.path)
145 .unused_dev_dependencies
146 .push(item.clone());
147 }
148 for item in &results.unused_optional_dependencies {
149 self.entry_for_path(&item.dep.path)
150 .unused_optional_dependencies
151 .push(item.clone());
152 }
153 for item in &results.type_only_dependencies {
154 self.entry_for_path(&item.dep.path)
155 .type_only_dependencies
156 .push(item.clone());
157 }
158 for item in &results.test_only_dependencies {
159 self.entry_for_path(&item.dep.path)
160 .test_only_dependencies
161 .push(item.clone());
162 }
163
164 for item in &results.unlisted_dependencies {
165 let key = item.dep.imported_from.first().map_or_else(
166 || UNOWNED_GROUP_LABEL.to_string(),
167 |site| (self.key_for)(&site.path),
168 );
169 self.entry_for_key(key)
170 .unlisted_dependencies
171 .push(item.clone());
172 }
173 for item in &results.duplicate_exports {
174 let key = item.export.locations.first().map_or_else(
175 || UNOWNED_GROUP_LABEL.to_string(),
176 |loc| (self.key_for)(&loc.path),
177 );
178 self.entry_for_key(key).duplicate_exports.push(item.clone());
179 }
180 }
181
182 fn group_relationship_issues(&mut self, results: &AnalysisResults) {
183 self.group_structure_issues(results);
184 self.group_framework_boundary_issues(results);
185 self.group_component_contract_issues(results);
186 }
187
188 fn group_structure_issues(&mut self, results: &AnalysisResults) {
189 for item in &results.circular_dependencies {
190 let key = item
191 .cycle
192 .files
193 .first()
194 .map_or_else(|| UNOWNED_GROUP_LABEL.to_string(), |f| (self.key_for)(f));
195 self.entry_for_key(key)
196 .circular_dependencies
197 .push(item.clone());
198 }
199 for item in &results.boundary_violations {
200 self.entry_for_path(&item.violation.from_path)
201 .boundary_violations
202 .push(item.clone());
203 }
204 for item in &results.boundary_coverage_violations {
205 self.entry_for_path(&item.violation.path)
206 .boundary_coverage_violations
207 .push(item.clone());
208 }
209 for item in &results.boundary_call_violations {
210 self.entry_for_path(&item.violation.path)
211 .boundary_call_violations
212 .push(item.clone());
213 }
214 for item in &results.policy_violations {
215 self.entry_for_path(&item.violation.path)
216 .policy_violations
217 .push(item.clone());
218 }
219 }
220
221 fn group_framework_boundary_issues(&mut self, results: &AnalysisResults) {
222 for item in &results.invalid_client_exports {
223 self.entry_for_path(&item.export.path)
224 .invalid_client_exports
225 .push(item.clone());
226 }
227 for item in &results.mixed_client_server_barrels {
228 self.entry_for_path(&item.barrel.path)
229 .mixed_client_server_barrels
230 .push(item.clone());
231 }
232 for item in &results.misplaced_directives {
233 self.entry_for_path(&item.directive_site.path)
234 .misplaced_directives
235 .push(item.clone());
236 }
237 for item in &results.unprovided_injects {
238 self.entry_for_path(&item.inject.path)
239 .unprovided_injects
240 .push(item.clone());
241 }
242 for item in &results.unrendered_components {
243 self.entry_for_path(&item.component.path)
244 .unrendered_components
245 .push(item.clone());
246 }
247 }
248
249 fn group_component_contract_issues(&mut self, results: &AnalysisResults) {
250 for item in &results.unused_component_props {
251 self.entry_for_path(&item.prop.path)
252 .unused_component_props
253 .push(item.clone());
254 }
255 for item in &results.unused_component_emits {
256 self.entry_for_path(&item.emit.path)
257 .unused_component_emits
258 .push(item.clone());
259 }
260 for item in &results.unused_component_inputs {
261 self.entry_for_path(&item.input.path)
262 .unused_component_inputs
263 .push(item.clone());
264 }
265 for item in &results.unused_component_outputs {
266 self.entry_for_path(&item.output.path)
267 .unused_component_outputs
268 .push(item.clone());
269 }
270 for item in &results.unused_server_actions {
271 self.entry_for_path(&item.action.path)
272 .unused_server_actions
273 .push(item.clone());
274 }
275 for item in &results.unused_load_data_keys {
276 self.entry_for_path(&item.key.path)
277 .unused_load_data_keys
278 .push(item.clone());
279 }
280 for item in &results.stale_suppressions {
281 self.entry_for_path(&item.path)
282 .stale_suppressions
283 .push(item.clone());
284 }
285 }
286
287 fn group_workspace_config_issues(&mut self, results: &AnalysisResults) {
288 for item in &results.unused_catalog_entries {
289 self.entry_for_path(&item.entry.path)
290 .unused_catalog_entries
291 .push(item.clone());
292 }
293 for item in &results.empty_catalog_groups {
294 self.entry_for_path(&item.group.path)
295 .empty_catalog_groups
296 .push(item.clone());
297 }
298 for item in &results.unresolved_catalog_references {
299 self.entry_for_path(&item.reference.path)
300 .unresolved_catalog_references
301 .push(item.clone());
302 }
303 for item in &results.unused_dependency_overrides {
304 self.entry_for_path(&item.entry.path)
305 .unused_dependency_overrides
306 .push(item.clone());
307 }
308 for item in &results.misconfigured_dependency_overrides {
309 self.entry_for_path(&item.entry.path)
310 .misconfigured_dependency_overrides
311 .push(item.clone());
312 }
313 }
314}
315
316fn finalize_groups(
317 groups: FxHashMap<String, AnalysisResults>,
318 mut group_owners: FxHashMap<String, Vec<String>>,
319 include_owners: bool,
320) -> Vec<ResultGroup> {
321 let mut sorted: Vec<_> = groups
322 .into_iter()
323 .map(|(key, results)| {
324 let owners = if include_owners {
325 Some(group_owners.remove(&key).unwrap_or_default())
326 } else {
327 None
328 };
329 ResultGroup {
330 key,
331 owners,
332 results,
333 }
334 })
335 .collect();
336 sorted.sort_by(|a, b| {
337 let a_unowned = a.key == UNOWNED_GROUP_LABEL;
338 let b_unowned = b.key == UNOWNED_GROUP_LABEL;
339 match (a_unowned, b_unowned) {
340 (true, false) => std::cmp::Ordering::Greater,
341 (false, true) => std::cmp::Ordering::Less,
342 _ => b
343 .results
344 .total_issues()
345 .cmp(&a.results.total_issues())
346 .then_with(|| a.key.cmp(&b.key)),
347 }
348 });
349 sorted
350}
351
352#[must_use]
354pub fn largest_clone_group_owner_with<F>(group: &CloneGroup, mut key_for_path: F) -> String
355where
356 F: FnMut(&Path) -> String,
357{
358 let mut counts: BTreeMap<String, u32> = BTreeMap::new();
359 for instance in &group.instances {
360 let key = key_for_path(&instance.file);
361 *counts.entry(key).or_insert(0) += 1;
362 }
363 if counts.is_empty() {
364 return UNOWNED_GROUP_LABEL.to_string();
365 }
366 let mut best_key: Option<String> = None;
367 let mut best_count: u32 = 0;
368 for (key, count) in counts {
369 if best_key.is_none() || count > best_count {
370 best_count = count;
371 best_key = Some(key);
372 }
373 }
374 best_key.unwrap_or_else(|| UNOWNED_GROUP_LABEL.to_string())
375}
376
377#[must_use]
379pub fn build_duplication_grouping_with<F>(
380 report: &DuplicationReport,
381 mode: &'static str,
382 mut key_for_path: F,
383) -> DuplicationGrouping
384where
385 F: FnMut(&Path) -> String,
386{
387 let fingerprints = CloneFingerprintSet::from_groups(&report.clone_groups);
388 let buckets = build_attributed_clone_buckets(report, &mut key_for_path);
389 let mut groups: Vec<DuplicationGroup> = buckets
390 .into_iter()
391 .map(|(key, groups)| duplication_group(key, groups, report, &fingerprints))
392 .collect();
393 sort_duplication_groups(&mut groups);
394
395 DuplicationGrouping { mode, groups }
396}
397
398fn build_attributed_clone_buckets<F>(
399 report: &DuplicationReport,
400 key_for_path: &mut F,
401) -> BTreeMap<String, Vec<AttributedCloneGroup>>
402where
403 F: FnMut(&Path) -> String,
404{
405 let mut buckets: BTreeMap<String, Vec<AttributedCloneGroup>> = BTreeMap::new();
406 for group in &report.clone_groups {
407 let attributed = attributed_clone_group(group, key_for_path);
408 buckets
409 .entry(attributed.primary_owner.clone())
410 .or_default()
411 .push(attributed);
412 }
413 buckets
414}
415
416fn attributed_clone_group<F>(group: &CloneGroup, key_for_path: &mut F) -> AttributedCloneGroup
417where
418 F: FnMut(&Path) -> String,
419{
420 let primary_owner = largest_clone_group_owner_with(group, &mut *key_for_path);
421 let instances = group
422 .instances
423 .iter()
424 .map(|instance| AttributedInstance {
425 owner: key_for_path(&instance.file),
426 instance: instance.clone(),
427 })
428 .collect();
429 AttributedCloneGroup {
430 primary_owner,
431 token_count: group.token_count,
432 line_count: group.line_count,
433 instances,
434 }
435}
436
437fn duplication_group(
438 key: String,
439 attributed_groups: Vec<AttributedCloneGroup>,
440 report: &DuplicationReport,
441 fingerprints: &CloneFingerprintSet,
442) -> DuplicationGroup {
443 let mut subset = duplication_subset_report(&attributed_groups, report);
444 subset.stats = recompute_duplication_stats(&subset);
445 let clone_families = clone_families_for_bucket(&attributed_groups, report, fingerprints);
446 let clone_groups = attributed_groups
447 .into_iter()
448 .map(|group| {
449 let fingerprint = group.fingerprint(fingerprints);
450 AttributedCloneGroupFinding::with_fingerprint(group, fingerprint)
451 })
452 .collect();
453
454 DuplicationGroup {
455 key,
456 stats: subset.stats,
457 clone_groups,
458 clone_families,
459 }
460}
461
462fn duplication_subset_report(
463 attributed_groups: &[AttributedCloneGroup],
464 report: &DuplicationReport,
465) -> DuplicationReport {
466 DuplicationReport {
467 clone_groups: attributed_groups
468 .iter()
469 .map(|group| CloneGroup {
470 instances: group
471 .instances
472 .iter()
473 .map(|instance| instance.instance.clone())
474 .collect(),
475 token_count: group.token_count,
476 line_count: group.line_count,
477 })
478 .collect(),
479 clone_families: Vec::new(),
480 mirrored_directories: Vec::new(),
481 stats: DuplicationStats {
482 total_files: report.stats.total_files,
483 files_with_clones: 0,
484 total_lines: report.stats.total_lines,
485 duplicated_lines: 0,
486 total_tokens: report.stats.total_tokens,
487 duplicated_tokens: 0,
488 clone_groups: 0,
489 clone_instances: 0,
490 duplication_percentage: 0.0,
491 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
492 },
493 }
494}
495
496fn clone_families_for_bucket(
497 attributed_groups: &[AttributedCloneGroup],
498 report: &DuplicationReport,
499 fingerprints: &CloneFingerprintSet,
500) -> Vec<CloneFamilyFinding> {
501 let bucket_files: FxHashSet<&Path> = attributed_groups
502 .iter()
503 .flat_map(|group| group.instances.iter().map(|i| i.instance.file.as_path()))
504 .collect();
505
506 report
507 .clone_families
508 .iter()
509 .filter(|family| {
510 family
511 .files
512 .iter()
513 .any(|path| bucket_files.contains(path.as_path()))
514 })
515 .map(|family| CloneFamilyFinding::with_fingerprints(family.clone(), fingerprints))
516 .collect()
517}
518
519fn recompute_duplication_stats(report: &DuplicationReport) -> DuplicationStats {
520 let mut files_with_clones: FxHashSet<&Path> = FxHashSet::default();
521 let mut file_dup_lines: FxHashMap<&Path, FxHashSet<usize>> = FxHashMap::default();
522 let mut duplicated_tokens = 0usize;
523 let mut clone_instances = 0usize;
524
525 for group in &report.clone_groups {
526 for instance in &group.instances {
527 files_with_clones.insert(&instance.file);
528 clone_instances += 1;
529 let lines = file_dup_lines.entry(&instance.file).or_default();
530 for line in instance.start_line..=instance.end_line {
531 lines.insert(line);
532 }
533 }
534 duplicated_tokens += group.token_count * group.instances.len();
535 }
536
537 let duplicated_lines: usize = file_dup_lines.values().map(FxHashSet::len).sum();
538
539 DuplicationStats {
540 total_files: report.stats.total_files,
541 files_with_clones: files_with_clones.len(),
542 total_lines: report.stats.total_lines,
543 duplicated_lines,
544 total_tokens: report.stats.total_tokens,
545 duplicated_tokens,
546 clone_groups: report.clone_groups.len(),
547 clone_instances,
548 duplication_percentage: if report.stats.total_lines > 0 {
549 (duplicated_lines as f64 / report.stats.total_lines as f64) * 100.0
550 } else {
551 0.0
552 },
553 clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
554 }
555}
556
557fn sort_duplication_groups(groups: &mut [DuplicationGroup]) {
558 groups.sort_by(|a, b| {
559 let a_unowned = a.key == UNOWNED_GROUP_LABEL;
560 let b_unowned = b.key == UNOWNED_GROUP_LABEL;
561 match (a_unowned, b_unowned) {
562 (true, false) => std::cmp::Ordering::Greater,
563 (false, true) => std::cmp::Ordering::Less,
564 _ => b
565 .clone_groups
566 .len()
567 .cmp(&a.clone_groups.len())
568 .then_with(|| a.key.cmp(&b.key)),
569 }
570 });
571}