use std::path::{Path, PathBuf};
use rustc_hash::{FxHashMap, FxHashSet};
use super::scoring::FileScoreOutput;
use super::{
HealthGroupResolver, SubsetFilter, VitalSignsAndCountsInput, apply_duplication_metrics,
compute_vital_signs_and_counts,
};
use crate::vital_signs;
use crate::{discover::FileId, duplicates::DuplicationReport, source::ModuleInfo};
use fallow_output::{
ComplexityViolation, FileHealthScore, HealthActionsMeta, HealthFinding, HealthGroup,
HealthGrouping, HotspotEntry, HotspotFinding, LargeFunctionEntry, RefactoringTarget,
RefactoringTargetFinding, VitalSigns, VitalSignsCounts, summarize_coverage_source_consistency,
};
/// Candidate files that resolved to the same group key; built by `bucket_paths`
/// and consumed by `build_group`.
struct GroupBucket {
/// Group key the resolver returned for every path in this bucket.
key: String,
/// Section owners looked up for the path that created the bucket, if the
/// resolver reported any.
owners: Option<Vec<String>>,
/// Candidate paths assigned to this group, stored as given (not root-relative).
paths: FxHashSet<PathBuf>,
}
/// Project-wide health data that `build_health_grouping` slices into per-group views.
pub(super) struct HealthGroupingInput<'a> {
/// Parsed modules; their `line_offsets` lengths provide per-group line totals,
/// and the slice is forwarded to the vital-signs computation.
pub modules: &'a [ModuleInfo],
/// Lookup from a module's `FileId` to its path, used to decide group membership.
pub file_paths: &'a FxHashMap<FileId, &'a PathBuf>,
/// Optional scoring output, forwarded unchanged to the vital-signs computation.
pub score_output: Option<&'a FileScoreOutput>,
/// File scores for the whole run; filtered by path for each group.
pub file_scores: &'a [FileHealthScore],
/// Complexity violations for the whole run; filtered by path for each group.
pub findings: &'a [ComplexityViolation],
/// Hotspot entries for the whole run; filtered by path for each group.
pub hotspots: &'a [HotspotEntry],
/// Large-function entries for the whole run; filtered by path for each group.
pub large_functions: &'a [LargeFunctionEntry],
/// Refactoring targets for the whole run; filtered by path for each group.
pub targets: &'a [RefactoringTarget],
/// When true, a health score is computed for every group.
pub score_requested: bool,
/// Duplication report; when present, a per-group subset is derived from it
/// and applied to the group's vital signs.
pub dupes_report: Option<&'a DuplicationReport>,
/// Forwarded unchanged to the vital-signs computation.
pub needs_file_scores: bool,
/// Forwarded unchanged to the vital-signs computation.
pub needs_hotspots: bool,
/// When true, each group carries its vital signs in the output; otherwise the
/// field is `None`.
pub show_vital_signs: bool,
/// Context used to attach actions to findings and to derive the group's
/// actions metadata.
pub action_ctx: &'a fallow_output::HealthActionContext,
}
/// Split `candidate_paths` into groups using `resolver` and build one
/// [`HealthGroup`] per bucket, in the order produced by `bucket_paths`.
///
/// The returned grouping is labelled with the resolver's mode label.
pub(super) fn build_health_grouping(
    resolver: &dyn HealthGroupResolver,
    project_root: &Path,
    candidate_paths: &FxHashSet<PathBuf>,
    input: &HealthGroupingInput<'_>,
) -> HealthGrouping {
    let buckets = bucket_paths(resolver, project_root, candidate_paths);

    let mut groups: Vec<HealthGroup> = Vec::with_capacity(buckets.len());
    for bucket in buckets {
        groups.push(build_group(bucket, project_root, input));
    }

    HealthGrouping {
        mode: resolver.mode_label(),
        groups,
    }
}
/// Partition `candidate_paths` into one [`GroupBucket`] per resolver key.
///
/// Each path is made relative to `project_root` (falling back to the path as
/// given when it is not under the root) before being handed to the resolver.
/// A bucket's `owners` are looked up once, for the path that creates it.
///
/// The result is ordered so that the unowned bucket comes last; all other
/// buckets are sorted by descending file count, then by key.
fn bucket_paths(
    resolver: &dyn HealthGroupResolver,
    project_root: &Path,
    candidate_paths: &FxHashSet<PathBuf>,
) -> Vec<GroupBucket> {
    let mut by_key: FxHashMap<String, GroupBucket> = FxHashMap::default();
    for path in candidate_paths {
        let rel = path.strip_prefix(project_root).unwrap_or(path);
        let (key, _rule) = resolver.resolve_with_rule(rel);
        // Move the key into the map and clone it only when a new bucket is
        // created, instead of allocating a copy for every candidate path.
        let bucket = match by_key.entry(key) {
            std::collections::hash_map::Entry::Occupied(slot) => slot.into_mut(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                let fresh = GroupBucket {
                    key: slot.key().clone(),
                    owners: resolver.section_owners_of(rel).map(<[_]>::to_vec),
                    paths: FxHashSet::default(),
                };
                slot.insert(fresh)
            }
        };
        bucket.paths.insert(path.clone());
    }

    let mut out: Vec<GroupBucket> = by_key.into_values().collect();
    // Keys are unique, so the comparator is a strict total order and an
    // unstable sort yields the same result as a stable one.
    out.sort_unstable_by(|a, b| {
        // `false < true`, so the unowned bucket sorts after every owned one.
        is_unowned_label(&a.key)
            .cmp(&is_unowned_label(&b.key))
            .then_with(|| b.paths.len().cmp(&a.paths.len()))
            .then_with(|| a.key.cmp(&b.key))
    });
    out
}
/// Returns `true` when `key` equals the codeowners module's `UNOWNED_LABEL`.
fn is_unowned_label(key: &str) -> bool {
    let unowned = crate::codeowners::UNOWNED_LABEL;
    key == unowned
}
/// Assemble the [`HealthGroup`] for a single bucket.
///
/// Every project-wide list in `input` is narrowed to the bucket's paths,
/// group-level vital signs are computed from that subset, and the health score
/// is derived only when `input.score_requested` is set. Vital signs are
/// included in the result only when `input.show_vital_signs` is set.
fn build_group(
    bucket: GroupBucket,
    project_root: &Path,
    input: &HealthGroupingInput<'_>,
) -> HealthGroup {
    let GroupBucket {
        key,
        owners,
        paths: members,
    } = bucket;
    let files_analyzed = members.len();

    // Narrow each project-wide list to the files that belong to this group.
    let findings = filter_group_items(input.findings, &members, |item| &item.path);
    let file_scores = filter_group_items(input.file_scores, &members, |item| &item.path);
    let hotspots = filter_group_items(input.hotspots, &members, |item| &item.path);
    let large_functions = filter_group_items(input.large_functions, &members, |item| &item.path);

    let subset = SubsetFilter::Paths(&members);
    let (signs, _) = compute_group_vital_signs(input, &members, &subset, &file_scores, &hotspots);

    let health_score = if input.score_requested {
        Some(vital_signs::compute_health_score(&signs, files_analyzed))
    } else {
        None
    };

    let functions_above_threshold = findings.len();
    let coverage_source_consistency = summarize_coverage_source_consistency(
        findings.iter().filter_map(|item| item.coverage_source),
    );
    let reported_signs = if input.show_vital_signs {
        Some(signs)
    } else {
        None
    };

    HealthGroup {
        key,
        owners,
        files_analyzed,
        functions_above_threshold,
        coverage_source_consistency,
        vital_signs: reported_signs,
        health_score,
        findings: wrap_group_findings(findings, input),
        file_scores,
        hotspots: wrap_group_hotspots(hotspots, project_root),
        large_functions,
        targets: wrap_group_targets(input.targets, &members),
        actions_meta: group_actions_meta(input),
    }
}
/// Clone the entries of `items` whose path (as extracted by `path`) is a member
/// of `paths`, preserving their original order.
fn filter_group_items<T: Clone>(
    items: &[T],
    paths: &FxHashSet<PathBuf>,
    path: impl Fn(&T) -> &PathBuf,
) -> Vec<T> {
    let mut kept = Vec::new();
    for item in items {
        if paths.contains(path(item)) {
            kept.push(item.clone());
        }
    }
    kept
}
/// Compute vital signs and their counts for one group.
///
/// The shared project inputs are combined with the group's own file scores,
/// hotspots and path subset, then the group's duplication metrics are applied
/// on top of the result.
fn compute_group_vital_signs(
    input: &HealthGroupingInput<'_>,
    paths: &FxHashSet<PathBuf>,
    subset: &SubsetFilter<'_>,
    group_file_scores: &[FileHealthScore],
    group_hotspots: &[HotspotEntry],
) -> (VitalSigns, VitalSignsCounts) {
    let (mut signs, mut tallies) = compute_vital_signs_and_counts(&VitalSignsAndCountsInput {
        // Project-wide inputs, passed through as-is.
        score_output: input.score_output,
        modules: input.modules,
        file_paths: input.file_paths,
        needs_file_scores: input.needs_file_scores,
        needs_hotspots: input.needs_hotspots,
        // Group-scoped inputs.
        file_scores_slice: group_file_scores,
        hotspots: group_hotspots,
        total_files: paths.len(),
        subset,
    });
    apply_group_duplication_metrics(input, paths, &mut signs, &mut tallies);
    (signs, tallies)
}
/// Fold the group's share of the duplication report into `vital_signs` and
/// `counts`. Does nothing when the input carries no duplication report.
fn apply_group_duplication_metrics(
    input: &HealthGroupingInput<'_>,
    paths: &FxHashSet<PathBuf>,
    vital_signs: &mut VitalSigns,
    counts: &mut VitalSignsCounts,
) {
    if let Some(report) = input.dupes_report {
        let scoped = subset_duplication_report(report, input, paths);
        apply_duplication_metrics(vital_signs, counts, &scoped);
    }
}
/// Derive a duplication report restricted to the files in `paths`.
///
/// Each clone group keeps only the instances located in `paths`; groups left
/// with fewer than two instances are dropped. Clone families and mirrored
/// directories are left empty, and the stats are recomputed for the subset.
fn subset_duplication_report(
    report: &DuplicationReport,
    input: &HealthGroupingInput<'_>,
    paths: &FxHashSet<PathBuf>,
) -> DuplicationReport {
    let mut clone_groups = Vec::new();
    for group in report.clone_groups.iter() {
        let instances: Vec<_> = group
            .instances
            .iter()
            .filter(|instance| paths.contains(&instance.file))
            .cloned()
            .collect();
        // A clone needs at least two instances inside the group to count.
        if instances.len() > 1 {
            clone_groups.push(fallow_types::duplicates::CloneGroup {
                instances,
                token_count: group.token_count,
                line_count: group.line_count,
            });
        }
    }

    let stats = subset_duplication_stats(report, input, paths, &clone_groups);
    DuplicationReport {
        stats,
        clone_groups,
        clone_families: Vec::new(),
        mirrored_directories: Vec::new(),
    }
}
/// Recompute duplication statistics for the already-filtered `clone_groups`.
///
/// Duplicated lines are counted per file as the union of all instance line
/// ranges, so overlapping instances are not double-counted. `total_tokens` and
/// `clone_groups_below_min_occurrences` are copied from the project-wide
/// `report`, and `duplicated_tokens` is capped at that project-wide total.
fn subset_duplication_stats(
    report: &DuplicationReport,
    input: &HealthGroupingInput<'_>,
    paths: &FxHashSet<PathBuf>,
    clone_groups: &[fallow_types::duplicates::CloneGroup],
) -> fallow_types::duplicates::DuplicationStats {
    // One entry per file that has at least one instance, holding the distinct
    // duplicated line numbers of that file.
    let mut lines_by_file: rustc_hash::FxHashMap<&Path, FxHashSet<usize>> =
        rustc_hash::FxHashMap::default();
    let mut clone_instances = 0usize;
    let mut duplicated_tokens = 0usize;

    for group in clone_groups {
        let occurrences = group.instances.len();
        clone_instances += occurrences;
        // The first occurrence is the original; only the rest are duplicates.
        duplicated_tokens += group.token_count * occurrences.saturating_sub(1);
        for instance in &group.instances {
            lines_by_file
                .entry(&instance.file)
                .or_default()
                .extend(instance.start_line..=instance.end_line);
        }
    }

    let files_with_clones = lines_by_file.len();
    let duplicated_lines: usize = lines_by_file.values().map(FxHashSet::len).sum();
    let total_lines = total_lines_for_paths(input, paths);
    let duplication_percentage = if total_lines > 0 {
        (duplicated_lines as f64 / total_lines as f64) * 100.0
    } else {
        0.0
    };

    fallow_types::duplicates::DuplicationStats {
        total_files: paths.len(),
        files_with_clones,
        total_lines,
        duplicated_lines,
        total_tokens: report.stats.total_tokens,
        duplicated_tokens: duplicated_tokens.min(report.stats.total_tokens),
        clone_groups: clone_groups.len(),
        clone_instances,
        duplication_percentage,
        clone_groups_below_min_occurrences: report.stats.clone_groups_below_min_occurrences,
    }
}
/// Sum the `line_offsets` lengths of every module whose path is in `paths`.
///
/// Modules whose `file_id` has no entry in `input.file_paths` are skipped.
fn total_lines_for_paths(input: &HealthGroupingInput<'_>, paths: &FxHashSet<PathBuf>) -> usize {
    let mut total = 0usize;
    for module in input.modules.iter() {
        let Some(path) = input.file_paths.get(&module.file_id) else {
            continue;
        };
        if paths.contains(*path) {
            total += module.line_offsets.len();
        }
    }
    total
}
/// Convert each complexity violation into a [`HealthFinding`], attaching
/// actions built from `input.action_ctx`. Order is preserved.
fn wrap_group_findings(
    findings: Vec<ComplexityViolation>,
    input: &HealthGroupingInput<'_>,
) -> Vec<HealthFinding> {
    let mut wrapped = Vec::with_capacity(findings.len());
    for violation in findings {
        wrapped.push(HealthFinding::with_actions(violation, input.action_ctx));
    }
    wrapped
}
/// Convert each hotspot entry into a [`HotspotFinding`], attaching actions
/// built with `project_root`. Order is preserved.
fn wrap_group_hotspots(hotspots: Vec<HotspotEntry>, project_root: &Path) -> Vec<HotspotFinding> {
    let mut wrapped = Vec::with_capacity(hotspots.len());
    for entry in hotspots {
        wrapped.push(HotspotFinding::with_actions(entry, project_root));
    }
    wrapped
}
/// Select the refactoring targets whose path is in `paths` and wrap each one
/// as a [`RefactoringTargetFinding`] with its actions. Order is preserved.
fn wrap_group_targets(
    targets: &[RefactoringTarget],
    paths: &FxHashSet<PathBuf>,
) -> Vec<RefactoringTargetFinding> {
    targets
        .iter()
        .filter(|target| paths.contains(&target.path))
        .cloned()
        .map(RefactoringTargetFinding::with_actions)
        .collect()
}
/// Build the actions metadata for a group.
///
/// Returns `None` unless the action options have `omit_suppress_line` set. When
/// set, the metadata records that suppression hints were omitted, along with
/// the configured reason (or `"unspecified"` when no reason is given).
fn group_actions_meta(input: &HealthGroupingInput<'_>) -> Option<HealthActionsMeta> {
    let opts = &input.action_ctx.opts;
    if !opts.omit_suppress_line {
        return None;
    }

    let reason = opts.omit_reason.unwrap_or("unspecified").to_string();
    Some(HealthActionsMeta {
        suppression_hints_omitted: true,
        reason,
        scope: "health-findings".to_string(),
    })
}