use std::path::{Path, PathBuf};
use fallow_engine::duplicates::{
CloneFingerprintSet, clone_fingerprint, dominant_identifier, fingerprint_for_fragment,
};
use fallow_output::{
CloneFamilyAction, CloneGroupAction, CodeClimateIssue, CodeClimateIssueInput,
CodeClimateSeverity, clone_family_actions, clone_group_actions, codeclimate_fingerprint_hash,
normalize_uri,
};
use fallow_types::duplicates::{
CloneFamily, CloneGroup, CloneInstance, DuplicationReport, DuplicationStats, MirroredDirectory,
RefactoringSuggestion,
};
use fallow_types::envelope::AuditIntroduced;
use fallow_types::serde_path;
use serde::Serialize;
/// A clone instance paired with the owner it has been attributed to.
///
/// On serialization the wrapped instance is flattened, so its fields appear
/// next to `owner` in the same object rather than under an `instance` key.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct AttributedInstance {
/// The underlying clone instance (flattened into the parent object).
#[serde(flatten)]
pub instance: CloneInstance,
/// Owner label for this instance.
/// NOTE(review): how the owner is derived is decided by the code that
/// builds this value and is not visible here.
pub owner: String,
}
/// A clone group whose instances each carry an owner attribution.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct AttributedCloneGroup {
/// Owner label chosen for the group as a whole.
/// NOTE(review): presumably the dominant owner among `instances` - confirm
/// against the code that populates it.
pub primary_owner: String,
/// Token count recorded for the group.
pub token_count: usize,
/// Line count recorded for the group.
pub line_count: usize,
/// The group's instances, each with its own owner.
pub instances: Vec<AttributedInstance>,
}
impl AttributedCloneGroup {
#[must_use]
pub fn fingerprint(&self, fingerprints: &CloneFingerprintSet) -> String {
let instances: Vec<_> = self
.instances
.iter()
.map(|instance| instance.instance.clone())
.collect();
fingerprints.fingerprint_for_parts(&instances, self.token_count, self.line_count)
}
}
/// Report wrapper around an [`AttributedCloneGroup`] that adds a fingerprint
/// and the list of actions attached to the group.
///
/// The group is flattened on serialization, so its fields sit next to
/// `fingerprint` and `actions` in the same object.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct AttributedCloneGroupFinding {
/// The wrapped group (flattened into the parent object).
#[serde(flatten)]
pub group: AttributedCloneGroup,
/// Fingerprint string associated with the group.
pub fingerprint: String,
/// Actions produced by `clone_group_actions` for this group.
pub actions: Vec<CloneGroupAction>,
}
impl AttributedCloneGroupFinding {
    /// Wraps `group`, deriving the fingerprint from the group itself.
    ///
    /// The fingerprint is computed from the fragment of the first instance; a
    /// group without instances is fingerprinted from the empty string.
    #[allow(
        dead_code,
        reason = "kept for focused wrapper tests and non-report construction paths"
    )]
    #[must_use]
    pub fn with_actions(group: AttributedCloneGroup) -> Self {
        let leading_fragment = group
            .instances
            .first()
            .map_or("", |first| first.instance.fragment.as_str());
        let fingerprint = fingerprint_for_fragment(leading_fragment);

        Self::with_fingerprint(group, fingerprint)
    }

    /// Wraps `group` with a caller-supplied `fingerprint`.
    ///
    /// The actions are built from the group's line count and its number of
    /// instances.
    #[must_use]
    pub fn with_fingerprint(group: AttributedCloneGroup, fingerprint: String) -> Self {
        let instance_count = group.instances.len();

        Self {
            // Evaluated before `group` is moved into the struct below.
            actions: clone_group_actions(group.line_count, instance_count),
            group,
            fingerprint,
        }
    }
}
/// One bucket of a grouped duplication report: a key plus the statistics,
/// clone groups and clone families filed under that key.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct DuplicationGroup {
/// Identifier of this bucket.
/// NOTE(review): what the key denotes depends on the grouping mode chosen
/// by the caller - not visible here.
pub key: String,
/// Duplication statistics for this bucket.
pub stats: DuplicationStats,
/// Owner-attributed clone groups in this bucket.
pub clone_groups: Vec<AttributedCloneGroupFinding>,
/// Clone families in this bucket.
pub clone_families: Vec<CloneFamilyFinding>,
}
// NOTE(review): unlike the neighbouring report types, this struct has no
// `schema`-feature `JsonSchema` derive - confirm that this is intentional.
/// A grouped view of a duplication report: the grouping mode and its buckets.
#[derive(Debug, Clone, Serialize)]
pub struct DuplicationGrouping {
/// Name of the grouping mode that produced `groups`.
pub mode: &'static str,
/// The buckets produced by the grouping.
pub groups: Vec<DuplicationGroup>,
}
/// Report wrapper around a [`CloneGroup`] that adds a fingerprint, an optional
/// suggested name, the group's actions and an optional audit marker.
///
/// The group is flattened on serialization, so its fields sit next to the
/// wrapper's own fields in the same object.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CloneGroupFinding {
/// The wrapped clone group (flattened into the parent object).
#[serde(flatten)]
pub group: CloneGroup,
/// Fingerprint string associated with the group.
pub fingerprint: String,
/// Result of `dominant_identifier` for the group; left out of the
/// serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub suggested_name: Option<String>,
/// Actions produced by `clone_group_actions` for this group.
pub actions: Vec<CloneGroupAction>,
/// Audit "introduced" marker; left out of the serialized output when
/// `None`. The constructors in this file always start it as `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub introduced: Option<AuditIntroduced>,
}
impl CloneGroupFinding {
    /// Wraps `group`, computing the fingerprint from the group's own
    /// instances via `clone_fingerprint`.
    #[allow(
        dead_code,
        reason = "kept for focused wrapper tests and non-report construction paths"
    )]
    #[must_use]
    pub fn with_actions(group: CloneGroup) -> Self {
        let own_fingerprint = clone_fingerprint(&group.instances);
        Self::with_fingerprint(group, own_fingerprint)
    }

    /// Wraps `group` with a caller-supplied `fingerprint`.
    ///
    /// The suggested name comes from `dominant_identifier`, the actions from
    /// the group's line count and instance count, and `introduced` starts out
    /// as `None`.
    #[must_use]
    pub fn with_fingerprint(group: CloneGroup, fingerprint: String) -> Self {
        // Everything derived from the group is computed before it is moved.
        let suggested_name = dominant_identifier(&group);
        let instance_count = group.instances.len();
        let actions = clone_group_actions(group.line_count, instance_count);

        Self {
            group,
            fingerprint,
            suggested_name,
            actions,
            introduced: None,
        }
    }
}
/// Report wrapper for a clone family: the family's files, its wrapped clone
/// groups, duplication totals, refactoring suggestions and family-level
/// actions.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CloneFamilyFinding {
/// Files belonging to the family; serialized through
/// `serde_path::serialize_vec`.
#[serde(serialize_with = "serde_path::serialize_vec")]
pub files: Vec<PathBuf>,
/// The family's clone groups, each wrapped as a [`CloneGroupFinding`].
pub groups: Vec<CloneGroupFinding>,
/// Total duplicated lines, copied from the source family.
pub total_duplicated_lines: usize,
/// Total duplicated tokens, copied from the source family.
pub total_duplicated_tokens: usize,
/// Refactoring suggestions, copied from the source family.
pub suggestions: Vec<RefactoringSuggestion>,
/// Actions produced by `clone_family_actions` for this family.
pub actions: Vec<CloneFamilyAction>,
}
impl CloneFamilyFinding {
    /// Wraps `family`, using a fingerprint set built from the family's own
    /// groups.
    #[allow(
        dead_code,
        reason = "kept for focused wrapper tests and non-report construction paths"
    )]
    #[must_use]
    pub fn with_actions(family: CloneFamily) -> Self {
        let own_fingerprints = CloneFingerprintSet::from_groups(&family.groups);
        Self::with_fingerprints(family, &own_fingerprints)
    }

    /// Wraps `family`, resolving each inner group's fingerprint through the
    /// supplied `fingerprints` set.
    ///
    /// Family-level actions are derived from the number of groups, the total
    /// duplicated lines and the suggestion descriptions. Files, totals and
    /// suggestions are carried over unchanged.
    #[must_use]
    pub fn with_fingerprints(family: CloneFamily, fingerprints: &CloneFingerprintSet) -> Self {
        // Needs borrowed access to the groups, so it runs before they are consumed.
        let actions = build_clone_family_actions(
            &family.groups,
            family.total_duplicated_lines,
            &family.suggestions,
        );

        let mut groups = Vec::with_capacity(family.groups.len());
        for group in family.groups {
            let fingerprint = fingerprints.fingerprint_for_group(&group);
            groups.push(CloneGroupFinding::with_fingerprint(group, fingerprint));
        }

        Self {
            files: family.files,
            groups,
            total_duplicated_lines: family.total_duplicated_lines,
            total_duplicated_tokens: family.total_duplicated_tokens,
            suggestions: family.suggestions,
            actions,
        }
    }
}
/// Builds the family-level actions for a clone family.
///
/// Forwards the number of groups, the total duplicated lines and the
/// description of every suggestion (in order) to `clone_family_actions`.
fn build_clone_family_actions(
    groups: &[CloneGroup],
    total_duplicated_lines: usize,
    suggestions: &[RefactoringSuggestion],
) -> Vec<CloneFamilyAction> {
    let group_count = groups.len();
    let descriptions = suggestions.iter().map(|item| item.description.as_str());

    clone_family_actions(group_count, total_duplicated_lines, descriptions)
}
/// Serializable payload of a duplication report: wrapped clone groups and
/// families, mirrored directories and overall statistics.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct DupesReportPayload {
/// Every clone group of the report, wrapped as a [`CloneGroupFinding`].
pub clone_groups: Vec<CloneGroupFinding>,
/// Every clone family of the report, wrapped as a [`CloneFamilyFinding`].
pub clone_families: Vec<CloneFamilyFinding>,
/// Mirrored directories; left out of the serialized output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub mirrored_directories: Vec<MirroredDirectory>,
/// Statistics copied from the report.
pub stats: DuplicationStats,
}
impl DupesReportPayload {
#[must_use]
pub fn from_report(report: &DuplicationReport) -> Self {
let fingerprints = CloneFingerprintSet::from_groups(&report.clone_groups);
Self {
clone_groups: report
.clone_groups
.iter()
.map(|group| {
CloneGroupFinding::with_fingerprint(
group.clone(),
fingerprints.fingerprint_for_group(group),
)
})
.collect(),
clone_families: report
.clone_families
.iter()
.map(|family| CloneFamilyFinding::with_fingerprints(family.clone(), &fingerprints))
.collect(),
mirrored_directories: report.mirrored_directories.clone(),
stats: report.stats.clone(),
}
}
}
/// Converts a duplication report into CodeClimate issues, one per clone
/// instance.
///
/// Every issue uses the check name `fallow/code-duplication`, minor severity
/// and the `Duplication` category. The issue path is the instance's file made
/// relative to `root` (when it lies under `root`) and passed through
/// `normalize_uri`; the begin line is the instance's start line.
///
/// The description names the clone group by its 1-based position in the
/// report, followed by the group's line count and instance count.
///
/// The fingerprint hashes the check name, the issue path, the start line, the
/// group's token count and line count, and the first 64 characters of the
/// fragment of the group's first instance.
#[must_use]
#[expect(
    clippy::cast_possible_truncation,
    reason = "line numbers are bounded by source size"
)]
pub fn build_duplication_codeclimate(
    report: &DuplicationReport,
    root: &Path,
) -> Vec<CodeClimateIssue> {
    // Exactly one issue is emitted per instance, so the final size is known up front.
    let total_instances: usize = report
        .clone_groups
        .iter()
        .map(|group| group.instances.len())
        .sum();
    let mut issues = Vec::with_capacity(total_instances);

    for (i, group) in report.clone_groups.iter().enumerate() {
        // Everything below depends only on the group, so it is computed once
        // per group rather than once per instance.
        let token_str = group.token_count.to_string();
        let line_count_str = group.line_count.to_string();
        let fragment_prefix: String = group
            .instances
            .first()
            .map(|inst| inst.fragment.chars().take(64).collect())
            .unwrap_or_default();
        let description = format!(
            "Code clone group {} ({} lines, {} instances)",
            i + 1,
            group.line_count,
            group.instances.len()
        );

        for instance in &group.instances {
            let path = codeclimate_path(&instance.file, root);
            let start_str = instance.start_line.to_string();
            let fp = codeclimate_fingerprint_hash(&[
                "fallow/code-duplication",
                &path,
                &start_str,
                &token_str,
                &line_count_str,
                &fragment_prefix,
            ]);
            issues.push(fallow_output::build_codeclimate_issue(
                CodeClimateIssueInput {
                    check_name: "fallow/code-duplication",
                    description: &description,
                    severity: CodeClimateSeverity::Minor,
                    category: "Duplication",
                    path: &path,
                    begin_line: Some(instance.start_line as u32),
                    fingerprint: &fp,
                },
            ));
        }
    }

    issues
}
/// Renders `path` as the location string used in CodeClimate issues.
///
/// The `root` prefix is stripped when `path` lies under it; otherwise the path
/// is used unchanged. The result is passed through `normalize_uri`.
fn codeclimate_path(path: &Path, root: &Path) -> String {
    let relative = path.strip_prefix(root).unwrap_or(path);
    let rendered = relative.display().to_string();
    normalize_uri(&rendered)
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use fallow_output::{CloneFamilyActionType, CloneGroupActionType};
    use fallow_types::duplicates::{
        CloneInstance, DuplicationStats, RefactoringKind, RefactoringSuggestion,
    };

    use super::*;

    /// Instance fixture: lines 1-10 of `path`, with an empty fragment.
    fn instance(path: &str) -> CloneInstance {
        CloneInstance {
            file: PathBuf::from(path),
            start_line: 1,
            end_line: 10,
            start_col: 0,
            end_col: 0,
            fragment: String::new(),
        }
    }

    /// Group fixture: `count` instances in distinct files, 100 tokens, 20 lines.
    fn group(count: usize) -> CloneGroup {
        let instances: Vec<CloneInstance> = (0..count)
            .map(|index| format!("/root/file_{index}.ts"))
            .map(|path| instance(&path))
            .collect();
        CloneGroup {
            instances,
            token_count: 100,
            line_count: 20,
        }
    }

    /// Family fixture: one file, one two-instance group, no suggestions.
    fn single_group_family() -> CloneFamily {
        CloneFamily {
            files: vec![PathBuf::from("/root/a.ts")],
            groups: vec![group(2)],
            total_duplicated_lines: 20,
            total_duplicated_tokens: 100,
            suggestions: Vec::new(),
        }
    }

    /// Asserts the group-level action list is exactly extract-shared followed
    /// by suppress-line.
    #[track_caller]
    fn assert_extract_then_suppress(actions: &[CloneGroupAction]) {
        assert_eq!(actions.len(), 2);
        assert_eq!(actions[0].kind, CloneGroupActionType::ExtractShared);
        assert_eq!(actions[1].kind, CloneGroupActionType::SuppressLine);
    }

    #[test]
    fn clone_group_finding_position_0_is_extract_shared() {
        let finding = CloneGroupFinding::with_actions(group(2));

        assert_extract_then_suppress(&finding.actions);
        assert!(finding.introduced.is_none());
    }

    #[test]
    fn attributed_clone_group_finding_actions_match_clone_group_shape() {
        let owned_by_src = |path: &str| AttributedInstance {
            instance: instance(path),
            owner: "src".to_string(),
        };
        let attributed = AttributedCloneGroup {
            primary_owner: "src".to_string(),
            token_count: 100,
            line_count: 20,
            instances: vec![
                owned_by_src("/root/src/a.ts"),
                owned_by_src("/root/src/b.ts"),
            ],
        };

        let finding = AttributedCloneGroupFinding::with_actions(attributed);

        assert_extract_then_suppress(&finding.actions);
    }

    #[test]
    fn clone_group_finding_surfaces_dominant_identifier() {
        let fragment = "function parseCsv() { parseCsv(); parseCsv(); return parseCsv; }";
        let instances: Vec<CloneInstance> = ["/root/a.ts", "/root/b.ts"]
            .iter()
            .map(|path| CloneInstance {
                end_line: 3,
                fragment: fragment.to_string(),
                ..instance(path)
            })
            .collect();
        let g = CloneGroup {
            instances,
            token_count: 100,
            line_count: 3,
        };

        let finding = CloneGroupFinding::with_actions(g);

        assert_eq!(finding.suggested_name.as_deref(), Some("parseCsv"));
    }

    #[test]
    fn clone_group_finding_suggested_name_none_for_unnamed_fragment() {
        let finding = CloneGroupFinding::with_actions(group(2));

        assert!(finding.suggested_name.is_none());
    }

    #[test]
    fn clone_group_finding_description_pluralises_instance_count() {
        let single = CloneGroupFinding::with_actions(group(1));
        let single_text = &single.actions[0].description;
        assert!(single_text.contains("1 instance"));
        assert!(!single_text.contains("1 instances"));

        let multi = CloneGroupFinding::with_actions(group(3));
        assert!(multi.actions[0].description.contains("3 instances"));
    }

    #[test]
    fn clone_family_finding_position_0_is_extract_shared_then_suggestions_then_suppress() {
        let suggestions = vec![
            RefactoringSuggestion {
                kind: RefactoringKind::ExtractFunction,
                description: "Extract helper".to_string(),
                estimated_savings: 10,
            },
            RefactoringSuggestion {
                kind: RefactoringKind::ExtractModule,
                description: "Extract module".to_string(),
                estimated_savings: 30,
            },
        ];
        let family = CloneFamily {
            files: vec![PathBuf::from("/root/a.ts"), PathBuf::from("/root/b.ts")],
            groups: vec![group(2), group(2)],
            total_duplicated_lines: 40,
            total_duplicated_tokens: 200,
            suggestions,
        };

        let finding = CloneFamilyFinding::with_actions(family);

        // Comparing the whole kind sequence also pins the action count to four.
        let kinds: Vec<_> = finding.actions.iter().map(|action| &action.kind).collect();
        assert_eq!(
            kinds,
            [
                &CloneFamilyActionType::ExtractShared,
                &CloneFamilyActionType::ApplySuggestion,
                &CloneFamilyActionType::ApplySuggestion,
                &CloneFamilyActionType::SuppressLine,
            ]
        );
        assert_eq!(finding.actions[1].description, "Extract helper");
        assert_eq!(finding.actions[2].description, "Extract module");

        assert_eq!(finding.groups.len(), 2);
        for inner in &finding.groups {
            assert_extract_then_suppress(&inner.actions);
        }
    }

    #[test]
    fn clone_family_finding_with_no_suggestions_emits_two_actions() {
        let finding = CloneFamilyFinding::with_actions(single_group_family());

        let kinds: Vec<_> = finding.actions.iter().map(|action| &action.kind).collect();
        assert_eq!(
            kinds,
            [
                &CloneFamilyActionType::ExtractShared,
                &CloneFamilyActionType::SuppressLine,
            ]
        );
    }

    #[test]
    fn payload_from_report_wraps_all_findings() {
        let report = DuplicationReport {
            clone_groups: vec![group(2), group(3)],
            clone_families: vec![single_group_family()],
            mirrored_directories: Vec::new(),
            stats: DuplicationStats::default(),
        };

        let payload = DupesReportPayload::from_report(&report);

        assert_eq!(payload.clone_groups.len(), 2);
        assert_eq!(payload.clone_families.len(), 1);
        for finding in &payload.clone_groups {
            assert_eq!(finding.actions.len(), 2);
        }
        assert_eq!(payload.clone_families[0].actions.len(), 2);
    }

    #[test]
    fn duplication_codeclimate_uses_relative_normalized_paths() {
        let only_instance = CloneInstance {
            start_line: 4,
            end_line: 8,
            fragment: "const duplicate = 1;".to_string(),
            ..instance("/root/app/[id]/page.tsx")
        };
        let report = DuplicationReport {
            clone_groups: vec![CloneGroup {
                instances: vec![only_instance],
                token_count: 42,
                line_count: 5,
            }],
            clone_families: Vec::new(),
            mirrored_directories: Vec::new(),
            stats: DuplicationStats::default(),
        };

        let issues = build_duplication_codeclimate(&report, Path::new("/root"));

        assert_eq!(issues.len(), 1);
        let issue = &issues[0];
        assert_eq!(issue.check_name, "fallow/code-duplication");
        assert_eq!(issue.location.path, "app/%5Bid%5D/page.tsx");
        assert_eq!(issue.location.lines.begin, 4);
        assert_eq!(issue.categories, vec!["Duplication"]);
        assert!(issue.description.contains("Code clone group 1"));
    }
}