fallow_types/duplicates.rs
1//! Shared duplicate-code output contracts.
2
3use std::path::PathBuf;
4
5use serde::Serialize;
6
7use crate::serde_path;
8
9/// A single instance of duplicated code at a specific location.
10#[derive(Debug, Clone, Serialize)]
11#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
12pub struct CloneInstance {
13 /// Path to the file containing this clone instance.
14 #[serde(serialize_with = "serde_path::serialize")]
15 pub file: PathBuf,
16 /// 1-based start line of the clone.
17 pub start_line: usize,
18 /// 1-based end line of the clone.
19 pub end_line: usize,
20 /// 0-based start column.
21 pub start_col: usize,
22 /// 0-based end column.
23 pub end_col: usize,
24 /// The actual source code fragment.
25 pub fragment: String,
26}
27
28/// A group of code clones -- the same (or normalized-equivalent) code appearing
29/// in multiple places.
30#[derive(Debug, Clone, Serialize)]
31#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
32pub struct CloneGroup {
33 /// All instances where this duplicated code appears.
34 pub instances: Vec<CloneInstance>,
35 /// Number of tokens in the duplicated block.
36 pub token_count: usize,
37 /// Number of lines in the duplicated block.
38 pub line_count: usize,
39}
40
41/// The kind of refactoring suggested for a clone family.
42#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
43#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
44pub enum RefactoringKind {
45 /// Extract a shared function/utility.
46 ExtractFunction,
47 /// Extract a shared module.
48 ExtractModule,
49}
50
51/// A refactoring suggestion for a clone family.
52#[derive(Debug, Clone, Serialize)]
53#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
54pub struct RefactoringSuggestion {
55 /// What kind of refactoring is suggested.
56 pub kind: RefactoringKind,
57 /// Human-readable description of the suggestion.
58 pub description: String,
59 /// Estimated lines that could be eliminated.
60 pub estimated_savings: usize,
61}
62
63/// A clone family: a set of clone groups that share the same file set.
64///
65/// When multiple clone groups are all duplicated between the same set of files,
66/// they form a family, indicating a deeper structural relationship that should
67/// be refactored together rather than group-by-group.
68#[derive(Debug, Clone, Serialize)]
69#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
70pub struct CloneFamily {
71 /// The files involved in this family (sorted for stable output).
72 #[serde(serialize_with = "serde_path::serialize_vec")]
73 pub files: Vec<PathBuf>,
74 /// Clone groups belonging to this family.
75 pub groups: Vec<CloneGroup>,
76 /// Total number of duplicated lines across all groups.
77 pub total_duplicated_lines: usize,
78 /// Total number of duplicated tokens across all groups.
79 pub total_duplicated_tokens: usize,
80 /// Refactoring suggestions for this family.
81 pub suggestions: Vec<RefactoringSuggestion>,
82}
83
84/// A detected mirrored directory pattern: two directory prefixes that contain
85/// identical files (e.g., `src/` and `deno/lib/`).
86#[derive(Debug, Clone, Serialize)]
87#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
88pub struct MirroredDirectory {
89 /// First directory path (lexically smaller).
90 pub dir_a: String,
91 /// Second directory path.
92 pub dir_b: String,
93 /// Filenames shared between the two directories.
94 pub shared_files: Vec<String>,
95 /// Total duplicated lines across all shared files.
96 pub total_lines: usize,
97}
98
/// How many files a single built-in duplicates ignore pattern caused to be
/// skipped.
///
/// `Default` yields an empty pattern with a count of zero.
#[derive(Debug, Clone, Default)]
pub struct DefaultIgnoreSkipCount {
    /// The glob pattern that matched the skipped files.
    pub pattern: &'static str,
    /// How many files this pattern skipped.
    pub count: usize,
}
107
108/// Human-format-only skipped-file stats for built-in duplicates ignores.
109#[derive(Debug, Clone, Default)]
110pub struct DefaultIgnoreSkips {
111 /// Total number of files skipped by built-in duplicates ignores.
112 pub total: usize,
113 /// Per-pattern skip counts, in default pattern order.
114 pub by_pattern: Vec<DefaultIgnoreSkipCount>,
115}
116
117/// Overall duplication analysis report.
118#[derive(Debug, Clone, Default, Serialize)]
119#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
120pub struct DuplicationReport {
121 /// All detected clone groups. Each group contains 2+ instances of identical
122 /// or near-identical code.
123 pub clone_groups: Vec<CloneGroup>,
124 /// Clone families: groups of clone groups sharing the same file set,
125 /// indicating systematic duplication patterns.
126 pub clone_families: Vec<CloneFamily>,
127 /// Detected mirrored directory trees (directories with many identical files).
128 #[serde(default, skip_serializing_if = "Vec::is_empty")]
129 pub mirrored_directories: Vec<MirroredDirectory>,
130 /// Aggregate statistics.
131 pub stats: DuplicationStats,
132}
133
134impl DuplicationReport {
135 /// Sort all result arrays for deterministic output ordering.
136 ///
137 /// Clone groups are sorted by their first instance's file path and line, and
138 /// instances within each group are sorted by file path then line. Clone
139 /// families are sorted by their file set.
140 pub fn sort(&mut self) {
141 for group in &mut self.clone_groups {
142 group
143 .instances
144 .sort_by(|a, b| a.file.cmp(&b.file).then(a.start_line.cmp(&b.start_line)));
145 }
146 self.clone_groups
147 .sort_by(|a, b| match (a.instances.first(), b.instances.first()) {
148 (Some(ai), Some(bi)) => ai
149 .file
150 .cmp(&bi.file)
151 .then(ai.start_line.cmp(&bi.start_line)),
152 (Some(_), None) => std::cmp::Ordering::Less,
153 (None, Some(_)) => std::cmp::Ordering::Greater,
154 (None, None) => std::cmp::Ordering::Equal,
155 });
156
157 for family in &mut self.clone_families {
158 for group in &mut family.groups {
159 group
160 .instances
161 .sort_by(|a, b| a.file.cmp(&b.file).then(a.start_line.cmp(&b.start_line)));
162 }
163 family
164 .groups
165 .sort_by(|a, b| match (a.instances.first(), b.instances.first()) {
166 (Some(ai), Some(bi)) => ai
167 .file
168 .cmp(&bi.file)
169 .then(ai.start_line.cmp(&bi.start_line)),
170 (Some(_), None) => std::cmp::Ordering::Less,
171 (None, Some(_)) => std::cmp::Ordering::Greater,
172 (None, None) => std::cmp::Ordering::Equal,
173 });
174 }
175 self.clone_families.sort_by(|a, b| a.files.cmp(&b.files));
176 }
177}
178
179/// Aggregate duplication statistics.
180#[derive(Debug, Clone, Default, Serialize)]
181#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
182pub struct DuplicationStats {
183 /// Total files analyzed.
184 pub total_files: usize,
185 /// Files containing at least one clone instance.
186 pub files_with_clones: usize,
187 /// Total lines across all analyzed files.
188 pub total_lines: usize,
189 /// Lines that are part of at least one clone.
190 pub duplicated_lines: usize,
191 /// Total tokens across all analyzed files.
192 pub total_tokens: usize,
193 /// Tokens that are part of at least one clone.
194 pub duplicated_tokens: usize,
195 /// Number of clone groups in the reported `clone_groups[]` array.
196 /// Matches `clone_groups[].length` post `minOccurrences` filtering; the
197 /// count of groups hidden by the filter is exposed in
198 /// `clone_groups_below_min_occurrences`.
199 pub clone_groups: usize,
200 /// Total clone instances across all reported groups. Matches the sum of
201 /// `clone_groups[].locations[].length` post `minOccurrences` filtering.
202 pub clone_instances: usize,
203 /// Percentage of duplicated lines (0.0 to 100.0). Always reflects the FULL
204 /// corpus, computed BEFORE the `minOccurrences` filter so trend lines and
205 /// `threshold` gates stay stable when the filter changes.
206 pub duplication_percentage: f64,
207 /// Number of clone groups hidden by `duplicates.minOccurrences`. Absent (or
208 /// `0`) when the filter is at its default of `2` and nothing was hidden.
209 /// Pre-filter clone group count = `clone_groups +
210 /// clone_groups_below_min_occurrences`.
211 #[serde(default, skip_serializing_if = "is_zero_usize")]
212 pub clone_groups_below_min_occurrences: usize,
213}
214
/// Returns `true` when the referenced value is zero.
///
/// Used as a serde `skip_serializing_if` predicate, which is why the argument
/// is taken by reference even though `usize` is `Copy`.
#[expect(
    clippy::trivially_copy_pass_by_ref,
    reason = "serde skip_serializing_if requires &T signature"
)]
const fn is_zero_usize(value: &usize) -> bool {
    matches!(*value, 0)
}