Skip to main content

fallow_types/
duplicates.rs

1//! Shared duplicate-code output contracts.
2
3use std::path::PathBuf;
4
5use serde::Serialize;
6
7use crate::serde_path;
8
/// A single instance of duplicated code at a specific location.
///
/// The location is a file path plus a line/column span; `fragment` carries the
/// duplicated source text itself.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CloneInstance {
    /// Path to the file containing this clone instance.
    // Serialized through the crate's `serde_path::serialize` helper rather than
    // serde's built-in `PathBuf` handling.
    #[serde(serialize_with = "serde_path::serialize")]
    pub file: PathBuf,
    /// 1-based start line of the clone.
    pub start_line: usize,
    /// 1-based end line of the clone.
    pub end_line: usize,
    /// 0-based start column.
    pub start_col: usize,
    /// 0-based end column.
    pub end_col: usize,
    /// The actual source code fragment.
    pub fragment: String,
}
27
/// A group of code clones -- the same (or normalized-equivalent) code appearing
/// in multiple places.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CloneGroup {
    /// All instances where this duplicated code appears.
    // `DuplicationReport::sort` orders these by file path, then start line.
    pub instances: Vec<CloneInstance>,
    /// Number of tokens in the duplicated block.
    pub token_count: usize,
    /// Number of lines in the duplicated block.
    pub line_count: usize,
}
40
/// The kind of refactoring suggested for a clone family.
// No serde `rename`/`rename_all` attribute is applied here, so the variants are
// serialized under the identifiers written below.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum RefactoringKind {
    /// Extract a shared function/utility.
    ExtractFunction,
    /// Extract a shared module.
    ExtractModule,
}
50
/// A refactoring suggestion for a clone family.
///
/// Pairs a machine-readable `kind` with a human-readable `description` and an
/// estimate of how many lines the refactoring could remove.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct RefactoringSuggestion {
    /// What kind of refactoring is suggested.
    pub kind: RefactoringKind,
    /// Human-readable description of the suggestion.
    pub description: String,
    /// Estimated lines that could be eliminated.
    pub estimated_savings: usize,
}
62
/// A clone family: a set of clone groups that share the same file set.
///
/// When multiple clone groups are all duplicated between the same set of files,
/// they form a family, indicating a deeper structural relationship that should
/// be refactored together rather than group-by-group.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct CloneFamily {
    /// The files involved in this family (sorted for stable output).
    // Serialized through the crate's `serde_path::serialize_vec` helper. This
    // vector is also the key `DuplicationReport::sort` orders families by.
    #[serde(serialize_with = "serde_path::serialize_vec")]
    pub files: Vec<PathBuf>,
    /// Clone groups belonging to this family.
    pub groups: Vec<CloneGroup>,
    /// Total number of duplicated lines across all groups.
    pub total_duplicated_lines: usize,
    /// Total number of duplicated tokens across all groups.
    pub total_duplicated_tokens: usize,
    /// Refactoring suggestions for this family.
    pub suggestions: Vec<RefactoringSuggestion>,
}
83
/// A detected mirrored directory pattern: two directory prefixes that contain
/// identical files (e.g., `src/` and `deno/lib/`).
// Unlike `CloneInstance::file`, the directories and file names here are plain
// `String`s and use serde's default string serialization.
#[derive(Debug, Clone, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct MirroredDirectory {
    /// First directory path (lexically smaller).
    pub dir_a: String,
    /// Second directory path.
    pub dir_b: String,
    /// Filenames shared between the two directories.
    pub shared_files: Vec<String>,
    /// Total duplicated lines across all shared files.
    pub total_lines: usize,
}
98
/// Number of files skipped by one built-in duplicates ignore pattern.
// Deliberately not `Serialize`: this type only derives `Debug`, `Clone` and
// `Default`, so it cannot appear in serialized report output.
#[derive(Debug, Clone, Default)]
pub struct DefaultIgnoreSkipCount {
    /// Glob pattern that matched skipped files.
    // NOTE(review): `&'static str` suggests this points at a built-in pattern
    // constant -- confirm against the code that populates it.
    pub pattern: &'static str,
    /// Number of files skipped by this pattern.
    pub count: usize,
}
107
/// Human-format-only skipped-file stats for built-in duplicates ignores.
// Like `DefaultIgnoreSkipCount`, this type does not derive `Serialize`.
#[derive(Debug, Clone, Default)]
pub struct DefaultIgnoreSkips {
    /// Total number of files skipped by built-in duplicates ignores.
    // NOTE(review): presumably the sum of `by_pattern[].count`; nothing in this
    // file enforces that -- confirm at the producer.
    pub total: usize,
    /// Per-pattern skip counts, in default pattern order.
    pub by_pattern: Vec<DefaultIgnoreSkipCount>,
}
116
/// Overall duplication analysis report.
///
/// Call [`DuplicationReport::sort`] before emitting the report to get a stable
/// ordering of `clone_groups` and `clone_families`.
#[derive(Debug, Clone, Default, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct DuplicationReport {
    /// All detected clone groups. Each group contains 2+ instances of identical
    /// or near-identical code.
    pub clone_groups: Vec<CloneGroup>,
    /// Clone families: groups of clone groups sharing the same file set,
    /// indicating systematic duplication patterns.
    pub clone_families: Vec<CloneFamily>,
    /// Detected mirrored directory trees (directories with many identical files).
    // Omitted from serialized output when empty. Note that
    // `DuplicationReport::sort` does not reorder this vector.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub mirrored_directories: Vec<MirroredDirectory>,
    /// Aggregate statistics.
    pub stats: DuplicationStats,
}
133
134impl DuplicationReport {
135    /// Sort all result arrays for deterministic output ordering.
136    ///
137    /// Clone groups are sorted by their first instance's file path and line, and
138    /// instances within each group are sorted by file path then line. Clone
139    /// families are sorted by their file set.
140    pub fn sort(&mut self) {
141        for group in &mut self.clone_groups {
142            group
143                .instances
144                .sort_by(|a, b| a.file.cmp(&b.file).then(a.start_line.cmp(&b.start_line)));
145        }
146        self.clone_groups
147            .sort_by(|a, b| match (a.instances.first(), b.instances.first()) {
148                (Some(ai), Some(bi)) => ai
149                    .file
150                    .cmp(&bi.file)
151                    .then(ai.start_line.cmp(&bi.start_line)),
152                (Some(_), None) => std::cmp::Ordering::Less,
153                (None, Some(_)) => std::cmp::Ordering::Greater,
154                (None, None) => std::cmp::Ordering::Equal,
155            });
156
157        for family in &mut self.clone_families {
158            for group in &mut family.groups {
159                group
160                    .instances
161                    .sort_by(|a, b| a.file.cmp(&b.file).then(a.start_line.cmp(&b.start_line)));
162            }
163            family
164                .groups
165                .sort_by(|a, b| match (a.instances.first(), b.instances.first()) {
166                    (Some(ai), Some(bi)) => ai
167                        .file
168                        .cmp(&bi.file)
169                        .then(ai.start_line.cmp(&bi.start_line)),
170                    (Some(_), None) => std::cmp::Ordering::Less,
171                    (None, Some(_)) => std::cmp::Ordering::Greater,
172                    (None, None) => std::cmp::Ordering::Equal,
173                });
174        }
175        self.clone_families.sort_by(|a, b| a.files.cmp(&b.files));
176    }
177}
178
/// Aggregate duplication statistics.
///
/// The counters `clone_groups` and `clone_instances` describe the reported
/// (post-filter) data, while `duplication_percentage` describes the full
/// corpus; see the individual fields.
#[derive(Debug, Clone, Default, Serialize)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct DuplicationStats {
    /// Total files analyzed.
    pub total_files: usize,
    /// Files containing at least one clone instance.
    pub files_with_clones: usize,
    /// Total lines across all analyzed files.
    pub total_lines: usize,
    /// Lines that are part of at least one clone.
    pub duplicated_lines: usize,
    /// Total tokens across all analyzed files.
    pub total_tokens: usize,
    /// Tokens that are part of at least one clone.
    pub duplicated_tokens: usize,
    /// Number of clone groups in the reported `clone_groups` array.
    /// Matches `clone_groups.length` post `minOccurrences` filtering; the
    /// count of groups hidden by the filter is exposed in
    /// `clone_groups_below_min_occurrences`.
    pub clone_groups: usize,
    /// Total clone instances across all reported groups. Matches the sum of
    /// `clone_groups[].instances.length` post `minOccurrences` filtering.
    pub clone_instances: usize,
    /// Percentage of duplicated lines (0.0 to 100.0). Always reflects the FULL
    /// corpus, computed BEFORE the `minOccurrences` filter so trend lines and
    /// `threshold` gates stay stable when the filter changes.
    pub duplication_percentage: f64,
    /// Number of clone groups hidden by `duplicates.minOccurrences`. Absent (or
    /// `0`) when the filter is at its default of `2` and nothing was hidden.
    /// Pre-filter clone group count = `clone_groups +
    /// clone_groups_below_min_occurrences`.
    // `is_zero_usize` drops the key from serialized output when the value is 0.
    #[serde(default, skip_serializing_if = "is_zero_usize")]
    pub clone_groups_below_min_occurrences: usize,
}
214
/// Returns `true` exactly when `value` is `0`.
///
/// Serves as the `skip_serializing_if` predicate for
/// `DuplicationStats::clone_groups_below_min_occurrences`, which is why it
/// takes its argument by reference.
#[expect(
    clippy::trivially_copy_pass_by_ref,
    reason = "serde skip_serializing_if requires &T signature"
)]
const fn is_zero_usize(value: &usize) -> bool {
    matches!(*value, 0)
}