the_code_graph_domain/use_cases/
clones.rs1use crate::analysis::clones::{
2 cluster_matches, compare_pair, compute_fingerprints, group_into_buckets,
3};
4use crate::model::*;
5use crate::ports::{FileSystem, GraphStore};
6use crate::Result;
7use std::collections::HashMap;
8use std::path::PathBuf;
9
/// Use case that runs clone analysis over the symbols held in a graph store.
///
/// `S` is the storage backend and `F` the file-system abstraction; the trait
/// bounds live on the `impl` block rather than on the struct itself.
pub struct CloneUseCase<S, F> {
    /// Store queried for every symbol and edge at the start of an analysis.
    store: S,
    /// File-system port through which source files are read.
    fs: F,
    /// Directory that each symbol's file path is joined onto before reading.
    root: PathBuf,
}
15
16impl<S: GraphStore, F: FileSystem> CloneUseCase<S, F> {
17 pub fn new(store: S, fs: F, root: PathBuf) -> Self {
18 Self { store, fs, root }
19 }
20
21 pub fn analyze(&self, config: &CloneConfig) -> Result<CloneAnalysis> {
22 let symbols = self.store.all_symbols()?;
23 let edges = self.store.all_edges()?;
24
25 let fingerprints = compute_fingerprints(&symbols, &edges, config);
26 let total_symbols = fingerprints.len();
27
28 if total_symbols < 2 {
29 return Ok(CloneAnalysis {
30 clusters: Vec::new(),
31 total_symbols_analyzed: total_symbols,
32 symbols_in_clones: 0,
33 duplication_pct: 0.0,
34 most_duplicated: None,
35 });
36 }
37
38 let buckets = group_into_buckets(&fingerprints);
39
40 let mut file_cache: HashMap<PathBuf, String> = HashMap::new();
42 let mut all_matches: Vec<CloneMatch> = Vec::new();
43
44 for bucket in buckets.values() {
45 if bucket.len() < 2 {
46 continue;
47 }
48 let max_pairs = config.max_candidates_per_bucket;
49 let mut pair_count = 0;
50
51 for i in 0..bucket.len() {
52 if pair_count >= max_pairs {
53 break;
54 }
55 for j in (i + 1)..bucket.len() {
56 if pair_count >= max_pairs {
57 break;
58 }
59 let fp_a = &bucket[i];
60 let fp_b = &bucket[j];
61 let cross_lang = fp_a.language != fp_b.language;
62
63 if cross_lang {
64 let mut m = compare_pair("", "", true, config.threshold).unwrap();
65 m.source = fp_a.qualified_name.clone();
66 m.target = fp_b.qualified_name.clone();
67 all_matches.push(m);
68 } else {
69 let body_a = self.read_body(&mut file_cache, fp_a);
70 let body_b = self.read_body(&mut file_cache, fp_b);
71 if let Some(mut m) = compare_pair(&body_a, &body_b, false, config.threshold)
72 {
73 m.source = fp_a.qualified_name.clone();
74 m.target = fp_b.qualified_name.clone();
75 all_matches.push(m);
76 }
77 }
78 pair_count += 1;
79 }
80 }
81 }
82
83 let clusters = cluster_matches(&all_matches);
84 let symbols_in_clones: usize = clusters.iter().map(|c| c.members.len()).sum();
85 let duplication_pct = if total_symbols == 0 {
86 0.0
87 } else {
88 symbols_in_clones as f64 / total_symbols as f64 * 100.0
89 };
90
91 let mut pair_counts: HashMap<&str, usize> = HashMap::new();
92 for m in &all_matches {
93 *pair_counts.entry(&m.source).or_default() += 1;
94 *pair_counts.entry(&m.target).or_default() += 1;
95 }
96 let most_duplicated = pair_counts
97 .into_iter()
98 .max_by_key(|(_, c)| *c)
99 .map(|(name, _)| name.to_string());
100
101 Ok(CloneAnalysis {
102 clusters,
103 total_symbols_analyzed: total_symbols,
104 symbols_in_clones,
105 duplication_pct,
106 most_duplicated,
107 })
108 }
109
110 fn read_body(
111 &self,
112 cache: &mut HashMap<PathBuf, String>,
113 fp: &StructuralFingerprint,
114 ) -> String {
115 let file_content = cache
116 .entry(fp.file.clone())
117 .or_insert_with(|| {
118 let abs_path = self.root.join(&fp.file);
119 self.fs.read_file(&abs_path).unwrap_or_default()
120 })
121 .clone();
122
123 let lines: Vec<&str> = file_content.lines().collect();
125 let start = fp.line_start.saturating_sub(1); let end = fp.line_end.min(lines.len());
127 if start >= lines.len() || start >= end {
128 return String::new();
129 }
130 lines[start..end].join("\n")
131 }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{InMemoryGraphStore, MockFileSystem};
    use std::path::PathBuf;

    /// Builds a public, non-test, non-async function symbol spanning lines
    /// 1..=10 of `file`. Callers that need another span adjust `location`.
    fn function_symbol(
        name: &'static str,
        qualified_name: &'static str,
        file: &'static str,
    ) -> SymbolNode {
        SymbolNode {
            name: name.into(),
            qualified_name: qualified_name.into(),
            kind: SymbolKind::Function,
            location: Location {
                file: PathBuf::from(file),
                line_start: 1,
                line_end: 10,
                col_start: 0,
                col_end: 0,
            },
            visibility: Visibility::Public,
            is_exported: false,
            is_async: false,
            is_test: false,
            decorators: vec![],
            signature: None,
        }
    }

    /// Fixture: `add` and `sum` are ten-line functions whose bodies differ only
    /// in identifier names; `tiny` spans three lines in a one-line file.
    fn build_clone_store() -> (InMemoryGraphStore, MockFileSystem) {
        let mut tiny = function_symbol("tiny", "c.rs::tiny", "c.rs");
        tiny.location.line_end = 3;

        let mut store = InMemoryGraphStore::new();
        store.insert_symbol(function_symbol("add", "a.rs::add", "a.rs"));
        store.insert_symbol(function_symbol("sum", "b.rs::sum", "b.rs"));
        store.insert_symbol(tiny);

        let files = vec![
            (
                PathBuf::from("/test/a.rs"),
                "fn add(x: i32, y: i32) -> i32 {\n x + y\n}\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad".into(),
            ),
            (
                PathBuf::from("/test/b.rs"),
                "fn sum(a: i32, b: i32) -> i32 {\n a + b\n}\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad".into(),
            ),
            (PathBuf::from("/test/c.rs"), "fn tiny() { 1 }".into()),
        ];
        (store, MockFileSystem::new(files))
    }

    /// Runs the default-config analysis over the given store and files,
    /// rooted at `/test`.
    fn run_default(store: InMemoryGraphStore, fs: MockFileSystem) -> CloneAnalysis {
        CloneUseCase::new(store, fs, PathBuf::from("/test"))
            .analyze(&CloneConfig::default())
            .unwrap()
    }

    #[test]
    fn analyze_detects_type2_clones() {
        let (store, fs) = build_clone_store();
        let analysis = run_default(store, fs);
        assert!(!analysis.clusters.is_empty());
        assert!(analysis.duplication_pct > 0.0);
    }

    #[test]
    fn analyze_filters_by_min_lines() {
        let (store, fs) = build_clone_store();
        let analysis = run_default(store, fs);
        // The short `tiny` symbol must not end up in any cluster.
        let tiny_clustered = analysis
            .clusters
            .iter()
            .flat_map(|cluster| cluster.members.iter())
            .any(|member| member.as_str() == "c.rs::tiny");
        assert!(!tiny_clustered);
    }

    #[test]
    fn analyze_empty_graph() {
        let analysis = run_default(InMemoryGraphStore::new(), MockFileSystem::new(vec![]));
        assert!(analysis.clusters.is_empty());
        assert_eq!(analysis.duplication_pct, 0.0);
        assert!(analysis.most_duplicated.is_none());
    }

    #[test]
    fn analyze_single_symbol() {
        let mut store = InMemoryGraphStore::new();
        store.insert_symbol(function_symbol("only", "a.rs::only", "a.rs"));
        let analysis = run_default(store, MockFileSystem::new(vec![]));
        assert!(analysis.clusters.is_empty());
    }
}