Skip to main content

the_code_graph_domain/use_cases/
clones.rs

1use crate::analysis::clones::{
2    cluster_matches, compare_pair, compute_fingerprints, group_into_buckets,
3};
4use crate::model::*;
5use crate::ports::{FileSystem, GraphStore};
6use crate::Result;
7use std::collections::HashMap;
8use std::path::PathBuf;
9
/// Use case that runs clone detection over the symbols held in a graph store.
///
/// `S` is the graph-store backend and `F` the file-system backend; the trait
/// bounds live on the `impl` block rather than on the struct itself.
pub struct CloneUseCase<S, F> {
    /// Source of the symbols and edges to analyse.
    store: S,
    /// File-system access used to load the source text of symbols.
    fs: F,
    /// Directory that fingerprint file paths are joined onto before reading.
    root: PathBuf,
}
15
16impl<S: GraphStore, F: FileSystem> CloneUseCase<S, F> {
17    pub fn new(store: S, fs: F, root: PathBuf) -> Self {
18        Self { store, fs, root }
19    }
20
21    pub fn analyze(&self, config: &CloneConfig) -> Result<CloneAnalysis> {
22        let symbols = self.store.all_symbols()?;
23        let edges = self.store.all_edges()?;
24
25        let fingerprints = compute_fingerprints(&symbols, &edges, config);
26        let total_symbols = fingerprints.len();
27
28        if total_symbols < 2 {
29            return Ok(CloneAnalysis {
30                clusters: Vec::new(),
31                total_symbols_analyzed: total_symbols,
32                symbols_in_clones: 0,
33                duplication_pct: 0.0,
34                most_duplicated: None,
35            });
36        }
37
38        let buckets = group_into_buckets(&fingerprints);
39
40        // Phase 2: pairwise comparison within buckets with file content cache
41        let mut file_cache: HashMap<PathBuf, String> = HashMap::new();
42        let mut all_matches: Vec<CloneMatch> = Vec::new();
43
44        for bucket in buckets.values() {
45            if bucket.len() < 2 {
46                continue;
47            }
48            let max_pairs = config.max_candidates_per_bucket;
49            let mut pair_count = 0;
50
51            for i in 0..bucket.len() {
52                if pair_count >= max_pairs {
53                    break;
54                }
55                for j in (i + 1)..bucket.len() {
56                    if pair_count >= max_pairs {
57                        break;
58                    }
59                    let fp_a = &bucket[i];
60                    let fp_b = &bucket[j];
61                    let cross_lang = fp_a.language != fp_b.language;
62
63                    if cross_lang {
64                        let mut m = compare_pair("", "", true, config.threshold).unwrap();
65                        m.source = fp_a.qualified_name.clone();
66                        m.target = fp_b.qualified_name.clone();
67                        all_matches.push(m);
68                    } else {
69                        let body_a = self.read_body(&mut file_cache, fp_a);
70                        let body_b = self.read_body(&mut file_cache, fp_b);
71                        if let Some(mut m) = compare_pair(&body_a, &body_b, false, config.threshold)
72                        {
73                            m.source = fp_a.qualified_name.clone();
74                            m.target = fp_b.qualified_name.clone();
75                            all_matches.push(m);
76                        }
77                    }
78                    pair_count += 1;
79                }
80            }
81        }
82
83        let clusters = cluster_matches(&all_matches);
84        let symbols_in_clones: usize = clusters.iter().map(|c| c.members.len()).sum();
85        let duplication_pct = if total_symbols == 0 {
86            0.0
87        } else {
88            symbols_in_clones as f64 / total_symbols as f64 * 100.0
89        };
90
91        let mut pair_counts: HashMap<&str, usize> = HashMap::new();
92        for m in &all_matches {
93            *pair_counts.entry(&m.source).or_default() += 1;
94            *pair_counts.entry(&m.target).or_default() += 1;
95        }
96        let most_duplicated = pair_counts
97            .into_iter()
98            .max_by_key(|(_, c)| *c)
99            .map(|(name, _)| name.to_string());
100
101        Ok(CloneAnalysis {
102            clusters,
103            total_symbols_analyzed: total_symbols,
104            symbols_in_clones,
105            duplication_pct,
106            most_duplicated,
107        })
108    }
109
110    fn read_body(
111        &self,
112        cache: &mut HashMap<PathBuf, String>,
113        fp: &StructuralFingerprint,
114    ) -> String {
115        let file_content = cache
116            .entry(fp.file.clone())
117            .or_insert_with(|| {
118                let abs_path = self.root.join(&fp.file);
119                self.fs.read_file(&abs_path).unwrap_or_default()
120            })
121            .clone();
122
123        // Extract only the symbol body lines (1-indexed line_start..=line_end)
124        let lines: Vec<&str> = file_content.lines().collect();
125        let start = fp.line_start.saturating_sub(1); // convert to 0-indexed
126        let end = fp.line_end.min(lines.len());
127        if start >= lines.len() || start >= end {
128            return String::new();
129        }
130        lines[start..end].join("\n")
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::{InMemoryGraphStore, MockFileSystem};
    use std::path::PathBuf;

    /// Builds a public, non-exported, non-async, non-test function symbol
    /// located at lines 1..=10 of `file`. Callers that need a different span
    /// adjust `location` on the returned value.
    fn function_symbol(
        name: &'static str,
        qualified_name: &'static str,
        file: &'static str,
    ) -> SymbolNode {
        SymbolNode {
            name: name.into(),
            qualified_name: qualified_name.into(),
            kind: SymbolKind::Function,
            location: Location {
                file: PathBuf::from(file),
                line_start: 1,
                line_end: 10,
                col_start: 0,
                col_end: 0,
            },
            visibility: Visibility::Public,
            is_exported: false,
            is_async: false,
            is_test: false,
            decorators: vec![],
            signature: None,
        }
    }

    /// Fixture: two ten-line functions (`add`, `sum`) that differ only in
    /// identifier names, plus a three-line `tiny` function.
    fn build_clone_store() -> (InMemoryGraphStore, MockFileSystem) {
        let mut store = InMemoryGraphStore::new();
        store.insert_symbol(function_symbol("add", "a.rs::add", "a.rs"));
        store.insert_symbol(function_symbol("sum", "b.rs::sum", "b.rs"));

        let mut tiny = function_symbol("tiny", "c.rs::tiny", "c.rs");
        tiny.location.line_end = 3;
        store.insert_symbol(tiny);

        let fs = MockFileSystem::new(vec![
            (PathBuf::from("/test/a.rs"), "fn add(x: i32, y: i32) -> i32 {\n    x + y\n}\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad".into()),
            (PathBuf::from("/test/b.rs"), "fn sum(a: i32, b: i32) -> i32 {\n    a + b\n}\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad\n// pad".into()),
            (PathBuf::from("/test/c.rs"), "fn tiny() { 1 }".into()),
        ]);
        (store, fs)
    }

    #[test]
    fn analyze_detects_type2_clones() {
        let (store, fs) = build_clone_store();
        let uc = CloneUseCase::new(store, fs, PathBuf::from("/test"));
        let analysis = uc.analyze(&CloneConfig::default()).unwrap();
        assert!(!analysis.clusters.is_empty());
        assert!(analysis.duplication_pct > 0.0);
    }

    #[test]
    fn analyze_filters_by_min_lines() {
        let (store, fs) = build_clone_store();
        let uc = CloneUseCase::new(store, fs, PathBuf::from("/test"));
        let analysis = uc.analyze(&CloneConfig::default()).unwrap();
        let all_members: Vec<&str> = analysis
            .clusters
            .iter()
            .flat_map(|c| c.members.iter().map(|m| m.as_str()))
            .collect();
        assert!(!all_members.contains(&"c.rs::tiny"));
    }

    #[test]
    fn analyze_empty_graph() {
        let store = InMemoryGraphStore::new();
        let fs = MockFileSystem::new(vec![]);
        let uc = CloneUseCase::new(store, fs, PathBuf::from("/test"));
        let analysis = uc.analyze(&CloneConfig::default()).unwrap();
        assert!(analysis.clusters.is_empty());
        assert_eq!(analysis.symbols_in_clones, 0);
        assert_eq!(analysis.duplication_pct, 0.0);
        assert!(analysis.most_duplicated.is_none());
    }

    #[test]
    fn analyze_single_symbol() {
        let mut store = InMemoryGraphStore::new();
        store.insert_symbol(function_symbol("only", "a.rs::only", "a.rs"));
        let fs = MockFileSystem::new(vec![]);
        let uc = CloneUseCase::new(store, fs, PathBuf::from("/test"));
        let analysis = uc.analyze(&CloneConfig::default()).unwrap();
        assert!(analysis.clusters.is_empty());
        assert_eq!(analysis.symbols_in_clones, 0);
    }
}
264}