similarity_core/
cli_parallel.rs1use crate::TSEDOptions;
2use rayon::prelude::*;
3use std::fs;
4use std::path::PathBuf;
5
/// One source file together with the functions extracted from it.
///
/// `F` is the per-function representation; it is whatever the extractor that
/// produced this value uses as its `Function` type.
#[derive(Debug)]
pub struct FileData<F> {
    /// Location of the file.
    pub path: PathBuf,
    /// Full text of the file.
    pub content: String,
    /// Functions found in `content`.
    pub functions: Vec<F>,
}
13
/// Produces the list of functions contained in the text of a single file.
pub trait FunctionExtractor {
    /// Representation of one extracted function.
    ///
    /// The `Clone + Send + Sync` bounds let values be copied and shared
    /// between worker threads.
    type Function: Clone + Send + Sync;

    /// Extracts the functions found in `content`.
    ///
    /// * `filename` - name of the file that `content` belongs to.
    /// * `content` - the file's text.
    ///
    /// # Errors
    ///
    /// Returns the implementation's error when extraction fails.
    fn extract_functions(
        &self,
        filename: &str,
        content: &str,
    ) -> Result<Vec<Self::Function>, Box<dyn std::error::Error>>;
}
24
/// A pair of functions together with the similarity score computed for them.
#[derive(Debug, Clone)]
pub struct SimilarityResult<F> {
    /// First function of the pair.
    pub func1: F,
    /// Second function of the pair.
    pub func2: F,
    /// Similarity score of `func1` and `func2`.
    pub similarity: f64,
}

impl<F> SimilarityResult<F> {
    /// Bundles two functions and their similarity score into one value.
    pub fn new(func1: F, func2: F, similarity: f64) -> Self {
        SimilarityResult {
            func1,
            func2,
            similarity,
        }
    }
}
38
39pub trait SimilarityChecker {
41 type Function: Clone + Send + Sync;
42
43 fn find_similar_in_file(
44 &self,
45 filename: &str,
46 content: &str,
47 threshold: f64,
48 options: &TSEDOptions,
49 fast_mode: bool,
50 ) -> Result<Vec<SimilarityResult<Self::Function>>, Box<dyn std::error::Error>>;
51
52 fn compare_functions(
53 &self,
54 func1: &Self::Function,
55 func2: &Self::Function,
56 content1: &str,
57 content2: &str,
58 options: &TSEDOptions,
59 ) -> Result<f64, Box<dyn std::error::Error>>;
60}
61
62pub fn load_files_parallel<E>(files: &[PathBuf], extractor: &E) -> Vec<FileData<E::Function>>
64where
65 E: FunctionExtractor + Sync,
66 E::Function: Send,
67{
68 files
69 .par_iter()
70 .filter_map(|file| {
71 match fs::read_to_string(file) {
72 Ok(content) => {
73 let filename = file.to_string_lossy();
74 match extractor.extract_functions(&filename, &content) {
76 Ok(functions) => Some(FileData { path: file.clone(), content, functions }),
77 Err(_) => None, }
79 }
80 Err(e) => {
81 eprintln!("Error reading {}: {}", file.display(), e);
82 None
83 }
84 }
85 })
86 .collect()
87}
88
89pub fn check_within_file_duplicates_parallel<S>(
91 files: &[PathBuf],
92 threshold: f64,
93 options: &TSEDOptions,
94 fast_mode: bool,
95 checker: &S,
96) -> Vec<(PathBuf, Vec<SimilarityResult<S::Function>>)>
97where
98 S: SimilarityChecker + Sync,
99{
100 files
101 .par_iter()
102 .filter_map(|file| match fs::read_to_string(file) {
103 Ok(code) => {
104 let file_str = file.to_string_lossy();
105
106 match checker.find_similar_in_file(&file_str, &code, threshold, options, fast_mode)
107 {
108 Ok(pairs) if !pairs.is_empty() => Some((file.clone(), pairs)),
109 _ => None,
110 }
111 }
112 Err(_) => None,
113 })
114 .collect()
115}
116
117pub fn check_cross_file_duplicates_parallel<S>(
119 file_data: &[FileData<S::Function>],
120 threshold: f64,
121 options: &TSEDOptions,
122 checker: &S,
123) -> Vec<(String, SimilarityResult<S::Function>, String)>
124where
125 S: SimilarityChecker + Sync,
126 S::Function: Clone + Send + Sync,
127{
128 let mut all_functions = Vec::new();
130 for data in file_data {
131 let filename = data.path.to_string_lossy().to_string();
132 for func in &data.functions {
133 all_functions.push((filename.clone(), data.content.clone(), func.clone()));
134 }
135 }
136
137 let mut pairs_to_check = Vec::new();
139 for i in 0..all_functions.len() {
140 for j in (i + 1)..all_functions.len() {
141 let (file1, _, _) = &all_functions[i];
142 let (file2, _, _) = &all_functions[j];
143
144 if file1 != file2 {
146 pairs_to_check.push((i, j));
147 }
148 }
149 }
150
151 pairs_to_check
153 .into_par_iter()
154 .filter_map(|(i, j)| {
155 let (file1, content1, func1) = &all_functions[i];
156 let (file2, content2, func2) = &all_functions[j];
157
158 match checker.compare_functions(func1, func2, content1, content2, options) {
160 Ok(similarity) => {
161 if similarity >= threshold {
162 Some((
163 file1.clone(),
164 SimilarityResult::new(func1.clone(), func2.clone(), similarity),
165 file2.clone(),
166 ))
167 } else {
168 None
169 }
170 }
171 Err(_) => None,
172 }
173 })
174 .collect()
175}