1use std::collections::{BTreeMap, BTreeSet, HashSet};
5
6use objects::object::{ChangeImportance, SemanticChange};
7
8use super::analysis_similarity::{SimilarityMethod, compute_similarity_with_language};
9use crate::parser::{FunctionDef, Language, ParsedFile};
10
11const FUNCTION_RENAME_SIMILARITY_THRESHOLD: f64 = 0.58;
12
13type FunctionMap = BTreeMap<String, Vec<FunctionDef>>;
31
32type InstanceRef = (String, usize);
35
36fn build_function_map(parsed: Option<&ParsedFile>) -> FunctionMap {
37 let Some(parsed) = parsed else {
38 return BTreeMap::new();
39 };
40 let mut map = FunctionMap::new();
41 for func in parsed.extract_functions() {
42 map.entry(func.name.clone()).or_default().push(func);
43 }
44 map
45}
46
47fn pair_within_name(
50 olds: &[FunctionDef],
51 news: &[FunctionDef],
52 similarity_method: SimilarityMethod,
53 language: Language,
54) -> (Vec<(usize, usize)>, Vec<usize>, Vec<usize>) {
55 let mut candidates: Vec<(usize, usize, f64)> = Vec::with_capacity(olds.len() * news.len());
56 for (i, o) in olds.iter().enumerate() {
57 for (j, n) in news.iter().enumerate() {
58 let similarity = if o.content == n.content {
59 1.0
60 } else {
61 compute_similarity_with_language(
62 &o.content,
63 &n.content,
64 similarity_method,
65 language,
66 )
67 };
68 candidates.push((i, j, similarity));
69 }
70 }
71 candidates
73 .sort_by(|(li, lj, ls), (ri, rj, rs)| rs.total_cmp(ls).then(li.cmp(ri)).then(lj.cmp(rj)));
74
75 let mut old_used = vec![false; olds.len()];
76 let mut new_used = vec![false; news.len()];
77 let mut pairs = Vec::new();
78 for (i, j, _) in candidates {
79 if !old_used[i] && !new_used[j] {
80 old_used[i] = true;
81 new_used[j] = true;
82 pairs.push((i, j));
83 }
84 }
85 let unmatched_old = old_used
86 .iter()
87 .enumerate()
88 .filter_map(|(i, used)| (!*used).then_some(i))
89 .collect();
90 let unmatched_new = new_used
91 .iter()
92 .enumerate()
93 .filter_map(|(j, used)| (!*used).then_some(j))
94 .collect();
95 (pairs, unmatched_old, unmatched_new)
96}
97
98pub fn detect_function_changes(
100 old_path: &std::path::Path,
101 new_path: &std::path::Path,
102 old_content: &str,
103 new_content: &str,
104 similarity_method: SimilarityMethod,
105) -> Vec<SemanticChange> {
106 let old_parsed = ParsedFile::parse(old_content, Language::from_path(old_path));
107 let new_parsed = ParsedFile::parse(new_content, Language::from_path(new_path));
108
109 detect_function_changes_with_parsed(
110 old_path,
111 new_path,
112 old_parsed.as_ref(),
113 new_parsed.as_ref(),
114 similarity_method,
115 )
116}
117
118pub(crate) fn detect_function_changes_with_parsed(
119 old_path: &std::path::Path,
120 new_path: &std::path::Path,
121 old_parsed: Option<&ParsedFile>,
122 new_parsed: Option<&ParsedFile>,
123 similarity_method: SimilarityMethod,
124) -> Vec<SemanticChange> {
125 let mut changes = Vec::new();
126 let mut file_modified = false;
127
128 let old_funcs = build_function_map(old_parsed);
129 let new_funcs = build_function_map(new_parsed);
130 let language = Language::from_path(new_path);
131
132 let mut pairs: Vec<(String, usize, usize)> = Vec::new();
134 let mut unmatched_old: Vec<InstanceRef> = Vec::new();
135 let mut unmatched_new: Vec<InstanceRef> = Vec::new();
136
137 let mut all_names: BTreeSet<&str> = BTreeSet::new();
138 all_names.extend(old_funcs.keys().map(String::as_str));
139 all_names.extend(new_funcs.keys().map(String::as_str));
140
141 let empty: Vec<FunctionDef> = Vec::new();
142 for name in &all_names {
143 let olds = old_funcs.get(*name).unwrap_or(&empty);
144 let news = new_funcs.get(*name).unwrap_or(&empty);
145 let (within, u_old, u_new) = pair_within_name(olds, news, similarity_method, language);
146 for (oi, ni) in within {
147 pairs.push(((*name).to_string(), oi, ni));
148 }
149 unmatched_old.extend(u_old.into_iter().map(|i| ((*name).to_string(), i)));
150 unmatched_new.extend(u_new.into_iter().map(|i| ((*name).to_string(), i)));
151 }
152 pairs.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.2.cmp(&b.2)));
153
154 let moved_function_names = stable_order_moved_names(&old_funcs, &new_funcs, &pairs);
155
156 let mut consumed_old: HashSet<InstanceRef> = HashSet::new();
158 for (new_name, ni) in &unmatched_new {
159 let new_func = &new_funcs[new_name][*ni];
160 let renamed_from = unmatched_old
161 .iter()
162 .filter(|(on, oi)| !consumed_old.contains(&(on.clone(), *oi)))
163 .filter(|(on, _)| on != new_name)
164 .filter_map(|(on, oi)| {
165 let old_func = &old_funcs[on][*oi];
166 let similarity = compute_similarity_with_language(
167 &normalized_function_for_matching(&old_func.content, on),
168 &normalized_function_for_matching(&new_func.content, new_name),
169 similarity_method,
170 language,
171 );
172 let same_location_update = old_path == new_path
173 && old_func.start_line.abs_diff(new_func.start_line) <= 5
174 && similarity >= 0.30;
175 (similarity >= FUNCTION_RENAME_SIMILARITY_THRESHOLD || same_location_update)
176 .then_some(((on.clone(), *oi), similarity))
177 })
178 .max_by(
179 |(left_key, left_similarity), (right_key, right_similarity)| {
180 left_similarity
181 .total_cmp(right_similarity)
182 .then_with(|| right_key.cmp(left_key))
183 },
184 )
185 .map(|(key, _)| key);
186
187 if let Some((old_name, old_idx)) = renamed_from {
188 consumed_old.insert((old_name.clone(), old_idx));
189 changes.push(SemanticChange::FunctionRenamed {
190 file: new_path.to_path_buf(),
191 old_name,
192 new_name: new_name.clone(),
193 importance: Some(ChangeImportance::Low),
194 });
195 file_modified = true;
196 } else {
197 let source = extraction_source(&old_funcs, new_func);
198 if let Some(source_name) = source {
199 changes.push(SemanticChange::FunctionExtracted {
200 file: new_path.to_path_buf(),
201 name: new_name.clone(),
202 source_file: Some(old_path.to_path_buf()),
203 source_name: Some(source_name),
204 importance: Some(ChangeImportance::High),
205 });
206 } else {
207 changes.push(SemanticChange::FunctionAdded {
208 file: new_path.to_path_buf(),
209 name: new_name.clone(),
210 importance: Some(ChangeImportance::High),
211 });
212 }
213 file_modified = true;
214 }
215 }
216
217 for (old_name, oi) in &unmatched_old {
219 if consumed_old.contains(&(old_name.clone(), *oi)) {
220 continue;
221 }
222 changes.push(SemanticChange::FunctionDeleted {
223 file: new_path.to_path_buf(),
224 name: old_name.clone(),
225 importance: Some(ChangeImportance::High),
226 });
227 file_modified = true;
228 }
229
230 for (name, oi, ni) in &pairs {
232 let old_func = &old_funcs[name][*oi];
233 let new_func = &new_funcs[name][*ni];
234 if old_func.signature != new_func.signature {
235 changes.push(SemanticChange::SignatureChanged {
236 file: new_path.to_path_buf(),
237 name: name.clone(),
238 old_signature: old_func.signature.clone(),
239 new_signature: new_func.signature.clone(),
240 importance: Some(ChangeImportance::Medium),
241 });
242 file_modified = true;
243 } else if old_path == new_path
244 && old_func.content == new_func.content
245 && old_func.start_line != new_func.start_line
246 && moved_function_names.contains(name)
247 {
248 changes.push(SemanticChange::FunctionMoved {
249 file: new_path.to_path_buf(),
250 name: name.clone(),
251 old_start_line: old_func.start_line,
252 new_start_line: new_func.start_line,
253 importance: Some(ChangeImportance::Low),
254 });
255 file_modified = true;
256 } else if old_func.content != new_func.content {
257 changes.push(SemanticChange::FunctionModified {
258 file: new_path.to_path_buf(),
259 name: name.clone(),
260 importance: Some(ChangeImportance::Medium),
261 });
262 file_modified = true;
263 }
264 }
265
266 if file_modified {
267 changes.push(SemanticChange::FileModified {
268 path: new_path.to_path_buf(),
269 classification: None,
270 importance: None,
271 confidence: None,
272 });
273 }
274
275 changes
276}
277
278fn extraction_source(old_funcs: &FunctionMap, extracted: &FunctionDef) -> Option<String> {
279 let extracted_lines = meaningful_body_lines(&extracted.content);
280 if extracted_lines.is_empty() {
281 return None;
282 }
283
284 old_funcs
285 .iter()
286 .flat_map(|(name, funcs)| funcs.iter().map(move |func| (name.clone(), func)))
287 .filter_map(|(name, old_func)| {
288 let old_lines = meaningful_body_lines(&old_func.content);
289 let evidence = extraction_evidence(&old_lines, &extracted_lines);
290 evidence.is_strong().then_some((name, evidence))
291 })
292 .max_by(|left, right| {
293 left.1
294 .score
295 .total_cmp(&right.1.score)
296 .then_with(|| left.1.matched.cmp(&right.1.matched))
297 .then_with(|| right.0.cmp(&left.0))
298 })
299 .map(|(name, _)| name)
300}
301
302#[derive(Debug)]
303struct ExtractionEvidence {
304 matched: usize,
305 score: f64,
306 exact_matches: usize,
307 longest_exact_expression_len: usize,
308 extracted_lines: usize,
309}
310
311impl ExtractionEvidence {
312 fn is_strong(&self) -> bool {
313 if self.extracted_lines == 0 {
314 return false;
315 }
316
317 let coverage = self.matched as f64 / self.extracted_lines as f64;
318 let weighted_coverage = self.score / self.extracted_lines as f64;
319
320 if self.extracted_lines == 1 {
321 return self.exact_matches == 1
322 && weighted_coverage >= 0.95
323 && self.longest_exact_expression_len >= 20;
324 }
325
326 coverage >= 0.70 && weighted_coverage >= 0.70
327 }
328}
329
330fn extraction_evidence(old_lines: &[String], extracted_lines: &[String]) -> ExtractionEvidence {
331 let mut matched = 0;
332 let mut score = 0.0;
333 let mut exact_matches = 0;
334 let mut longest_exact_expression_len = 0;
335
336 for line in extracted_lines {
337 let best = old_lines
338 .iter()
339 .map(|old_line| body_line_match(old_line, line))
340 .max_by(|left, right| left.score.total_cmp(&right.score))
341 .unwrap_or_default();
342 if best.score > 0.0 {
343 matched += 1;
344 score += best.score;
345 }
346 if best.score >= 1.0 {
347 exact_matches += 1;
348 longest_exact_expression_len = longest_exact_expression_len.max(best.expression_len);
349 }
350 }
351
352 ExtractionEvidence {
353 matched,
354 score,
355 exact_matches,
356 longest_exact_expression_len,
357 extracted_lines: extracted_lines.len(),
358 }
359}
360
361#[derive(Clone, Copy, Debug, Default)]
362struct BodyLineMatch {
363 score: f64,
364 expression_len: usize,
365}
366
367fn body_line_match(old_line: &str, extracted_line: &str) -> BodyLineMatch {
368 let old = comparable_body_expression(old_line);
369 let extracted = comparable_body_expression(extracted_line);
370 if old == extracted {
371 return BodyLineMatch {
372 score: 1.0,
373 expression_len: extracted.len(),
374 };
375 }
376 if extracted.len() >= 24 && old.contains(&extracted) {
377 return BodyLineMatch {
378 score: 0.75,
379 expression_len: extracted.len(),
380 };
381 }
382 if old.len() >= 24 && extracted.contains(&old) {
383 return BodyLineMatch {
384 score: 0.75,
385 expression_len: old.len(),
386 };
387 }
388 BodyLineMatch::default()
389}
390
391fn comparable_body_expression(line: &str) -> String {
392 let trimmed = line
393 .trim()
394 .trim_end_matches(';')
395 .trim_start_matches("return ")
396 .trim();
397 let expression = trimmed
398 .split_once('=')
399 .map(|(_, rhs)| rhs.trim())
400 .unwrap_or(trimmed);
401 expression.trim_end_matches(';').trim().to_string()
402}
403
404fn meaningful_body_lines(content: &str) -> Vec<String> {
405 content
406 .lines()
407 .map(str::trim)
408 .filter(|line| {
409 !line.is_empty()
410 && *line != "{"
411 && *line != "}"
412 && !line.starts_with("fn ")
413 && !line.starts_with("pub fn ")
414 && !line.starts_with("async fn ")
415 && !line.starts_with("pub async fn ")
416 })
417 .map(ToString::to_string)
418 .collect()
419}
420
421fn stable_order_moved_names(
422 old_funcs: &FunctionMap,
423 new_funcs: &FunctionMap,
424 pairs: &[(String, usize, usize)],
425) -> HashSet<String> {
426 let mut old_order: Vec<(usize, String)> = Vec::new();
427 let mut new_order: Vec<(usize, String)> = Vec::new();
428 for (name, oi, ni) in pairs {
429 let old_func = &old_funcs[name][*oi];
430 let new_func = &new_funcs[name][*ni];
431 if old_func.content == new_func.content {
432 old_order.push((old_func.start_line, name.clone()));
433 new_order.push((new_func.start_line, name.clone()));
434 }
435 }
436 old_order.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
437 new_order.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
438
439 let old_names: Vec<String> = old_order.into_iter().map(|(_, n)| n).collect();
440 let new_names: Vec<String> = new_order.into_iter().map(|(_, n)| n).collect();
441 if old_names == new_names {
442 return HashSet::new();
443 }
444 old_names
445 .into_iter()
446 .zip(new_names)
447 .filter_map(|(old_name, new_name)| (old_name != new_name).then_some([old_name, new_name]))
448 .flatten()
449 .collect()
450}
451
452fn normalized_function_for_matching(content: &str, name: &str) -> String {
453 content
454 .replace(name, "__function_name__")
455 .lines()
456 .map(str::trim)
457 .filter(|line| !line.is_empty())
458 .collect::<Vec<_>>()
459 .join("\n")
460}