1use std::collections::{BTreeMap, BTreeSet, HashSet};
5
6use objects::object::{ChangeImportance, SemanticChange};
7
8use super::analysis_similarity::{SimilarityMethod, compute_similarity_with_language};
9use crate::parser::{FunctionDef, Language, ParsedFile};
10
11const FUNCTION_RENAME_SIMILARITY_THRESHOLD: f64 = 0.58;
12
13pub fn detect_function_changes(
15 old_path: &std::path::Path,
16 new_path: &std::path::Path,
17 old_content: &str,
18 new_content: &str,
19 similarity_method: SimilarityMethod,
20) -> Vec<SemanticChange> {
21 let old_parsed = ParsedFile::parse(old_content, Language::from_path(old_path));
22 let new_parsed = ParsedFile::parse(new_content, Language::from_path(new_path));
23
24 detect_function_changes_with_parsed(
25 old_path,
26 new_path,
27 old_parsed.as_ref(),
28 new_parsed.as_ref(),
29 similarity_method,
30 )
31}
32
33pub(crate) fn detect_function_changes_with_parsed(
34 old_path: &std::path::Path,
35 new_path: &std::path::Path,
36 old_parsed: Option<&ParsedFile>,
37 new_parsed: Option<&ParsedFile>,
38 similarity_method: SimilarityMethod,
39) -> Vec<SemanticChange> {
40 let mut changes = Vec::new();
41 let mut file_modified = false;
42
43 let old_funcs: BTreeMap<String, FunctionDef> = old_parsed
44 .map(|p| {
45 p.extract_functions()
46 .into_iter()
47 .map(|f| (f.name.clone(), f))
48 .collect()
49 })
50 .unwrap_or_default();
51
52 let new_funcs: BTreeMap<String, FunctionDef> = new_parsed
53 .map(|p| {
54 p.extract_functions()
55 .into_iter()
56 .map(|f| (f.name.clone(), f))
57 .collect()
58 })
59 .unwrap_or_default();
60
61 let removed_old_names: BTreeSet<_> = old_funcs
62 .keys()
63 .filter(|name| !new_funcs.contains_key(*name))
64 .cloned()
65 .collect();
66 let moved_function_names = stable_order_moved_names(&old_funcs, &new_funcs);
67 let mut matched_old_names = HashSet::new();
68
69 for (name, func) in &new_funcs {
70 if !old_funcs.contains_key(name) {
71 let renamed_from = removed_old_names
72 .iter()
73 .filter(|old_name| !matched_old_names.contains(old_name.as_str()))
74 .filter_map(|old_name| {
75 let old_func = old_funcs.get(old_name)?;
76 let similarity = compute_similarity_with_language(
77 &normalized_function_for_matching(&old_func.content, old_name),
78 &normalized_function_for_matching(&func.content, name),
79 similarity_method,
80 Language::from_path(new_path),
81 );
82
83 let same_location_update = old_path == new_path
84 && old_func.start_line.abs_diff(func.start_line) <= 5
85 && similarity >= 0.30;
86 (similarity >= FUNCTION_RENAME_SIMILARITY_THRESHOLD || same_location_update)
87 .then_some((old_name, similarity))
88 })
89 .max_by(
90 |(left_name, left_similarity), (right_name, right_similarity)| {
91 left_similarity
92 .total_cmp(right_similarity)
93 .then_with(|| right_name.cmp(left_name))
94 },
95 )
96 .map(|(old_name, _)| old_name.clone());
97
98 if let Some(old_name) = renamed_from {
99 matched_old_names.insert(old_name.clone());
100 changes.push(SemanticChange::FunctionRenamed {
101 file: new_path.to_path_buf(),
102 old_name,
103 new_name: name.clone(),
104 importance: Some(ChangeImportance::Low),
105 });
106 file_modified = true;
107 } else {
108 let source = extraction_source(&old_funcs, func);
109 if let Some(source_name) = source {
110 changes.push(SemanticChange::FunctionExtracted {
111 file: new_path.to_path_buf(),
112 name: name.clone(),
113 source_file: Some(old_path.to_path_buf()),
114 source_name: Some(source_name),
115 importance: Some(ChangeImportance::High),
116 });
117 } else {
118 changes.push(SemanticChange::FunctionAdded {
119 file: new_path.to_path_buf(),
120 name: name.clone(),
121 importance: Some(ChangeImportance::High),
122 });
123 }
124 file_modified = true;
125 }
126 }
127 }
128
129 for name in removed_old_names {
130 if !changes.iter().any(
131 |c| matches!(c, SemanticChange::FunctionRenamed { old_name, .. } if old_name == &name),
132 ) {
133 changes.push(SemanticChange::FunctionDeleted {
134 file: new_path.to_path_buf(),
135 name,
136 importance: Some(ChangeImportance::High),
137 });
138 file_modified = true;
139 }
140 }
141
142 for (name, new_func) in &new_funcs {
143 if let Some(old_func) = old_funcs.get(name)
144 && old_func.signature != new_func.signature
145 {
146 changes.push(SemanticChange::SignatureChanged {
147 file: new_path.to_path_buf(),
148 name: name.clone(),
149 old_signature: old_func.signature.clone(),
150 new_signature: new_func.signature.clone(),
151 importance: Some(ChangeImportance::Medium),
152 });
153 file_modified = true;
154 } else if let Some(old_func) = old_funcs.get(name) {
155 if old_path == new_path
156 && old_func.content == new_func.content
157 && old_func.start_line != new_func.start_line
158 && moved_function_names.contains(name)
159 {
160 changes.push(SemanticChange::FunctionMoved {
161 file: new_path.to_path_buf(),
162 name: name.clone(),
163 old_start_line: old_func.start_line,
164 new_start_line: new_func.start_line,
165 importance: Some(ChangeImportance::Low),
166 });
167 file_modified = true;
168 } else if old_func.content != new_func.content {
169 changes.push(SemanticChange::FunctionModified {
170 file: new_path.to_path_buf(),
171 name: name.clone(),
172 importance: Some(ChangeImportance::Medium),
173 });
174 file_modified = true;
175 }
176 }
177 }
178
179 if file_modified {
180 changes.push(SemanticChange::FileModified {
181 path: new_path.to_path_buf(),
182 classification: None,
183 importance: None,
184 confidence: None,
185 });
186 }
187
188 changes
189}
190
191fn extraction_source(
192 old_funcs: &BTreeMap<String, FunctionDef>,
193 extracted: &FunctionDef,
194) -> Option<String> {
195 let extracted_lines = meaningful_body_lines(&extracted.content);
196 if extracted_lines.is_empty() {
197 return None;
198 }
199
200 old_funcs
201 .iter()
202 .filter_map(|(name, old_func)| {
203 let old_lines = meaningful_body_lines(&old_func.content);
204 let evidence = extraction_evidence(&old_lines, &extracted_lines);
205 evidence.is_strong().then_some((name.clone(), evidence))
206 })
207 .max_by(|left, right| {
208 left.1
209 .score
210 .total_cmp(&right.1.score)
211 .then_with(|| left.1.matched.cmp(&right.1.matched))
212 .then_with(|| right.0.cmp(&left.0))
213 })
214 .map(|(name, _)| name)
215}
216
/// Aggregated line-matching statistics for one extracted function against one candidate
/// source function.
#[derive(Debug)]
struct ExtractionEvidence {
    /// Number of extracted lines whose best match scored above zero.
    matched: usize,
    /// Sum of the best match scores over all extracted lines.
    score: f64,
    /// Number of extracted lines whose best match scored at least `1.0`.
    exact_matches: usize,
    /// Longest expression length among the exact matches.
    longest_exact_expression_len: usize,
    /// Total number of extracted lines that were considered.
    extracted_lines: usize,
}

impl ExtractionEvidence {
    /// Decides whether the evidence is strong enough to report an extraction.
    ///
    /// * No extracted lines: never strong.
    /// * Exactly one line: it must be an exact match with weighted coverage of at least
    ///   `0.95` and an expression of at least 20 bytes.
    /// * Otherwise: both plain coverage and weighted coverage must reach `0.70`.
    fn is_strong(&self) -> bool {
        let total = self.extracted_lines as f64;

        match self.extracted_lines {
            0 => false,
            1 => {
                let weighted_coverage = self.score / total;
                self.exact_matches == 1
                    && weighted_coverage >= 0.95
                    && self.longest_exact_expression_len >= 20
            }
            _ => {
                let coverage = self.matched as f64 / total;
                let weighted_coverage = self.score / total;
                coverage >= 0.70 && weighted_coverage >= 0.70
            }
        }
    }
}
244
245fn extraction_evidence(old_lines: &[String], extracted_lines: &[String]) -> ExtractionEvidence {
246 let mut matched = 0;
247 let mut score = 0.0;
248 let mut exact_matches = 0;
249 let mut longest_exact_expression_len = 0;
250
251 for line in extracted_lines {
252 let best = old_lines
253 .iter()
254 .map(|old_line| body_line_match(old_line, line))
255 .max_by(|left, right| left.score.total_cmp(&right.score))
256 .unwrap_or_default();
257 if best.score > 0.0 {
258 matched += 1;
259 score += best.score;
260 }
261 if best.score >= 1.0 {
262 exact_matches += 1;
263 longest_exact_expression_len = longest_exact_expression_len.max(best.expression_len);
264 }
265 }
266
267 ExtractionEvidence {
268 matched,
269 score,
270 exact_matches,
271 longest_exact_expression_len,
272 extracted_lines: extracted_lines.len(),
273 }
274}
275
276#[derive(Clone, Copy, Debug, Default)]
277struct BodyLineMatch {
278 score: f64,
279 expression_len: usize,
280}
281
282fn body_line_match(old_line: &str, extracted_line: &str) -> BodyLineMatch {
283 let old = comparable_body_expression(old_line);
284 let extracted = comparable_body_expression(extracted_line);
285 if old == extracted {
286 return BodyLineMatch {
287 score: 1.0,
288 expression_len: extracted.len(),
289 };
290 }
291 if extracted.len() >= 24 && old.contains(&extracted) {
292 return BodyLineMatch {
293 score: 0.75,
294 expression_len: extracted.len(),
295 };
296 }
297 if old.len() >= 24 && extracted.contains(&old) {
298 return BodyLineMatch {
299 score: 0.75,
300 expression_len: old.len(),
301 };
302 }
303 BodyLineMatch::default()
304}
305
/// Reduces a body line to the expression used for extraction matching.
///
/// Surrounding whitespace, trailing `;` and leading `return ` are removed. If the line
/// contains an assignment, only the text to the right of the first assignment `=` is
/// kept, so `let total = a + b;` and `return a + b;` both compare as `a + b`.
///
/// An `=` that belongs to `==`, `!=`, `<=`, `>=` or `=>` is not treated as an assignment.
/// Splitting on those would collapse `return a == b;` to `= b` and make unrelated
/// comparisons look identical. Compound assignments such as `+=` still split.
fn comparable_body_expression(line: &str) -> String {
    /// Returns the trimmed text after the first `=` that is an assignment operator.
    fn assignment_rhs(text: &str) -> Option<&str> {
        let bytes = text.as_bytes();
        bytes.iter().enumerate().find_map(|(idx, &byte)| {
            if byte != b'=' {
                return None;
            }
            let prev = idx.checked_sub(1).and_then(|i| bytes.get(i)).copied();
            let next = bytes.get(idx + 1).copied();
            let in_operator = matches!(prev, Some(b'=' | b'!' | b'<' | b'>'))
                || matches!(next, Some(b'=' | b'>'));
            // `=` is a single ASCII byte, so `idx + 1` is always a char boundary.
            (!in_operator).then(|| text[idx + 1..].trim())
        })
    }

    let trimmed = line
        .trim()
        .trim_end_matches(';')
        .trim_start_matches("return ")
        .trim();
    let expression = assignment_rhs(trimmed).unwrap_or(trimmed);
    expression.trim_end_matches(';').trim().to_string()
}
318
/// Collects the trimmed lines of `content` that carry body logic.
///
/// Blank lines, lone `{` / `}` lines and lines that start with one of the recognised
/// function-header prefixes (`fn `, `pub fn `, `async fn `, `pub async fn `) are dropped.
fn meaningful_body_lines(content: &str) -> Vec<String> {
    const HEADER_PREFIXES: [&str; 4] = ["fn ", "pub fn ", "async fn ", "pub async fn "];

    let mut kept = Vec::new();
    for raw in content.lines() {
        let line = raw.trim();
        if line.is_empty() || line == "{" || line == "}" {
            continue;
        }
        if HEADER_PREFIXES.iter().any(|prefix| line.starts_with(prefix)) {
            continue;
        }
        kept.push(line.to_string());
    }
    kept
}
335
336fn stable_order_moved_names(
337 old_funcs: &BTreeMap<String, FunctionDef>,
338 new_funcs: &BTreeMap<String, FunctionDef>,
339) -> HashSet<String> {
340 let mut old_order = stable_function_order(old_funcs, new_funcs, true);
341 let mut new_order = stable_function_order(old_funcs, new_funcs, false);
342
343 if old_order == new_order {
344 return HashSet::new();
345 }
346
347 old_order
348 .drain(..)
349 .zip(new_order.drain(..))
350 .filter_map(|(old_name, new_name)| (old_name != new_name).then_some([old_name, new_name]))
351 .flatten()
352 .collect()
353}
354
355fn stable_function_order(
356 old_funcs: &BTreeMap<String, FunctionDef>,
357 new_funcs: &BTreeMap<String, FunctionDef>,
358 use_old_position: bool,
359) -> Vec<String> {
360 let mut ordered = old_funcs
361 .iter()
362 .filter_map(|(name, old_func)| {
363 let new_func = new_funcs.get(name)?;
364 (old_func.content == new_func.content).then_some((
365 if use_old_position {
366 old_func.start_line
367 } else {
368 new_func.start_line
369 },
370 name.clone(),
371 ))
372 })
373 .collect::<Vec<_>>();
374 ordered.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
375 ordered.into_iter().map(|(_, name)| name).collect()
376}
377
/// Produces a name-independent, whitespace-normalized form of a function for similarity
/// comparison.
///
/// Whole-identifier occurrences of `name` are replaced with `__function_name__`, then
/// every line is trimmed, blank lines are dropped and the rest is joined with `\n`.
///
/// Only whole identifiers are replaced: an occurrence directly adjacent to another
/// identifier character (alphanumeric or `_`) is left alone, so masking `get` does not
/// rewrite `get_user` or `target`. An empty `name` replaces nothing, instead of inserting
/// the placeholder between every character as a plain `str::replace` would.
fn normalized_function_for_matching(content: &str, name: &str) -> String {
    const PLACEHOLDER: &str = "__function_name__";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    // A boundary only needs guarding on a side where the name itself ends in an
    // identifier character.
    let guard_start = name.chars().next().is_some_and(is_ident_char);
    let guard_end = name.chars().next_back().is_some_and(is_ident_char);

    let mut masked = String::with_capacity(content.len());
    let mut copied_to = 0;
    if !name.is_empty() {
        for (start, matched) in content.match_indices(name) {
            let end = start + matched.len();
            let glued_before = guard_start
                && content[..start]
                    .chars()
                    .next_back()
                    .is_some_and(is_ident_char);
            let glued_after =
                guard_end && content[end..].chars().next().is_some_and(is_ident_char);
            if glued_before || glued_after {
                continue;
            }
            masked.push_str(&content[copied_to..start]);
            masked.push_str(PLACEHOLDER);
            copied_to = end;
        }
    }
    masked.push_str(&content[copied_to..]);

    masked
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}