Skip to main content

semantic/analysis/
analysis_functions.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Function-level semantic changes.
3
4use std::collections::{BTreeMap, BTreeSet, HashSet};
5
6use objects::object::{ChangeImportance, SemanticChange};
7
8use super::analysis_similarity::{SimilarityMethod, compute_similarity_with_language};
9use crate::parser::{FunctionDef, Language, ParsedFile};
10
/// Minimum similarity score at which a removed function is accepted as the
/// rename source of a newly appearing function.
const FUNCTION_RENAME_SIMILARITY_THRESHOLD: f64 = 0.58;
12
13/// Detect function-level changes between two file versions.
14pub fn detect_function_changes(
15    old_path: &std::path::Path,
16    new_path: &std::path::Path,
17    old_content: &str,
18    new_content: &str,
19    similarity_method: SimilarityMethod,
20) -> Vec<SemanticChange> {
21    let old_parsed = ParsedFile::parse(old_content, Language::from_path(old_path));
22    let new_parsed = ParsedFile::parse(new_content, Language::from_path(new_path));
23
24    detect_function_changes_with_parsed(
25        old_path,
26        new_path,
27        old_parsed.as_ref(),
28        new_parsed.as_ref(),
29        similarity_method,
30    )
31}
32
33pub(crate) fn detect_function_changes_with_parsed(
34    old_path: &std::path::Path,
35    new_path: &std::path::Path,
36    old_parsed: Option<&ParsedFile>,
37    new_parsed: Option<&ParsedFile>,
38    similarity_method: SimilarityMethod,
39) -> Vec<SemanticChange> {
40    let mut changes = Vec::new();
41    let mut file_modified = false;
42
43    let old_funcs: BTreeMap<String, FunctionDef> = old_parsed
44        .map(|p| {
45            p.extract_functions()
46                .into_iter()
47                .map(|f| (f.name.clone(), f))
48                .collect()
49        })
50        .unwrap_or_default();
51
52    let new_funcs: BTreeMap<String, FunctionDef> = new_parsed
53        .map(|p| {
54            p.extract_functions()
55                .into_iter()
56                .map(|f| (f.name.clone(), f))
57                .collect()
58        })
59        .unwrap_or_default();
60
61    let removed_old_names: BTreeSet<_> = old_funcs
62        .keys()
63        .filter(|name| !new_funcs.contains_key(*name))
64        .cloned()
65        .collect();
66    let moved_function_names = stable_order_moved_names(&old_funcs, &new_funcs);
67    let mut matched_old_names = HashSet::new();
68
69    for (name, func) in &new_funcs {
70        if !old_funcs.contains_key(name) {
71            let renamed_from = removed_old_names
72                .iter()
73                .filter(|old_name| !matched_old_names.contains(old_name.as_str()))
74                .filter_map(|old_name| {
75                    let old_func = old_funcs.get(old_name)?;
76                    let similarity = compute_similarity_with_language(
77                        &normalized_function_for_matching(&old_func.content, old_name),
78                        &normalized_function_for_matching(&func.content, name),
79                        similarity_method,
80                        Language::from_path(new_path),
81                    );
82
83                    let same_location_update = old_path == new_path
84                        && old_func.start_line.abs_diff(func.start_line) <= 5
85                        && similarity >= 0.30;
86                    (similarity >= FUNCTION_RENAME_SIMILARITY_THRESHOLD || same_location_update)
87                        .then_some((old_name, similarity))
88                })
89                .max_by(
90                    |(left_name, left_similarity), (right_name, right_similarity)| {
91                        left_similarity
92                            .total_cmp(right_similarity)
93                            .then_with(|| right_name.cmp(left_name))
94                    },
95                )
96                .map(|(old_name, _)| old_name.clone());
97
98            if let Some(old_name) = renamed_from {
99                matched_old_names.insert(old_name.clone());
100                changes.push(SemanticChange::FunctionRenamed {
101                    file: new_path.to_path_buf(),
102                    old_name,
103                    new_name: name.clone(),
104                    importance: Some(ChangeImportance::Low),
105                });
106                file_modified = true;
107            } else {
108                let source = extraction_source(&old_funcs, func);
109                if let Some(source_name) = source {
110                    changes.push(SemanticChange::FunctionExtracted {
111                        file: new_path.to_path_buf(),
112                        name: name.clone(),
113                        source_file: Some(old_path.to_path_buf()),
114                        source_name: Some(source_name),
115                        importance: Some(ChangeImportance::High),
116                    });
117                } else {
118                    changes.push(SemanticChange::FunctionAdded {
119                        file: new_path.to_path_buf(),
120                        name: name.clone(),
121                        importance: Some(ChangeImportance::High),
122                    });
123                }
124                file_modified = true;
125            }
126        }
127    }
128
129    for name in removed_old_names {
130        if !changes.iter().any(
131            |c| matches!(c, SemanticChange::FunctionRenamed { old_name, .. } if old_name == &name),
132        ) {
133            changes.push(SemanticChange::FunctionDeleted {
134                file: new_path.to_path_buf(),
135                name,
136                importance: Some(ChangeImportance::High),
137            });
138            file_modified = true;
139        }
140    }
141
142    for (name, new_func) in &new_funcs {
143        if let Some(old_func) = old_funcs.get(name)
144            && old_func.signature != new_func.signature
145        {
146            changes.push(SemanticChange::SignatureChanged {
147                file: new_path.to_path_buf(),
148                name: name.clone(),
149                old_signature: old_func.signature.clone(),
150                new_signature: new_func.signature.clone(),
151                importance: Some(ChangeImportance::Medium),
152            });
153            file_modified = true;
154        } else if let Some(old_func) = old_funcs.get(name) {
155            if old_path == new_path
156                && old_func.content == new_func.content
157                && old_func.start_line != new_func.start_line
158                && moved_function_names.contains(name)
159            {
160                changes.push(SemanticChange::FunctionMoved {
161                    file: new_path.to_path_buf(),
162                    name: name.clone(),
163                    old_start_line: old_func.start_line,
164                    new_start_line: new_func.start_line,
165                    importance: Some(ChangeImportance::Low),
166                });
167                file_modified = true;
168            } else if old_func.content != new_func.content {
169                changes.push(SemanticChange::FunctionModified {
170                    file: new_path.to_path_buf(),
171                    name: name.clone(),
172                    importance: Some(ChangeImportance::Medium),
173                });
174                file_modified = true;
175            }
176        }
177    }
178
179    if file_modified {
180        changes.push(SemanticChange::FileModified {
181            path: new_path.to_path_buf(),
182            classification: None,
183            importance: None,
184            confidence: None,
185        });
186    }
187
188    changes
189}
190
191fn extraction_source(
192    old_funcs: &BTreeMap<String, FunctionDef>,
193    extracted: &FunctionDef,
194) -> Option<String> {
195    let extracted_lines = meaningful_body_lines(&extracted.content);
196    if extracted_lines.is_empty() {
197        return None;
198    }
199
200    old_funcs
201        .iter()
202        .filter_map(|(name, old_func)| {
203            let old_lines = meaningful_body_lines(&old_func.content);
204            let evidence = extraction_evidence(&old_lines, &extracted_lines);
205            evidence.is_strong().then_some((name.clone(), evidence))
206        })
207        .max_by(|left, right| {
208            left.1
209                .score
210                .total_cmp(&right.1.score)
211                .then_with(|| left.1.matched.cmp(&right.1.matched))
212                .then_with(|| right.0.cmp(&left.0))
213        })
214        .map(|(name, _)| name)
215}
216
/// Aggregated evidence that an extracted function's body came from one
/// particular old function.
#[derive(Debug)]
struct ExtractionEvidence {
    /// Number of extracted lines that matched some old line with a score above zero.
    matched: usize,
    /// Sum of the best per-line match scores.
    score: f64,
    /// Number of extracted lines whose best match scored at least 1.0.
    exact_matches: usize,
    /// Longest expression length among those exact matches.
    longest_exact_expression_len: usize,
    /// Total number of meaningful lines in the extracted function.
    extracted_lines: usize,
}

impl ExtractionEvidence {
    /// Whether the evidence is strong enough to report an extraction.
    ///
    /// An empty body never qualifies. A single-line body needs one exact
    /// match of at least 20 characters with a weighted coverage of at least
    /// 0.95. Longer bodies need both plain and weighted coverage of at least
    /// 0.70.
    fn is_strong(&self) -> bool {
        let total = self.extracted_lines as f64;
        match self.extracted_lines {
            0 => false,
            1 => {
                self.exact_matches == 1
                    && self.score / total >= 0.95
                    && self.longest_exact_expression_len >= 20
            }
            _ => self.matched as f64 / total >= 0.70 && self.score / total >= 0.70,
        }
    }
}
244
245fn extraction_evidence(old_lines: &[String], extracted_lines: &[String]) -> ExtractionEvidence {
246    let mut matched = 0;
247    let mut score = 0.0;
248    let mut exact_matches = 0;
249    let mut longest_exact_expression_len = 0;
250
251    for line in extracted_lines {
252        let best = old_lines
253            .iter()
254            .map(|old_line| body_line_match(old_line, line))
255            .max_by(|left, right| left.score.total_cmp(&right.score))
256            .unwrap_or_default();
257        if best.score > 0.0 {
258            matched += 1;
259            score += best.score;
260        }
261        if best.score >= 1.0 {
262            exact_matches += 1;
263            longest_exact_expression_len = longest_exact_expression_len.max(best.expression_len);
264        }
265    }
266
267    ExtractionEvidence {
268        matched,
269        score,
270        exact_matches,
271        longest_exact_expression_len,
272        extracted_lines: extracted_lines.len(),
273    }
274}
275
/// Result of comparing one old body line with one extracted body line.
///
/// The derived `Default` (score `0.0`, length `0`) represents "no match".
#[derive(Clone, Copy, Debug, Default)]
struct BodyLineMatch {
    /// Match strength: `1.0` for equal expressions, `0.75` when one expression
    /// contains the other, `0.0` otherwise.
    score: f64,
    /// Length in bytes of the expression the match was established on.
    expression_len: usize,
}
281
282fn body_line_match(old_line: &str, extracted_line: &str) -> BodyLineMatch {
283    let old = comparable_body_expression(old_line);
284    let extracted = comparable_body_expression(extracted_line);
285    if old == extracted {
286        return BodyLineMatch {
287            score: 1.0,
288            expression_len: extracted.len(),
289        };
290    }
291    if extracted.len() >= 24 && old.contains(&extracted) {
292        return BodyLineMatch {
293            score: 0.75,
294            expression_len: extracted.len(),
295        };
296    }
297    if old.len() >= 24 && extracted.contains(&old) {
298        return BodyLineMatch {
299            score: 0.75,
300            expression_len: old.len(),
301        };
302    }
303    BodyLineMatch::default()
304}
305
/// Reduce a body line to the expression used for extraction matching.
///
/// Surrounding whitespace, trailing semicolons and a leading `return ` are
/// removed. If the line contains an assignment, only its right-hand side is
/// kept, so `let total = a + b;` and `return a + b;` both become `a + b`.
///
/// Only a real assignment operator (`=`, or a compound form such as `+=` or
/// `<<=`) splits the line. The `=` characters that belong to `==`, `!=`,
/// `<=`, `>=` or `=>` are left alone; otherwise unrelated conditions such as
/// `if left == 0 {` and `if right == 0 {` would both collapse to `= 0 {` and
/// be reported as an exact match.
fn comparable_body_expression(line: &str) -> String {
    let trimmed = line
        .trim()
        .trim_end_matches(';')
        .trim_start_matches("return ")
        .trim();
    let expression = match assignment_operator_index(trimmed) {
        // `=` is ASCII, so slicing right after it stays on a char boundary.
        Some(index) => trimmed[index + 1..].trim(),
        None => trimmed,
    };
    expression.trim_end_matches(';').trim().to_string()
}

/// Byte index of the first `=` in `text` that acts as an assignment operator.
fn assignment_operator_index(text: &str) -> Option<usize> {
    let bytes = text.as_bytes();
    bytes.iter().enumerate().find_map(|(index, &byte)| {
        if byte != b'=' {
            return None;
        }
        let previous = index.checked_sub(1).and_then(|i| bytes.get(i)).copied();
        let before_previous = index.checked_sub(2).and_then(|i| bytes.get(i)).copied();
        let next = bytes.get(index + 1).copied();

        // First half of `==` or of the `=>` arrow.
        if matches!(next, Some(b'=' | b'>')) {
            return None;
        }
        let is_assignment = match previous {
            // Second half of `==` / `!=`.
            Some(b'=' | b'!') => false,
            // `<=` / `>=` are comparisons, `<<=` / `>>=` are assignments.
            Some(b'<' | b'>') => before_previous == previous,
            _ => true,
        };
        is_assignment.then_some(index)
    })
}
318
/// Collect the trimmed lines of `content` that carry body code.
///
/// Blank lines, lone `{` / `}` lines and lines that start with one of the
/// recognised function-signature prefixes are dropped.
fn meaningful_body_lines(content: &str) -> Vec<String> {
    const SIGNATURE_PREFIXES: [&str; 4] = ["fn ", "pub fn ", "async fn ", "pub async fn "];

    let mut kept = Vec::new();
    for raw in content.lines() {
        let line = raw.trim();
        if line.is_empty() || line == "{" || line == "}" {
            continue;
        }
        if SIGNATURE_PREFIXES
            .iter()
            .any(|prefix| line.starts_with(prefix))
        {
            continue;
        }
        kept.push(line.to_owned());
    }
    kept
}
335
336fn stable_order_moved_names(
337    old_funcs: &BTreeMap<String, FunctionDef>,
338    new_funcs: &BTreeMap<String, FunctionDef>,
339) -> HashSet<String> {
340    let mut old_order = stable_function_order(old_funcs, new_funcs, true);
341    let mut new_order = stable_function_order(old_funcs, new_funcs, false);
342
343    if old_order == new_order {
344        return HashSet::new();
345    }
346
347    old_order
348        .drain(..)
349        .zip(new_order.drain(..))
350        .filter_map(|(old_name, new_name)| (old_name != new_name).then_some([old_name, new_name]))
351        .flatten()
352        .collect()
353}
354
355fn stable_function_order(
356    old_funcs: &BTreeMap<String, FunctionDef>,
357    new_funcs: &BTreeMap<String, FunctionDef>,
358    use_old_position: bool,
359) -> Vec<String> {
360    let mut ordered = old_funcs
361        .iter()
362        .filter_map(|(name, old_func)| {
363            let new_func = new_funcs.get(name)?;
364            (old_func.content == new_func.content).then_some((
365                if use_old_position {
366                    old_func.start_line
367                } else {
368                    new_func.start_line
369                },
370                name.clone(),
371            ))
372        })
373        .collect::<Vec<_>>();
374    ordered.sort_by(|left, right| left.0.cmp(&right.0).then_with(|| left.1.cmp(&right.1)));
375    ordered.into_iter().map(|(_, name)| name).collect()
376}
377
/// Normalise a function's source so two versions can be compared regardless
/// of the function's own name and of indentation.
///
/// Every whole-identifier occurrence of `name` is replaced with the
/// `__function_name__` placeholder, each line is trimmed, and blank lines are
/// dropped.
///
/// Only whole identifiers are replaced. A plain substring replacement would
/// also rewrite unrelated identifiers that merely contain the name (for
/// `get`: `target`, `get_all`), which makes otherwise identical bodies look
/// different. An empty `name` leaves the content untouched.
fn normalized_function_for_matching(content: &str, name: &str) -> String {
    replace_whole_identifier(content, name, "__function_name__")
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}

/// Replace occurrences of `name` in `content` that are not directly adjacent
/// to another identifier character (alphanumeric or `_`).
fn replace_whole_identifier(content: &str, name: &str, placeholder: &str) -> String {
    if name.is_empty() {
        return content.to_string();
    }

    let is_identifier_char = |c: char| c.is_alphanumeric() || c == '_';
    let mut replaced = String::with_capacity(content.len());
    let mut copied_up_to = 0;

    for (start, matched) in content.match_indices(name) {
        let end = start + matched.len();
        let glued_before = content[..start]
            .chars()
            .next_back()
            .is_some_and(is_identifier_char);
        let glued_after = content[end..].chars().next().is_some_and(is_identifier_char);
        if glued_before || glued_after {
            continue;
        }
        replaced.push_str(&content[copied_up_to..start]);
        replaced.push_str(placeholder);
        copied_up_to = end;
    }

    replaced.push_str(&content[copied_up_to..]);
    replaced
}
386}