1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6pub struct MatchResult {
7 pub changes: Vec<SemanticChange>,
8}
9
10pub fn match_entities(
15 before: &[SemanticEntity],
16 after: &[SemanticEntity],
17 _file_path: &str,
18 similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
19 commit_sha: Option<&str>,
20 author: Option<&str>,
21) -> MatchResult {
22 let mut changes: Vec<SemanticChange> = Vec::new();
23 let mut matched_before: HashSet<&str> = HashSet::new();
24 let mut matched_after: HashSet<&str> = HashSet::new();
25
26 let before_by_id: HashMap<&str, &SemanticEntity> =
27 before.iter().map(|e| (e.id.as_str(), e)).collect();
28 let after_by_id: HashMap<&str, &SemanticEntity> =
29 after.iter().map(|e| (e.id.as_str(), e)).collect();
30
31 for (&id, after_entity) in &after_by_id {
33 if let Some(before_entity) = before_by_id.get(id) {
34 matched_before.insert(id);
35 matched_after.insert(id);
36
37 if before_entity.content_hash != after_entity.content_hash {
38 let structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
39 (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
40 _ => None,
41 };
42 changes.push(SemanticChange {
43 id: format!("change::{id}"),
44 entity_id: id.to_string(),
45 change_type: ChangeType::Modified,
46 entity_type: after_entity.entity_type.clone(),
47 entity_name: after_entity.name.clone(),
48 file_path: after_entity.file_path.clone(),
49 old_entity_name: None,
50 old_file_path: None,
51 before_content: Some(before_entity.content.clone()),
52 after_content: Some(after_entity.content.clone()),
53 commit_sha: commit_sha.map(String::from),
54 author: author.map(String::from),
55 timestamp: None,
56 structural_change,
57 });
58 }
59 }
60 }
61
62 let unmatched_before: Vec<&SemanticEntity> = before
64 .iter()
65 .filter(|e| !matched_before.contains(e.id.as_str()))
66 .collect();
67 let unmatched_after: Vec<&SemanticEntity> = after
68 .iter()
69 .filter(|e| !matched_after.contains(e.id.as_str()))
70 .collect();
71
72 let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
74 let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
75 for entity in &unmatched_before {
76 before_by_hash
77 .entry(entity.content_hash.as_str())
78 .or_default()
79 .push(entity);
80 if let Some(ref sh) = entity.structural_hash {
81 before_by_structural
82 .entry(sh.as_str())
83 .or_default()
84 .push(entity);
85 }
86 }
87
88 for after_entity in &unmatched_after {
89 if matched_after.contains(after_entity.id.as_str()) {
90 continue;
91 }
92 let found = before_by_hash
94 .get_mut(after_entity.content_hash.as_str())
95 .and_then(|c| c.pop());
96 let found = found.or_else(|| {
98 after_entity.structural_hash.as_ref().and_then(|sh| {
99 before_by_structural.get_mut(sh.as_str()).and_then(|c| {
100 c.iter()
101 .position(|e| !matched_before.contains(e.id.as_str()))
102 .map(|i| c.remove(i))
103 })
104 })
105 });
106
107 if let Some(before_entity) = found {
108 matched_before.insert(&before_entity.id);
109 matched_after.insert(&after_entity.id);
110
111 if before_entity.name == after_entity.name
114 && before_entity.file_path == after_entity.file_path
115 && before_entity.content_hash == after_entity.content_hash
116 {
117 continue;
118 }
119
120 let change_type = if before_entity.file_path != after_entity.file_path {
121 ChangeType::Moved
122 } else {
123 ChangeType::Renamed
124 };
125
126 let old_file_path = if before_entity.file_path != after_entity.file_path {
127 Some(before_entity.file_path.clone())
128 } else {
129 None
130 };
131
132 let old_entity_name = if before_entity.name != after_entity.name {
133 Some(before_entity.name.clone())
134 } else {
135 None
136 };
137
138 changes.push(SemanticChange {
139 id: format!("change::{}", after_entity.id),
140 entity_id: after_entity.id.clone(),
141 change_type,
142 entity_type: after_entity.entity_type.clone(),
143 entity_name: after_entity.name.clone(),
144 file_path: after_entity.file_path.clone(),
145 old_entity_name,
146 old_file_path,
147 before_content: Some(before_entity.content.clone()),
148 after_content: Some(after_entity.content.clone()),
149 commit_sha: commit_sha.map(String::from),
150 author: author.map(String::from),
151 timestamp: None,
152 structural_change: None,
153 });
154 }
155 }
156
157 let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
159 .iter()
160 .filter(|e| !matched_before.contains(e.id.as_str()))
161 .copied()
162 .collect();
163 let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
164 .iter()
165 .filter(|e| !matched_after.contains(e.id.as_str()))
166 .copied()
167 .collect();
168
169 if let Some(sim_fn) = similarity_fn {
170 if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
171 const THRESHOLD: f64 = 0.8;
172 const SIZE_RATIO_CUTOFF: f64 = 0.5;
174
175 let before_lens: Vec<usize> = still_unmatched_before
177 .iter()
178 .map(|e| e.content.split_whitespace().count())
179 .collect();
180 let after_lens: Vec<usize> = still_unmatched_after
181 .iter()
182 .map(|e| e.content.split_whitespace().count())
183 .collect();
184
185 for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
186 let mut best_match: Option<&SemanticEntity> = None;
187 let mut best_score: f64 = 0.0;
188 let a_len = after_lens[ai];
189
190 for (bi, before_entity) in still_unmatched_before.iter().enumerate() {
191 if matched_before.contains(before_entity.id.as_str()) {
192 continue;
193 }
194 if before_entity.entity_type != after_entity.entity_type {
195 continue;
196 }
197
198 let b_len = before_lens[bi];
200 let (min_l, max_l) = if a_len < b_len { (a_len, b_len) } else { (b_len, a_len) };
201 if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
202 continue;
203 }
204
205 let score = sim_fn(before_entity, after_entity);
206 if score > best_score && score >= THRESHOLD {
207 best_score = score;
208 best_match = Some(before_entity);
209 }
210 }
211
212 if let Some(matched) = best_match {
213 matched_before.insert(&matched.id);
214 matched_after.insert(&after_entity.id);
215
216 if matched.name == after_entity.name
218 && matched.file_path == after_entity.file_path
219 && matched.content_hash == after_entity.content_hash
220 {
221 continue;
222 }
223
224 let change_type = if matched.file_path != after_entity.file_path {
225 ChangeType::Moved
226 } else {
227 ChangeType::Renamed
228 };
229
230 let old_file_path = if matched.file_path != after_entity.file_path {
231 Some(matched.file_path.clone())
232 } else {
233 None
234 };
235
236 let old_entity_name = if matched.name != after_entity.name {
237 Some(matched.name.clone())
238 } else {
239 None
240 };
241
242 changes.push(SemanticChange {
243 id: format!("change::{}", after_entity.id),
244 entity_id: after_entity.id.clone(),
245 change_type,
246 entity_type: after_entity.entity_type.clone(),
247 entity_name: after_entity.name.clone(),
248 file_path: after_entity.file_path.clone(),
249 old_entity_name,
250 old_file_path,
251 before_content: Some(matched.content.clone()),
252 after_content: Some(after_entity.content.clone()),
253 commit_sha: commit_sha.map(String::from),
254 author: author.map(String::from),
255 timestamp: None,
256 structural_change: None,
257 });
258 }
259 }
260 }
261 }
262
263 for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
265 changes.push(SemanticChange {
266 id: format!("change::deleted::{}", entity.id),
267 entity_id: entity.id.clone(),
268 change_type: ChangeType::Deleted,
269 entity_type: entity.entity_type.clone(),
270 entity_name: entity.name.clone(),
271 file_path: entity.file_path.clone(),
272 old_entity_name: None,
273 old_file_path: None,
274 before_content: Some(entity.content.clone()),
275 after_content: None,
276 commit_sha: commit_sha.map(String::from),
277 author: author.map(String::from),
278 timestamp: None,
279 structural_change: None,
280 });
281 }
282
283 for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
285 changes.push(SemanticChange {
286 id: format!("change::added::{}", entity.id),
287 entity_id: entity.id.clone(),
288 change_type: ChangeType::Added,
289 entity_type: entity.entity_type.clone(),
290 entity_name: entity.name.clone(),
291 file_path: entity.file_path.clone(),
292 old_entity_name: None,
293 old_file_path: None,
294 before_content: None,
295 after_content: Some(entity.content.clone()),
296 commit_sha: commit_sha.map(String::from),
297 author: author.map(String::from),
298 timestamp: None,
299 structural_change: None,
300 });
301 }
302
303 MatchResult { changes }
304}
305
306pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
308 let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
309 let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
310
311 let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
313 (tokens_a.len(), tokens_b.len())
314 } else {
315 (tokens_b.len(), tokens_a.len())
316 };
317 if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
318 return 0.0;
319 }
320
321 let set_a: HashSet<&str> = tokens_a.into_iter().collect();
322 let set_b: HashSet<&str> = tokens_b.into_iter().collect();
323
324 let intersection_size = set_a.intersection(&set_b).count();
325 let union_size = set_a.union(&set_b).count();
326
327 if union_size == 0 {
328 return 0.0;
329 }
330
331 intersection_size as f64 / union_size as f64
332}
333
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hash::content_hash;

    /// Builds a `function` entity spanning line 1 only, with its content hash
    /// derived from `content` and no structural hash.
    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
        let hash = content_hash(content);
        SemanticEntity {
            id: id.to_string(),
            name: name.to_string(),
            entity_type: "function".to_string(),
            file_path: file_path.to_string(),
            parent_id: None,
            content: content.to_string(),
            content_hash: hash,
            structural_hash: None,
            start_line: 1,
            end_line: 1,
            metadata: None,
        }
    }

    /// Runs the matcher on single-entity snapshots without a similarity
    /// function, commit or author.
    fn diff(before: SemanticEntity, after: SemanticEntity) -> MatchResult {
        match_entities(&[before], &[after], "a.ts", None, None, None)
    }

    // Same id, different content => one `Modified` change.
    #[test]
    fn test_exact_match_modified() {
        let outcome = diff(
            make_entity("a::f::foo", "foo", "old content", "a.ts"),
            make_entity("a::f::foo", "foo", "new content", "a.ts"),
        );
        assert_eq!(outcome.changes.len(), 1);
        assert_eq!(outcome.changes[0].change_type, ChangeType::Modified);
    }

    // Same id, same content => nothing to report.
    #[test]
    fn test_exact_match_unchanged() {
        let outcome = diff(
            make_entity("a::f::foo", "foo", "same", "a.ts"),
            make_entity("a::f::foo", "foo", "same", "a.ts"),
        );
        assert_eq!(outcome.changes.len(), 0);
    }

    // Unrelated entities => one deletion plus one addition.
    #[test]
    fn test_added_deleted() {
        let outcome = diff(
            make_entity("a::f::old", "old", "content", "a.ts"),
            make_entity("a::f::new", "new", "different", "a.ts"),
        );
        assert_eq!(outcome.changes.len(), 2);
        let has = |wanted: ChangeType| outcome.changes.iter().any(|c| c.change_type == wanted);
        assert!(has(ChangeType::Deleted));
        assert!(has(ChangeType::Added));
    }

    // Different id but identical content in the same file => `Renamed`.
    #[test]
    fn test_content_hash_rename() {
        let outcome = diff(
            make_entity("a::f::old", "old", "same content", "a.ts"),
            make_entity("a::f::new", "new", "same content", "a.ts"),
        );
        assert_eq!(outcome.changes.len(), 1);
        assert_eq!(outcome.changes[0].change_type, ChangeType::Renamed);
    }

    // Three of five distinct tokens are shared, so the score lies strictly
    // between 0.5 and 1.0.
    #[test]
    fn test_default_similarity() {
        let fox = make_entity("a", "a", "the quick brown fox", "a.ts");
        let dog = make_entity("b", "b", "the quick brown dog", "a.ts");
        let similarity = default_similarity(&fox, &dog);
        assert!(similarity > 0.5);
        assert!(similarity < 1.0);
    }
}