1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
/// Result returned by [`match_entities`].
pub struct MatchResult {
    /// The semantic changes produced by the matcher for one before/after pair
    /// of entity lists.
    pub changes: Vec<SemanticChange>,
}
9
10pub fn match_entities(
15 before: &[SemanticEntity],
16 after: &[SemanticEntity],
17 _file_path: &str,
18 _similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
19 commit_sha: Option<&str>,
20 author: Option<&str>,
21) -> MatchResult {
22 let mut changes: Vec<SemanticChange> = Vec::new();
23 let mut matched_before: HashSet<&str> = HashSet::new();
24 let mut matched_after: HashSet<&str> = HashSet::new();
25
26 let before_by_id: HashMap<&str, &SemanticEntity> =
27 before.iter().map(|e| (e.id.as_str(), e)).collect();
28 let after_by_id: HashMap<&str, &SemanticEntity> =
29 after.iter().map(|e| (e.id.as_str(), e)).collect();
30
31 for (&id, after_entity) in &after_by_id {
33 if let Some(before_entity) = before_by_id.get(id) {
34 matched_before.insert(id);
35 matched_after.insert(id);
36
37 if before_entity.content_hash != after_entity.content_hash {
38 let structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
39 (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
40 _ => None,
41 };
42 changes.push(SemanticChange {
43 id: format!("change::{id}"),
44 entity_id: id.to_string(),
45 change_type: ChangeType::Modified,
46 entity_type: after_entity.entity_type.clone(),
47 entity_name: after_entity.name.clone(),
48 entity_line: after_entity.start_line,
49 file_path: after_entity.file_path.clone(),
50 old_entity_name: None,
51 old_file_path: None,
52 before_content: Some(before_entity.content.clone()),
53 after_content: Some(after_entity.content.clone()),
54 commit_sha: commit_sha.map(String::from),
55 author: author.map(String::from),
56 timestamp: None,
57 structural_change,
58 });
59 }
60 }
61 }
62
63 let unmatched_before: Vec<&SemanticEntity> = before
65 .iter()
66 .filter(|e| !matched_before.contains(e.id.as_str()))
67 .collect();
68 let unmatched_after: Vec<&SemanticEntity> = after
69 .iter()
70 .filter(|e| !matched_after.contains(e.id.as_str()))
71 .collect();
72
73 let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
75 let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
76 for entity in &unmatched_before {
77 before_by_hash
78 .entry(entity.content_hash.as_str())
79 .or_default()
80 .push(entity);
81 if let Some(ref sh) = entity.structural_hash {
82 before_by_structural
83 .entry(sh.as_str())
84 .or_default()
85 .push(entity);
86 }
87 }
88
89 for after_entity in &unmatched_after {
90 if matched_after.contains(after_entity.id.as_str()) {
91 continue;
92 }
93 let found = before_by_hash
95 .get_mut(after_entity.content_hash.as_str())
96 .and_then(|c| c.pop());
97 let found = found.or_else(|| {
99 after_entity.structural_hash.as_ref().and_then(|sh| {
100 before_by_structural.get_mut(sh.as_str()).and_then(|c| {
101 c.iter()
102 .position(|e| !matched_before.contains(e.id.as_str()))
103 .map(|i| c.remove(i))
104 })
105 })
106 });
107
108 if let Some(before_entity) = found {
109 matched_before.insert(&before_entity.id);
110 matched_after.insert(&after_entity.id);
111
112 if before_entity.name == after_entity.name
115 && before_entity.file_path == after_entity.file_path
116 && before_entity.content_hash == after_entity.content_hash
117 {
118 continue;
119 }
120
121 let change_type = if before_entity.file_path != after_entity.file_path {
122 ChangeType::Moved
123 } else {
124 ChangeType::Renamed
125 };
126
127 let old_file_path = if before_entity.file_path != after_entity.file_path {
128 Some(before_entity.file_path.clone())
129 } else {
130 None
131 };
132
133 let old_entity_name = if before_entity.name != after_entity.name {
134 Some(before_entity.name.clone())
135 } else {
136 None
137 };
138
139 changes.push(SemanticChange {
140 id: format!("change::{}", after_entity.id),
141 entity_id: after_entity.id.clone(),
142 change_type,
143 entity_type: after_entity.entity_type.clone(),
144 entity_name: after_entity.name.clone(),
145 entity_line: after_entity.start_line,
146 file_path: after_entity.file_path.clone(),
147 old_entity_name,
148 old_file_path,
149 before_content: Some(before_entity.content.clone()),
150 after_content: Some(after_entity.content.clone()),
151 commit_sha: commit_sha.map(String::from),
152 author: author.map(String::from),
153 timestamp: None,
154 structural_change: None,
155 });
156 }
157 }
158
159 let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
162 .iter()
163 .filter(|e| !matched_before.contains(e.id.as_str()))
164 .copied()
165 .collect();
166 let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
167 .iter()
168 .filter(|e| !matched_after.contains(e.id.as_str()))
169 .copied()
170 .collect();
171
172 if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
173 const THRESHOLD: f64 = 0.8;
174 const SIZE_RATIO_CUTOFF: f64 = 0.5;
175
176 let before_sets: Vec<HashSet<&str>> = still_unmatched_before
178 .iter()
179 .map(|e| e.content.split_whitespace().collect())
180 .collect();
181 let after_sets: Vec<HashSet<&str>> = still_unmatched_after
182 .iter()
183 .map(|e| e.content.split_whitespace().collect())
184 .collect();
185
186 let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
188 for (i, e) in still_unmatched_before.iter().enumerate() {
189 before_by_type
190 .entry(e.entity_type.as_str())
191 .or_default()
192 .push(i);
193 }
194
195 for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
196 let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
197 Some(indices) => indices,
198 None => continue,
199 };
200
201 let a_set = &after_sets[ai];
202 let a_len = a_set.len();
203 let mut best_idx: Option<usize> = None;
204 let mut best_score: f64 = 0.0;
205
206 for &bi in candidates {
207 if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
208 continue;
209 }
210
211 let b_set = &before_sets[bi];
212 let b_len = b_set.len();
213
214 let (min_l, max_l) = if a_len < b_len {
216 (a_len, b_len)
217 } else {
218 (b_len, a_len)
219 };
220 if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
221 continue;
222 }
223
224 let intersection = a_set.intersection(b_set).count();
226 let union = a_len + b_len - intersection;
227 let score = if union == 0 {
228 0.0
229 } else {
230 intersection as f64 / union as f64
231 };
232
233 if score >= THRESHOLD && score > best_score {
234 best_score = score;
235 best_idx = Some(bi);
236 }
237 }
238
239 if let Some(bi) = best_idx {
240 let matched = still_unmatched_before[bi];
241 matched_before.insert(&matched.id);
242 matched_after.insert(&after_entity.id);
243
244 if matched.name == after_entity.name
246 && matched.file_path == after_entity.file_path
247 && matched.content_hash == after_entity.content_hash
248 {
249 continue;
250 }
251
252 let change_type = if matched.file_path != after_entity.file_path {
253 ChangeType::Moved
254 } else {
255 ChangeType::Renamed
256 };
257
258 let old_file_path = if matched.file_path != after_entity.file_path {
259 Some(matched.file_path.clone())
260 } else {
261 None
262 };
263
264 let old_entity_name = if matched.name != after_entity.name {
265 Some(matched.name.clone())
266 } else {
267 None
268 };
269
270 changes.push(SemanticChange {
271 id: format!("change::{}", after_entity.id),
272 entity_id: after_entity.id.clone(),
273 change_type,
274 entity_type: after_entity.entity_type.clone(),
275 entity_name: after_entity.name.clone(),
276 entity_line: after_entity.start_line,
277 file_path: after_entity.file_path.clone(),
278 old_entity_name,
279 old_file_path,
280 before_content: Some(matched.content.clone()),
281 after_content: Some(after_entity.content.clone()),
282 commit_sha: commit_sha.map(String::from),
283 author: author.map(String::from),
284 timestamp: None,
285 structural_change: None,
286 });
287 }
288 }
289 }
290
291 for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
293 changes.push(SemanticChange {
294 id: format!("change::deleted::{}", entity.id),
295 entity_id: entity.id.clone(),
296 change_type: ChangeType::Deleted,
297 entity_type: entity.entity_type.clone(),
298 entity_name: entity.name.clone(),
299 entity_line: entity.start_line,
300 file_path: entity.file_path.clone(),
301 old_entity_name: None,
302 old_file_path: None,
303 before_content: Some(entity.content.clone()),
304 after_content: None,
305 commit_sha: commit_sha.map(String::from),
306 author: author.map(String::from),
307 timestamp: None,
308 structural_change: None,
309 });
310 }
311
312 for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
314 changes.push(SemanticChange {
315 id: format!("change::added::{}", entity.id),
316 entity_id: entity.id.clone(),
317 change_type: ChangeType::Added,
318 entity_type: entity.entity_type.clone(),
319 entity_name: entity.name.clone(),
320 entity_line: entity.start_line,
321 file_path: entity.file_path.clone(),
322 old_entity_name: None,
323 old_file_path: None,
324 before_content: None,
325 after_content: Some(entity.content.clone()),
326 commit_sha: commit_sha.map(String::from),
327 author: author.map(String::from),
328 timestamp: None,
329 structural_change: None,
330 });
331 }
332
333 let modified_ids: HashSet<&str> = changes
338 .iter()
339 .filter(|c| c.change_type == ChangeType::Modified)
340 .map(|c| c.entity_id.as_str())
341 .collect();
342
343 if modified_ids.len() > 1 {
344 let mut parents_to_remove: HashSet<&str> = HashSet::new();
345 for entity in after.iter().chain(before.iter()) {
346 if let Some(ref pid) = entity.parent_id {
347 if modified_ids.contains(entity.id.as_str())
348 && modified_ids.contains(pid.as_str())
349 {
350 parents_to_remove.insert(pid.as_str());
351 }
352 }
353 }
354
355 if !parents_to_remove.is_empty() {
356 changes.retain(|c| {
357 !(c.change_type == ChangeType::Modified
358 && parents_to_remove.contains(c.entity_id.as_str()))
359 });
360 }
361 }
362
363 MatchResult { changes }
364}
365
366pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
368 let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
369 let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
370
371 let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
373 (tokens_a.len(), tokens_b.len())
374 } else {
375 (tokens_b.len(), tokens_a.len())
376 };
377 if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
378 return 0.0;
379 }
380
381 let set_a: HashSet<&str> = tokens_a.into_iter().collect();
382 let set_b: HashSet<&str> = tokens_b.into_iter().collect();
383
384 let intersection_size = set_a.intersection(&set_b).count();
385 let union_size = set_a.union(&set_b).count();
386
387 if union_size == 0 {
388 return 0.0;
389 }
390
391 intersection_size as f64 / union_size as f64
392}
393
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hash::content_hash;

    /// Builds a parentless `function` entity on line 1 whose hash is derived
    /// from `content`.
    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
        SemanticEntity {
            id: id.to_owned(),
            file_path: file_path.to_owned(),
            entity_type: String::from("function"),
            name: name.to_owned(),
            parent_id: None,
            content: content.to_owned(),
            content_hash: content_hash(content),
            structural_hash: None,
            start_line: 1,
            end_line: 1,
            metadata: None,
        }
    }

    /// Builds an entity in `a.ts` with an explicit type and optional parent.
    fn typed_entity(
        id: &str,
        entity_type: &str,
        name: &str,
        parent_id: Option<&str>,
        content: &str,
    ) -> SemanticEntity {
        SemanticEntity {
            entity_type: entity_type.to_owned(),
            parent_id: parent_id.map(str::to_owned),
            ..make_entity(id, name, content, "a.ts")
        }
    }

    /// Runs the matcher for `a.ts` without a similarity function or commit metadata.
    fn diff(before: &[SemanticEntity], after: &[SemanticEntity]) -> Vec<SemanticChange> {
        match_entities(before, after, "a.ts", None, None, None).changes
    }

    #[test]
    fn test_exact_match_modified() {
        let changes = diff(
            &[make_entity("a::f::foo", "foo", "old content", "a.ts")],
            &[make_entity("a::f::foo", "foo", "new content", "a.ts")],
        );
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].change_type, ChangeType::Modified);
    }

    #[test]
    fn test_exact_match_unchanged() {
        let changes = diff(
            &[make_entity("a::f::foo", "foo", "same", "a.ts")],
            &[make_entity("a::f::foo", "foo", "same", "a.ts")],
        );
        assert_eq!(changes.len(), 0);
    }

    #[test]
    fn test_added_deleted() {
        let changes = diff(
            &[make_entity("a::f::old", "old", "content", "a.ts")],
            &[make_entity("a::f::new", "new", "different", "a.ts")],
        );
        assert_eq!(changes.len(), 2);
        let types: Vec<ChangeType> = changes.iter().map(|c| c.change_type).collect();
        assert!(types.contains(&ChangeType::Deleted));
        assert!(types.contains(&ChangeType::Added));
    }

    #[test]
    fn test_content_hash_rename() {
        let changes = diff(
            &[make_entity("a::f::old", "old", "same content", "a.ts")],
            &[make_entity("a::f::new", "new", "same content", "a.ts")],
        );
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].change_type, ChangeType::Renamed);
    }

    #[test]
    fn test_parent_child_dedup_class_method() {
        let class_id = "a.ts::class::DataStack";
        let method_id = "a.ts::a.ts::class::DataStack::genPg";

        // Builds the class (lines 1-10) and its method (lines 5-8) for one snapshot.
        let snapshot = |class_body: &str, method_body: &str| {
            vec![
                SemanticEntity {
                    start_line: 1,
                    end_line: 10,
                    ..typed_entity(class_id, "class", "DataStack", None, class_body)
                },
                SemanticEntity {
                    start_line: 5,
                    end_line: 8,
                    ..typed_entity(method_id, "method", "genPg", Some(class_id), method_body)
                },
            ]
        };

        let before = snapshot(
            "class DataStack { constructor() {} genPg() { old } }",
            "genPg() { old }",
        );
        let after = snapshot(
            "class DataStack { constructor() {} genPg() { new } }",
            "genPg() { new }",
        );

        let changes = diff(&before, &after);

        // The class change is folded into the method change.
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].entity_name, "genPg");
        assert_eq!(changes[0].change_type, ChangeType::Modified);
    }

    #[test]
    fn test_parent_not_deduped_when_no_child_changes() {
        let class_id = "a.ts::class::Foo";
        let method_id = "a.ts::a.ts::class::Foo::bar";

        let before = vec![
            SemanticEntity {
                start_line: 1,
                end_line: 5,
                ..typed_entity(class_id, "class", "Foo", None, "class Foo { bar() {} }")
            },
            SemanticEntity {
                start_line: 2,
                end_line: 4,
                ..typed_entity(method_id, "method", "bar", Some(class_id), "bar() {}")
            },
        ];
        let after = vec![
            SemanticEntity {
                start_line: 1,
                end_line: 6,
                ..typed_entity(class_id, "class", "Foo", None, "class Foo { x = 1; bar() {} }")
            },
            SemanticEntity {
                start_line: 3,
                end_line: 5,
                ..typed_entity(method_id, "method", "bar", Some(class_id), "bar() {}")
            },
        ];

        let changes = diff(&before, &after);

        // Only the class changed, so its change must survive.
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].entity_name, "Foo");
        assert_eq!(changes[0].change_type, ChangeType::Modified);
    }

    #[test]
    fn test_default_similarity() {
        let fox = make_entity("a", "a", "the quick brown fox", "a.ts");
        let dog = make_entity("b", "b", "the quick brown dog", "a.ts");
        let score = default_similarity(&fox, &dog);
        assert!(score > 0.5);
        assert!(score < 1.0);
    }
}