1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
/// Outcome of [`match_entities`]: the semantic changes detected between the
/// "before" and "after" entity snapshots.
pub struct MatchResult {
    /// Every detected change, in the order `match_entities` produced it.
    pub changes: Vec<SemanticChange>,
}
9
10pub fn match_entities(
15 before: &[SemanticEntity],
16 after: &[SemanticEntity],
17 _file_path: &str,
18 _similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
19 commit_sha: Option<&str>,
20 author: Option<&str>,
21) -> MatchResult {
22 let mut changes: Vec<SemanticChange> = Vec::new();
23 let mut matched_before: HashSet<&str> = HashSet::new();
24 let mut matched_after: HashSet<&str> = HashSet::new();
25
26 let before_by_id: HashMap<&str, &SemanticEntity> =
27 before.iter().map(|e| (e.id.as_str(), e)).collect();
28 let after_by_id: HashMap<&str, &SemanticEntity> =
29 after.iter().map(|e| (e.id.as_str(), e)).collect();
30
31 for (&id, after_entity) in &after_by_id {
33 if let Some(before_entity) = before_by_id.get(id) {
34 matched_before.insert(id);
35 matched_after.insert(id);
36
37 if before_entity.content_hash != after_entity.content_hash {
38 let structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
39 (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
40 _ => None,
41 };
42 changes.push(SemanticChange {
43 id: format!("change::{id}"),
44 entity_id: id.to_string(),
45 change_type: ChangeType::Modified,
46 entity_type: after_entity.entity_type.clone(),
47 entity_name: after_entity.name.clone(),
48 entity_line: after_entity.start_line,
49 file_path: after_entity.file_path.clone(),
50 old_entity_name: None,
51 old_file_path: None,
52 before_content: Some(before_entity.content.clone()),
53 after_content: Some(after_entity.content.clone()),
54 commit_sha: commit_sha.map(String::from),
55 author: author.map(String::from),
56 timestamp: None,
57 structural_change,
58 });
59 }
60 }
61 }
62
63 let unmatched_before: Vec<&SemanticEntity> = before
65 .iter()
66 .filter(|e| !matched_before.contains(e.id.as_str()))
67 .collect();
68 let unmatched_after: Vec<&SemanticEntity> = after
69 .iter()
70 .filter(|e| !matched_after.contains(e.id.as_str()))
71 .collect();
72
73 let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
75 let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
76 for entity in &unmatched_before {
77 before_by_hash
78 .entry(entity.content_hash.as_str())
79 .or_default()
80 .push(entity);
81 if let Some(ref sh) = entity.structural_hash {
82 before_by_structural
83 .entry(sh.as_str())
84 .or_default()
85 .push(entity);
86 }
87 }
88
89 for after_entity in &unmatched_after {
90 if matched_after.contains(after_entity.id.as_str()) {
91 continue;
92 }
93 let found = before_by_hash
95 .get_mut(after_entity.content_hash.as_str())
96 .and_then(|c| c.pop());
97 let found = found.or_else(|| {
99 after_entity.structural_hash.as_ref().and_then(|sh| {
100 before_by_structural.get_mut(sh.as_str()).and_then(|c| {
101 c.iter()
102 .position(|e| !matched_before.contains(e.id.as_str()))
103 .map(|i| c.remove(i))
104 })
105 })
106 });
107
108 if let Some(before_entity) = found {
109 matched_before.insert(&before_entity.id);
110 matched_after.insert(&after_entity.id);
111
112 if before_entity.name == after_entity.name
115 && before_entity.file_path == after_entity.file_path
116 && before_entity.content_hash == after_entity.content_hash
117 {
118 continue;
119 }
120
121 let change_type = if before_entity.file_path != after_entity.file_path {
122 ChangeType::Moved
123 } else {
124 ChangeType::Renamed
125 };
126
127 let old_file_path = if before_entity.file_path != after_entity.file_path {
128 Some(before_entity.file_path.clone())
129 } else {
130 None
131 };
132
133 let old_entity_name = if before_entity.name != after_entity.name {
134 Some(before_entity.name.clone())
135 } else {
136 None
137 };
138
139 changes.push(SemanticChange {
140 id: format!("change::{}", after_entity.id),
141 entity_id: after_entity.id.clone(),
142 change_type,
143 entity_type: after_entity.entity_type.clone(),
144 entity_name: after_entity.name.clone(),
145 entity_line: after_entity.start_line,
146 file_path: after_entity.file_path.clone(),
147 old_entity_name,
148 old_file_path,
149 before_content: Some(before_entity.content.clone()),
150 after_content: Some(after_entity.content.clone()),
151 commit_sha: commit_sha.map(String::from),
152 author: author.map(String::from),
153 timestamp: None,
154 structural_change: None,
155 });
156 }
157 }
158
159 let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
162 .iter()
163 .filter(|e| !matched_before.contains(e.id.as_str()))
164 .copied()
165 .collect();
166 let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
167 .iter()
168 .filter(|e| !matched_after.contains(e.id.as_str()))
169 .copied()
170 .collect();
171
172 if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
173 const THRESHOLD: f64 = 0.8;
174 const SIZE_RATIO_CUTOFF: f64 = 0.5;
175
176 let before_sets: Vec<HashSet<&str>> = still_unmatched_before
178 .iter()
179 .map(|e| e.content.split_whitespace().collect())
180 .collect();
181 let after_sets: Vec<HashSet<&str>> = still_unmatched_after
182 .iter()
183 .map(|e| e.content.split_whitespace().collect())
184 .collect();
185
186 let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
188 for (i, e) in still_unmatched_before.iter().enumerate() {
189 before_by_type
190 .entry(e.entity_type.as_str())
191 .or_default()
192 .push(i);
193 }
194
195 for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
196 let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
197 Some(indices) => indices,
198 None => continue,
199 };
200
201 let a_set = &after_sets[ai];
202 let a_len = a_set.len();
203 let mut best_idx: Option<usize> = None;
204 let mut best_score: f64 = 0.0;
205
206 for &bi in candidates {
207 if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
208 continue;
209 }
210
211 let b_set = &before_sets[bi];
212 let b_len = b_set.len();
213
214 let (min_l, max_l) = if a_len < b_len {
216 (a_len, b_len)
217 } else {
218 (b_len, a_len)
219 };
220 if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
221 continue;
222 }
223
224 let intersection = a_set.intersection(b_set).count();
226 let union = a_len + b_len - intersection;
227 let score = if union == 0 {
228 0.0
229 } else {
230 intersection as f64 / union as f64
231 };
232
233 if score >= THRESHOLD && score > best_score {
234 best_score = score;
235 best_idx = Some(bi);
236 }
237 }
238
239 if let Some(bi) = best_idx {
240 let matched = still_unmatched_before[bi];
241 matched_before.insert(&matched.id);
242 matched_after.insert(&after_entity.id);
243
244 if matched.name == after_entity.name
246 && matched.file_path == after_entity.file_path
247 && matched.content_hash == after_entity.content_hash
248 {
249 continue;
250 }
251
252 let change_type = if matched.file_path != after_entity.file_path {
253 ChangeType::Moved
254 } else {
255 ChangeType::Renamed
256 };
257
258 let old_file_path = if matched.file_path != after_entity.file_path {
259 Some(matched.file_path.clone())
260 } else {
261 None
262 };
263
264 let old_entity_name = if matched.name != after_entity.name {
265 Some(matched.name.clone())
266 } else {
267 None
268 };
269
270 changes.push(SemanticChange {
271 id: format!("change::{}", after_entity.id),
272 entity_id: after_entity.id.clone(),
273 change_type,
274 entity_type: after_entity.entity_type.clone(),
275 entity_name: after_entity.name.clone(),
276 entity_line: after_entity.start_line,
277 file_path: after_entity.file_path.clone(),
278 old_entity_name,
279 old_file_path,
280 before_content: Some(matched.content.clone()),
281 after_content: Some(after_entity.content.clone()),
282 commit_sha: commit_sha.map(String::from),
283 author: author.map(String::from),
284 timestamp: None,
285 structural_change: None,
286 });
287 }
288 }
289 }
290
291 for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
293 changes.push(SemanticChange {
294 id: format!("change::deleted::{}", entity.id),
295 entity_id: entity.id.clone(),
296 change_type: ChangeType::Deleted,
297 entity_type: entity.entity_type.clone(),
298 entity_name: entity.name.clone(),
299 entity_line: entity.start_line,
300 file_path: entity.file_path.clone(),
301 old_entity_name: None,
302 old_file_path: None,
303 before_content: Some(entity.content.clone()),
304 after_content: None,
305 commit_sha: commit_sha.map(String::from),
306 author: author.map(String::from),
307 timestamp: None,
308 structural_change: None,
309 });
310 }
311
312 for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
314 changes.push(SemanticChange {
315 id: format!("change::added::{}", entity.id),
316 entity_id: entity.id.clone(),
317 change_type: ChangeType::Added,
318 entity_type: entity.entity_type.clone(),
319 entity_name: entity.name.clone(),
320 entity_line: entity.start_line,
321 file_path: entity.file_path.clone(),
322 old_entity_name: None,
323 old_file_path: None,
324 before_content: None,
325 after_content: Some(entity.content.clone()),
326 commit_sha: commit_sha.map(String::from),
327 author: author.map(String::from),
328 timestamp: None,
329 structural_change: None,
330 });
331 }
332
333 MatchResult { changes }
334}
335
336pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
338 let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
339 let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
340
341 let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
343 (tokens_a.len(), tokens_b.len())
344 } else {
345 (tokens_b.len(), tokens_a.len())
346 };
347 if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
348 return 0.0;
349 }
350
351 let set_a: HashSet<&str> = tokens_a.into_iter().collect();
352 let set_b: HashSet<&str> = tokens_b.into_iter().collect();
353
354 let intersection_size = set_a.intersection(&set_b).count();
355 let union_size = set_a.union(&set_b).count();
356
357 if union_size == 0 {
358 return 0.0;
359 }
360
361 intersection_size as f64 / union_size as f64
362}
363
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::hash::content_hash;

    /// Creates a one-line `function` entity whose content hash is derived
    /// from `content`; no parent, structural hash or metadata is set.
    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
        SemanticEntity {
            id: id.to_owned(),
            name: name.to_owned(),
            entity_type: String::from("function"),
            file_path: file_path.to_owned(),
            parent_id: None,
            content_hash: content_hash(content),
            content: content.to_owned(),
            structural_hash: None,
            start_line: 1,
            end_line: 1,
            metadata: None,
        }
    }

    /// Runs the matcher on single-entity snapshots of `a.ts`, without a
    /// similarity function, commit or author, and returns the changes.
    fn diff(old: SemanticEntity, new: SemanticEntity) -> Vec<SemanticChange> {
        let before = vec![old];
        let after = vec![new];
        match_entities(&before, &after, "a.ts", None, None, None).changes
    }

    /// Same id with different content is reported as one modification.
    #[test]
    fn test_exact_match_modified() {
        let changes = diff(
            make_entity("a::f::foo", "foo", "old content", "a.ts"),
            make_entity("a::f::foo", "foo", "new content", "a.ts"),
        );
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].change_type, ChangeType::Modified);
    }

    /// Same id with the same content produces no change at all.
    #[test]
    fn test_exact_match_unchanged() {
        let changes = diff(
            make_entity("a::f::foo", "foo", "same", "a.ts"),
            make_entity("a::f::foo", "foo", "same", "a.ts"),
        );
        assert!(changes.is_empty());
    }

    /// Unrelated entities are reported as one deletion plus one addition.
    #[test]
    fn test_added_deleted() {
        let changes = diff(
            make_entity("a::f::old", "old", "content", "a.ts"),
            make_entity("a::f::new", "new", "different", "a.ts"),
        );
        assert_eq!(changes.len(), 2);
        let has = |wanted: ChangeType| changes.iter().any(|c| c.change_type == wanted);
        assert!(has(ChangeType::Deleted));
        assert!(has(ChangeType::Added));
    }

    /// Identical content under a new id and name is reported as a rename.
    #[test]
    fn test_content_hash_rename() {
        let changes = diff(
            make_entity("a::f::old", "old", "same content", "a.ts"),
            make_entity("a::f::new", "new", "same content", "a.ts"),
        );
        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].change_type, ChangeType::Renamed);
    }

    /// Entities sharing most, but not all, tokens score strictly between
    /// 0.5 and 1.0.
    #[test]
    fn test_default_similarity() {
        let fox = make_entity("a", "a", "the quick brown fox", "a.ts");
        let dog = make_entity("b", "b", "the quick brown dog", "a.ts");
        let score = default_similarity(&fox, &dog);
        assert!(score > 0.5);
        assert!(score < 1.0);
    }
}