1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6pub struct MatchResult {
7 pub changes: Vec<SemanticChange>,
8}
9
10pub fn match_entities(
15 before: &[SemanticEntity],
16 after: &[SemanticEntity],
17 _file_path: &str,
18 similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
19 commit_sha: Option<&str>,
20 author: Option<&str>,
21) -> MatchResult {
22 let mut changes: Vec<SemanticChange> = Vec::new();
23 let mut matched_before: HashSet<&str> = HashSet::new();
24 let mut matched_after: HashSet<&str> = HashSet::new();
25
26 let before_by_id: HashMap<&str, &SemanticEntity> =
27 before.iter().map(|e| (e.id.as_str(), e)).collect();
28 let after_by_id: HashMap<&str, &SemanticEntity> =
29 after.iter().map(|e| (e.id.as_str(), e)).collect();
30
31 for (&id, after_entity) in &after_by_id {
33 if let Some(before_entity) = before_by_id.get(id) {
34 matched_before.insert(id);
35 matched_after.insert(id);
36
37 if before_entity.content_hash != after_entity.content_hash {
38 let structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
39 (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
40 _ => None,
41 };
42 changes.push(SemanticChange {
43 id: format!("change::{id}"),
44 entity_id: id.to_string(),
45 change_type: ChangeType::Modified,
46 entity_type: after_entity.entity_type.clone(),
47 entity_name: after_entity.name.clone(),
48 file_path: after_entity.file_path.clone(),
49 old_file_path: None,
50 before_content: Some(before_entity.content.clone()),
51 after_content: Some(after_entity.content.clone()),
52 commit_sha: commit_sha.map(String::from),
53 author: author.map(String::from),
54 timestamp: None,
55 structural_change,
56 });
57 }
58 }
59 }
60
61 let unmatched_before: Vec<&SemanticEntity> = before
63 .iter()
64 .filter(|e| !matched_before.contains(e.id.as_str()))
65 .collect();
66 let unmatched_after: Vec<&SemanticEntity> = after
67 .iter()
68 .filter(|e| !matched_after.contains(e.id.as_str()))
69 .collect();
70
71 let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
73 let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
74 for entity in &unmatched_before {
75 before_by_hash
76 .entry(entity.content_hash.as_str())
77 .or_default()
78 .push(entity);
79 if let Some(ref sh) = entity.structural_hash {
80 before_by_structural
81 .entry(sh.as_str())
82 .or_default()
83 .push(entity);
84 }
85 }
86
87 for after_entity in &unmatched_after {
88 if matched_after.contains(after_entity.id.as_str()) {
89 continue;
90 }
91 let found = before_by_hash
93 .get_mut(after_entity.content_hash.as_str())
94 .and_then(|c| c.pop());
95 let found = found.or_else(|| {
97 after_entity.structural_hash.as_ref().and_then(|sh| {
98 before_by_structural.get_mut(sh.as_str()).and_then(|c| {
99 c.iter()
100 .position(|e| !matched_before.contains(e.id.as_str()))
101 .map(|i| c.remove(i))
102 })
103 })
104 });
105
106 if let Some(before_entity) = found {
107 matched_before.insert(&before_entity.id);
108 matched_after.insert(&after_entity.id);
109
110 let change_type = if before_entity.file_path != after_entity.file_path {
111 ChangeType::Moved
112 } else {
113 ChangeType::Renamed
114 };
115
116 let old_file_path = if before_entity.file_path != after_entity.file_path {
117 Some(before_entity.file_path.clone())
118 } else {
119 None
120 };
121
122 changes.push(SemanticChange {
123 id: format!("change::{}", after_entity.id),
124 entity_id: after_entity.id.clone(),
125 change_type,
126 entity_type: after_entity.entity_type.clone(),
127 entity_name: after_entity.name.clone(),
128 file_path: after_entity.file_path.clone(),
129 old_file_path,
130 before_content: Some(before_entity.content.clone()),
131 after_content: Some(after_entity.content.clone()),
132 commit_sha: commit_sha.map(String::from),
133 author: author.map(String::from),
134 timestamp: None,
135 structural_change: None,
136 });
137 }
138 }
139
140 let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
142 .iter()
143 .filter(|e| !matched_before.contains(e.id.as_str()))
144 .copied()
145 .collect();
146 let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
147 .iter()
148 .filter(|e| !matched_after.contains(e.id.as_str()))
149 .copied()
150 .collect();
151
152 if let Some(sim_fn) = similarity_fn {
153 if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
154 const THRESHOLD: f64 = 0.8;
155 const SIZE_RATIO_CUTOFF: f64 = 0.5;
157
158 let before_lens: Vec<usize> = still_unmatched_before
160 .iter()
161 .map(|e| e.content.split_whitespace().count())
162 .collect();
163 let after_lens: Vec<usize> = still_unmatched_after
164 .iter()
165 .map(|e| e.content.split_whitespace().count())
166 .collect();
167
168 for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
169 let mut best_match: Option<&SemanticEntity> = None;
170 let mut best_score: f64 = 0.0;
171 let a_len = after_lens[ai];
172
173 for (bi, before_entity) in still_unmatched_before.iter().enumerate() {
174 if matched_before.contains(before_entity.id.as_str()) {
175 continue;
176 }
177 if before_entity.entity_type != after_entity.entity_type {
178 continue;
179 }
180
181 let b_len = before_lens[bi];
183 let (min_l, max_l) = if a_len < b_len { (a_len, b_len) } else { (b_len, a_len) };
184 if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
185 continue;
186 }
187
188 let score = sim_fn(before_entity, after_entity);
189 if score > best_score && score >= THRESHOLD {
190 best_score = score;
191 best_match = Some(before_entity);
192 }
193 }
194
195 if let Some(matched) = best_match {
196 matched_before.insert(&matched.id);
197 matched_after.insert(&after_entity.id);
198
199 let change_type = if matched.file_path != after_entity.file_path {
200 ChangeType::Moved
201 } else {
202 ChangeType::Renamed
203 };
204
205 let old_file_path = if matched.file_path != after_entity.file_path {
206 Some(matched.file_path.clone())
207 } else {
208 None
209 };
210
211 changes.push(SemanticChange {
212 id: format!("change::{}", after_entity.id),
213 entity_id: after_entity.id.clone(),
214 change_type,
215 entity_type: after_entity.entity_type.clone(),
216 entity_name: after_entity.name.clone(),
217 file_path: after_entity.file_path.clone(),
218 old_file_path,
219 before_content: Some(matched.content.clone()),
220 after_content: Some(after_entity.content.clone()),
221 commit_sha: commit_sha.map(String::from),
222 author: author.map(String::from),
223 timestamp: None,
224 structural_change: None,
225 });
226 }
227 }
228 }
229 }
230
231 for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
233 changes.push(SemanticChange {
234 id: format!("change::deleted::{}", entity.id),
235 entity_id: entity.id.clone(),
236 change_type: ChangeType::Deleted,
237 entity_type: entity.entity_type.clone(),
238 entity_name: entity.name.clone(),
239 file_path: entity.file_path.clone(),
240 old_file_path: None,
241 before_content: Some(entity.content.clone()),
242 after_content: None,
243 commit_sha: commit_sha.map(String::from),
244 author: author.map(String::from),
245 timestamp: None,
246 structural_change: None,
247 });
248 }
249
250 for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
252 changes.push(SemanticChange {
253 id: format!("change::added::{}", entity.id),
254 entity_id: entity.id.clone(),
255 change_type: ChangeType::Added,
256 entity_type: entity.entity_type.clone(),
257 entity_name: entity.name.clone(),
258 file_path: entity.file_path.clone(),
259 old_file_path: None,
260 before_content: None,
261 after_content: Some(entity.content.clone()),
262 commit_sha: commit_sha.map(String::from),
263 author: author.map(String::from),
264 timestamp: None,
265 structural_change: None,
266 });
267 }
268
269 MatchResult { changes }
270}
271
272pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
274 let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
275 let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
276
277 let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
279 (tokens_a.len(), tokens_b.len())
280 } else {
281 (tokens_b.len(), tokens_a.len())
282 };
283 if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
284 return 0.0;
285 }
286
287 let set_a: HashSet<&str> = tokens_a.into_iter().collect();
288 let set_b: HashSet<&str> = tokens_b.into_iter().collect();
289
290 let intersection_size = set_a.intersection(&set_b).count();
291 let union_size = set_a.union(&set_b).count();
292
293 if union_size == 0 {
294 return 0.0;
295 }
296
297 intersection_size as f64 / union_size as f64
298}
299
300#[cfg(test)]
301mod tests {
302 use super::*;
303 use crate::utils::hash::content_hash;
304
305 fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
306 SemanticEntity {
307 id: id.to_string(),
308 file_path: file_path.to_string(),
309 entity_type: "function".to_string(),
310 name: name.to_string(),
311 parent_id: None,
312 content: content.to_string(),
313 content_hash: content_hash(content),
314 structural_hash: None,
315 start_line: 1,
316 end_line: 1,
317 metadata: None,
318 }
319 }
320
321 #[test]
322 fn test_exact_match_modified() {
323 let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
324 let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
325 let result = match_entities(&before, &after, "a.ts", None, None, None);
326 assert_eq!(result.changes.len(), 1);
327 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
328 }
329
330 #[test]
331 fn test_exact_match_unchanged() {
332 let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
333 let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
334 let result = match_entities(&before, &after, "a.ts", None, None, None);
335 assert_eq!(result.changes.len(), 0);
336 }
337
338 #[test]
339 fn test_added_deleted() {
340 let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
341 let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
342 let result = match_entities(&before, &after, "a.ts", None, None, None);
343 assert_eq!(result.changes.len(), 2);
344 let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
345 assert!(types.contains(&ChangeType::Deleted));
346 assert!(types.contains(&ChangeType::Added));
347 }
348
349 #[test]
350 fn test_content_hash_rename() {
351 let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
352 let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
353 let result = match_entities(&before, &after, "a.ts", None, None, None);
354 assert_eq!(result.changes.len(), 1);
355 assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
356 }
357
358 #[test]
359 fn test_default_similarity() {
360 let a = make_entity("a", "a", "the quick brown fox", "a.ts");
361 let b = make_entity("b", "b", "the quick brown dog", "a.ts");
362 let score = default_similarity(&a, &b);
363 assert!(score > 0.5);
364 assert!(score < 1.0);
365 }
366}