Skip to main content

sem_core/model/
identity.rs

1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6pub struct MatchResult {
7    pub changes: Vec<SemanticChange>,
8}
9
10/// 3-phase entity matching algorithm:
11/// 1. Exact ID match — same entity ID in before/after → modified or unchanged
12/// 2. Content hash match — same hash, different ID → renamed or moved
13/// 3. Fuzzy similarity — >80% content similarity → probable rename
14pub fn match_entities(
15    before: &[SemanticEntity],
16    after: &[SemanticEntity],
17    _file_path: &str,
18    _similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
19    commit_sha: Option<&str>,
20    author: Option<&str>,
21) -> MatchResult {
22    let mut changes: Vec<SemanticChange> = Vec::new();
23    let mut matched_before: HashSet<&str> = HashSet::new();
24    let mut matched_after: HashSet<&str> = HashSet::new();
25
26    let before_by_id: HashMap<&str, &SemanticEntity> =
27        before.iter().map(|e| (e.id.as_str(), e)).collect();
28    let after_by_id: HashMap<&str, &SemanticEntity> =
29        after.iter().map(|e| (e.id.as_str(), e)).collect();
30
31    // Phase 1: Exact ID match
32    for (&id, after_entity) in &after_by_id {
33        if let Some(before_entity) = before_by_id.get(id) {
34            matched_before.insert(id);
35            matched_after.insert(id);
36
37            if before_entity.content_hash != after_entity.content_hash {
38                let structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
39                    (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
40                    _ => None,
41                };
42                changes.push(SemanticChange {
43                    id: format!("change::{id}"),
44                    entity_id: id.to_string(),
45                    change_type: ChangeType::Modified,
46                    entity_type: after_entity.entity_type.clone(),
47                    entity_name: after_entity.name.clone(),
48                    entity_line: after_entity.start_line,
49                    file_path: after_entity.file_path.clone(),
50                    old_entity_name: None,
51                    old_file_path: None,
52                    before_content: Some(before_entity.content.clone()),
53                    after_content: Some(after_entity.content.clone()),
54                    commit_sha: commit_sha.map(String::from),
55                    author: author.map(String::from),
56                    timestamp: None,
57                    structural_change,
58                });
59            }
60        }
61    }
62
63    // Collect unmatched
64    let unmatched_before: Vec<&SemanticEntity> = before
65        .iter()
66        .filter(|e| !matched_before.contains(e.id.as_str()))
67        .collect();
68    let unmatched_after: Vec<&SemanticEntity> = after
69        .iter()
70        .filter(|e| !matched_after.contains(e.id.as_str()))
71        .collect();
72
73    // Phase 2: Content hash match (rename/move detection)
74    let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
75    let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
76    for entity in &unmatched_before {
77        before_by_hash
78            .entry(entity.content_hash.as_str())
79            .or_default()
80            .push(entity);
81        if let Some(ref sh) = entity.structural_hash {
82            before_by_structural
83                .entry(sh.as_str())
84                .or_default()
85                .push(entity);
86        }
87    }
88
89    for after_entity in &unmatched_after {
90        if matched_after.contains(after_entity.id.as_str()) {
91            continue;
92        }
93        // Try exact content_hash first
94        let found = before_by_hash
95            .get_mut(after_entity.content_hash.as_str())
96            .and_then(|c| c.pop());
97        // Fall back to structural_hash (formatting/comment changes don't matter)
98        let found = found.or_else(|| {
99            after_entity.structural_hash.as_ref().and_then(|sh| {
100                before_by_structural.get_mut(sh.as_str()).and_then(|c| {
101                    c.iter()
102                        .position(|e| !matched_before.contains(e.id.as_str()))
103                        .map(|i| c.remove(i))
104                })
105            })
106        });
107
108        if let Some(before_entity) = found {
109            matched_before.insert(&before_entity.id);
110            matched_after.insert(&after_entity.id);
111
112            // If name and file are the same, only the parent qualifier in the ID changed
113            // (e.g. parent class was renamed). Skip — the entity itself is unchanged.
114            if before_entity.name == after_entity.name
115                && before_entity.file_path == after_entity.file_path
116                && before_entity.content_hash == after_entity.content_hash
117            {
118                continue;
119            }
120
121            let change_type = if before_entity.file_path != after_entity.file_path {
122                ChangeType::Moved
123            } else {
124                ChangeType::Renamed
125            };
126
127            let old_file_path = if before_entity.file_path != after_entity.file_path {
128                Some(before_entity.file_path.clone())
129            } else {
130                None
131            };
132
133            let old_entity_name = if before_entity.name != after_entity.name {
134                Some(before_entity.name.clone())
135            } else {
136                None
137            };
138
139            changes.push(SemanticChange {
140                id: format!("change::{}", after_entity.id),
141                entity_id: after_entity.id.clone(),
142                change_type,
143                entity_type: after_entity.entity_type.clone(),
144                entity_name: after_entity.name.clone(),
145                entity_line: after_entity.start_line,
146                file_path: after_entity.file_path.clone(),
147                old_entity_name,
148                old_file_path,
149                before_content: Some(before_entity.content.clone()),
150                after_content: Some(after_entity.content.clone()),
151                commit_sha: commit_sha.map(String::from),
152                author: author.map(String::from),
153                timestamp: None,
154                structural_change: None,
155            });
156        }
157    }
158
159    // Phase 3: Fuzzy similarity (>80% threshold)
160    // Optimized: pre-compute token sets once per entity, group by type
161    let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
162        .iter()
163        .filter(|e| !matched_before.contains(e.id.as_str()))
164        .copied()
165        .collect();
166    let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
167        .iter()
168        .filter(|e| !matched_after.contains(e.id.as_str()))
169        .copied()
170        .collect();
171
172    if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
173        const THRESHOLD: f64 = 0.8;
174        const SIZE_RATIO_CUTOFF: f64 = 0.5;
175
176        // Pre-compute token sets once per entity (N+M instead of N×M allocations)
177        let before_sets: Vec<HashSet<&str>> = still_unmatched_before
178            .iter()
179            .map(|e| e.content.split_whitespace().collect())
180            .collect();
181        let after_sets: Vec<HashSet<&str>> = still_unmatched_after
182            .iter()
183            .map(|e| e.content.split_whitespace().collect())
184            .collect();
185
186        // Group before entities by type: O(sum(n_t × m_t)) instead of O(N×M)
187        let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
188        for (i, e) in still_unmatched_before.iter().enumerate() {
189            before_by_type
190                .entry(e.entity_type.as_str())
191                .or_default()
192                .push(i);
193        }
194
195        for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
196            let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
197                Some(indices) => indices,
198                None => continue,
199            };
200
201            let a_set = &after_sets[ai];
202            let a_len = a_set.len();
203            let mut best_idx: Option<usize> = None;
204            let mut best_score: f64 = 0.0;
205
206            for &bi in candidates {
207                if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
208                    continue;
209                }
210
211                let b_set = &before_sets[bi];
212                let b_len = b_set.len();
213
214                // Size ratio filter using pre-computed set lengths
215                let (min_l, max_l) = if a_len < b_len {
216                    (a_len, b_len)
217                } else {
218                    (b_len, a_len)
219                };
220                if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
221                    continue;
222                }
223
224                // Inline Jaccard on pre-computed sets
225                let intersection = a_set.intersection(b_set).count();
226                let union = a_len + b_len - intersection;
227                let score = if union == 0 {
228                    0.0
229                } else {
230                    intersection as f64 / union as f64
231                };
232
233                if score >= THRESHOLD && score > best_score {
234                    best_score = score;
235                    best_idx = Some(bi);
236                }
237            }
238
239            if let Some(bi) = best_idx {
240                let matched = still_unmatched_before[bi];
241                matched_before.insert(&matched.id);
242                matched_after.insert(&after_entity.id);
243
244                // If name and file are the same, only the parent qualifier changed.
245                if matched.name == after_entity.name
246                    && matched.file_path == after_entity.file_path
247                    && matched.content_hash == after_entity.content_hash
248                {
249                    continue;
250                }
251
252                let change_type = if matched.file_path != after_entity.file_path {
253                    ChangeType::Moved
254                } else {
255                    ChangeType::Renamed
256                };
257
258                let old_file_path = if matched.file_path != after_entity.file_path {
259                    Some(matched.file_path.clone())
260                } else {
261                    None
262                };
263
264                let old_entity_name = if matched.name != after_entity.name {
265                    Some(matched.name.clone())
266                } else {
267                    None
268                };
269
270                changes.push(SemanticChange {
271                    id: format!("change::{}", after_entity.id),
272                    entity_id: after_entity.id.clone(),
273                    change_type,
274                    entity_type: after_entity.entity_type.clone(),
275                    entity_name: after_entity.name.clone(),
276                    entity_line: after_entity.start_line,
277                    file_path: after_entity.file_path.clone(),
278                    old_entity_name,
279                    old_file_path,
280                    before_content: Some(matched.content.clone()),
281                    after_content: Some(after_entity.content.clone()),
282                    commit_sha: commit_sha.map(String::from),
283                    author: author.map(String::from),
284                    timestamp: None,
285                    structural_change: None,
286                });
287            }
288        }
289    }
290
291    // Phase 4: Intra-file reorder detection
292    // For entities that matched by exact ID with identical content (unchanged),
293    // check if their relative ordering changed within the file.
294    detect_reorders(before, after, &matched_before, &matched_after, &mut changes, commit_sha, author);
295
296    // Remaining unmatched before = deleted
297    for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
298        changes.push(SemanticChange {
299            id: format!("change::deleted::{}", entity.id),
300            entity_id: entity.id.clone(),
301            change_type: ChangeType::Deleted,
302            entity_type: entity.entity_type.clone(),
303            entity_name: entity.name.clone(),
304            entity_line: entity.start_line,
305            file_path: entity.file_path.clone(),
306            old_entity_name: None,
307            old_file_path: None,
308            before_content: Some(entity.content.clone()),
309            after_content: None,
310            commit_sha: commit_sha.map(String::from),
311            author: author.map(String::from),
312            timestamp: None,
313            structural_change: None,
314        });
315    }
316
317    // Remaining unmatched after = added
318    for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
319        changes.push(SemanticChange {
320            id: format!("change::added::{}", entity.id),
321            entity_id: entity.id.clone(),
322            change_type: ChangeType::Added,
323            entity_type: entity.entity_type.clone(),
324            entity_name: entity.name.clone(),
325            entity_line: entity.start_line,
326            file_path: entity.file_path.clone(),
327            old_entity_name: None,
328            old_file_path: None,
329            before_content: None,
330            after_content: Some(entity.content.clone()),
331            commit_sha: commit_sha.map(String::from),
332            author: author.map(String::from),
333            timestamp: None,
334            structural_change: None,
335        });
336    }
337
338    // Deduplicate: when a parent (class) is Modified and one or more of its
339    // children (methods) are also Modified, drop the parent. The child diffs
340    // are more specific and the parent body overlaps with them.
341    // Only applies to Modified; Added/Deleted should still show all entities.
342    let modified_ids: HashSet<&str> = changes
343        .iter()
344        .filter(|c| c.change_type == ChangeType::Modified)
345        .map(|c| c.entity_id.as_str())
346        .collect();
347
348    if modified_ids.len() > 1 {
349        let mut parents_to_remove: HashSet<&str> = HashSet::new();
350        for entity in after.iter().chain(before.iter()) {
351            if let Some(ref pid) = entity.parent_id {
352                if modified_ids.contains(entity.id.as_str())
353                    && modified_ids.contains(pid.as_str())
354                {
355                    parents_to_remove.insert(pid.as_str());
356                }
357            }
358        }
359
360        if !parents_to_remove.is_empty() {
361            changes.retain(|c| {
362                !(c.change_type == ChangeType::Modified
363                    && parents_to_remove.contains(c.entity_id.as_str()))
364            });
365        }
366    }
367
368    MatchResult { changes }
369}
370
371/// Default content similarity using Jaccard index on whitespace-split tokens
372pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
373    let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
374    let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
375
376    // Early rejection: if token counts differ too much, Jaccard can't reach 0.8
377    let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
378        (tokens_a.len(), tokens_b.len())
379    } else {
380        (tokens_b.len(), tokens_a.len())
381    };
382    if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
383        return 0.0;
384    }
385
386    let set_a: HashSet<&str> = tokens_a.into_iter().collect();
387    let set_b: HashSet<&str> = tokens_b.into_iter().collect();
388
389    let intersection_size = set_a.intersection(&set_b).count();
390    let union_size = set_a.union(&set_b).count();
391
392    if union_size == 0 {
393        return 0.0;
394    }
395
396    intersection_size as f64 / union_size as f64
397}
398
399/// Detect intra-file reordering of unchanged entities.
400///
401/// Takes entities that matched by exact ID with identical content and checks
402/// if their relative ordering changed. Uses longest increasing subsequence
403/// (LIS) on the "after" positions to find the minimum set of moved entities.
404fn detect_reorders(
405    before: &[SemanticEntity],
406    after: &[SemanticEntity],
407    matched_before: &HashSet<&str>,
408    matched_after: &HashSet<&str>,
409    changes: &mut Vec<SemanticChange>,
410    commit_sha: Option<&str>,
411    author: Option<&str>,
412) {
413    // Collect unchanged entities: matched by ID with same content_hash
414    let before_by_id: HashMap<&str, &SemanticEntity> =
415        before.iter().map(|e| (e.id.as_str(), e)).collect();
416
417    // Group by file. For each file, collect unchanged entities in their
418    // before-order, then look up their after-positions.
419    let mut by_file: HashMap<&str, Vec<(&SemanticEntity, &SemanticEntity)>> = HashMap::new();
420    for after_entity in after {
421        if !matched_after.contains(after_entity.id.as_str()) {
422            continue;
423        }
424        if let Some(before_entity) = before_by_id.get(after_entity.id.as_str()) {
425            if !matched_before.contains(before_entity.id.as_str()) {
426                continue;
427            }
428            // Only consider truly unchanged entities (same content)
429            if before_entity.content_hash != after_entity.content_hash {
430                continue;
431            }
432            // Only intra-file
433            if before_entity.file_path != after_entity.file_path {
434                continue;
435            }
436            by_file
437                .entry(after_entity.file_path.as_str())
438                .or_default()
439                .push((before_entity, after_entity));
440        }
441    }
442
443    for (_file, pairs) in &mut by_file {
444        if pairs.len() < 2 {
445            continue;
446        }
447
448        // Sort by before start_line to get the "before" ordering
449        pairs.sort_by_key(|(b, _)| b.start_line);
450
451        // Map to after start_lines in before-order
452        let after_lines: Vec<usize> = pairs.iter().map(|(_, a)| a.start_line).collect();
453
454        // Find LIS indices (entities that stayed in relative order)
455        let lis_set = longest_increasing_subsequence_indices(&after_lines);
456
457        // Entities NOT in LIS were reordered
458        for (i, (_before_entity, after_entity)) in pairs.iter().enumerate() {
459            if lis_set.contains(&i) {
460                continue;
461            }
462            changes.push(SemanticChange {
463                id: format!("change::reordered::{}", after_entity.id),
464                entity_id: after_entity.id.clone(),
465                change_type: ChangeType::Reordered,
466                entity_type: after_entity.entity_type.clone(),
467                entity_name: after_entity.name.clone(),
468                entity_line: after_entity.start_line,
469                file_path: after_entity.file_path.clone(),
470                old_entity_name: None,
471                old_file_path: None,
472                before_content: None,
473                after_content: None,
474                commit_sha: commit_sha.map(String::from),
475                author: author.map(String::from),
476                timestamp: None,
477                structural_change: None,
478            });
479        }
480    }
481}
482
483/// Find indices that form the longest increasing subsequence.
484/// Returns a HashSet of indices in the original array that are part of the LIS.
485fn longest_increasing_subsequence_indices(seq: &[usize]) -> HashSet<usize> {
486    let n = seq.len();
487    if n == 0 {
488        return HashSet::new();
489    }
490
491    // tails[i] = index in seq of the smallest tail element for IS of length i+1
492    let mut tails: Vec<usize> = Vec::new();
493    // parent[i] = index of previous element in the LIS ending at seq[i]
494    let mut parent: Vec<Option<usize>> = vec![None; n];
495    // tail_idx[i] = index in seq that tails[i] points to
496    let mut tail_idx: Vec<usize> = Vec::new();
497
498    for i in 0..n {
499        let pos = tails.partition_point(|&t| t < seq[i]);
500        if pos == tails.len() {
501            tails.push(seq[i]);
502            tail_idx.push(i);
503        } else {
504            tails[pos] = seq[i];
505            tail_idx[pos] = i;
506        }
507        parent[i] = if pos > 0 { Some(tail_idx[pos - 1]) } else { None };
508    }
509
510    // Trace back to find actual LIS indices
511    let mut result = HashSet::new();
512    let mut idx = *tail_idx.last().unwrap();
513    result.insert(idx);
514    while let Some(p) = parent[idx] {
515        result.insert(p);
516        idx = p;
517    }
518    result
519}
520
521#[cfg(test)]
522mod tests {
523    use super::*;
524    use crate::utils::hash::content_hash;
525
526    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
527        SemanticEntity {
528            id: id.to_string(),
529            file_path: file_path.to_string(),
530            entity_type: "function".to_string(),
531            name: name.to_string(),
532            parent_id: None,
533            content: content.to_string(),
534            content_hash: content_hash(content),
535            structural_hash: None,
536            start_line: 1,
537            end_line: 1,
538            metadata: None,
539        }
540    }
541
542    #[test]
543    fn test_exact_match_modified() {
544        let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
545        let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
546        let result = match_entities(&before, &after, "a.ts", None, None, None);
547        assert_eq!(result.changes.len(), 1);
548        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
549    }
550
551    #[test]
552    fn test_exact_match_unchanged() {
553        let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
554        let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
555        let result = match_entities(&before, &after, "a.ts", None, None, None);
556        assert_eq!(result.changes.len(), 0);
557    }
558
559    #[test]
560    fn test_added_deleted() {
561        let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
562        let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
563        let result = match_entities(&before, &after, "a.ts", None, None, None);
564        assert_eq!(result.changes.len(), 2);
565        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
566        assert!(types.contains(&ChangeType::Deleted));
567        assert!(types.contains(&ChangeType::Added));
568    }
569
570    #[test]
571    fn test_content_hash_rename() {
572        let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
573        let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
574        let result = match_entities(&before, &after, "a.ts", None, None, None);
575        assert_eq!(result.changes.len(), 1);
576        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
577    }
578
579    #[test]
580    fn test_parent_child_dedup_class_method() {
581        // Class entity contains the method body in its content.
582        // parent_id stores the full entity ID of the parent.
583        let class_before = SemanticEntity {
584            id: "a.ts::class::DataStack".to_string(),
585            file_path: "a.ts".to_string(),
586            entity_type: "class".to_string(),
587            name: "DataStack".to_string(),
588            parent_id: None,
589            content: "class DataStack { constructor() {} genPg() { old } }".to_string(),
590            content_hash: content_hash("class DataStack { constructor() {} genPg() { old } }"),
591            structural_hash: None,
592            start_line: 1,
593            end_line: 10,
594            metadata: None,
595        };
596        let method_before = SemanticEntity {
597            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
598            file_path: "a.ts".to_string(),
599            entity_type: "method".to_string(),
600            name: "genPg".to_string(),
601            parent_id: Some("a.ts::class::DataStack".to_string()),
602            content: "genPg() { old }".to_string(),
603            content_hash: content_hash("genPg() { old }"),
604            structural_hash: None,
605            start_line: 5,
606            end_line: 8,
607            metadata: None,
608        };
609
610        let class_after = SemanticEntity {
611            id: "a.ts::class::DataStack".to_string(),
612            file_path: "a.ts".to_string(),
613            entity_type: "class".to_string(),
614            name: "DataStack".to_string(),
615            parent_id: None,
616            content: "class DataStack { constructor() {} genPg() { new } }".to_string(),
617            content_hash: content_hash("class DataStack { constructor() {} genPg() { new } }"),
618            structural_hash: None,
619            start_line: 1,
620            end_line: 10,
621            metadata: None,
622        };
623        let method_after = SemanticEntity {
624            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
625            file_path: "a.ts".to_string(),
626            entity_type: "method".to_string(),
627            name: "genPg".to_string(),
628            parent_id: Some("a.ts::class::DataStack".to_string()),
629            content: "genPg() { new }".to_string(),
630            content_hash: content_hash("genPg() { new }"),
631            structural_hash: None,
632            start_line: 5,
633            end_line: 8,
634            metadata: None,
635        };
636
637        let before = vec![class_before, method_before];
638        let after = vec![class_after, method_after];
639        let result = match_entities(&before, &after, "a.ts", None, None, None);
640
641        // Should only report the method change, not the class
642        assert_eq!(result.changes.len(), 1);
643        assert_eq!(result.changes[0].entity_name, "genPg");
644        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
645    }
646
647    #[test]
648    fn test_parent_not_deduped_when_no_child_changes() {
649        // Only the class-level content changes (e.g. a field added), no method changes
650        let class_before = SemanticEntity {
651            id: "a.ts::class::Foo".to_string(),
652            file_path: "a.ts".to_string(),
653            entity_type: "class".to_string(),
654            name: "Foo".to_string(),
655            parent_id: None,
656            content: "class Foo { bar() {} }".to_string(),
657            content_hash: content_hash("class Foo { bar() {} }"),
658            structural_hash: None,
659            start_line: 1,
660            end_line: 5,
661            metadata: None,
662        };
663        let method_before = SemanticEntity {
664            id: "a.ts::a.ts::class::Foo::bar".to_string(),
665            file_path: "a.ts".to_string(),
666            entity_type: "method".to_string(),
667            name: "bar".to_string(),
668            parent_id: Some("a.ts::class::Foo".to_string()),
669            content: "bar() {}".to_string(),
670            content_hash: content_hash("bar() {}"),
671            structural_hash: None,
672            start_line: 2,
673            end_line: 4,
674            metadata: None,
675        };
676
677        let class_after = SemanticEntity {
678            id: "a.ts::class::Foo".to_string(),
679            file_path: "a.ts".to_string(),
680            entity_type: "class".to_string(),
681            name: "Foo".to_string(),
682            parent_id: None,
683            content: "class Foo { x = 1; bar() {} }".to_string(),
684            content_hash: content_hash("class Foo { x = 1; bar() {} }"),
685            structural_hash: None,
686            start_line: 1,
687            end_line: 6,
688            metadata: None,
689        };
690        let method_after = SemanticEntity {
691            id: "a.ts::a.ts::class::Foo::bar".to_string(),
692            file_path: "a.ts".to_string(),
693            entity_type: "method".to_string(),
694            name: "bar".to_string(),
695            parent_id: Some("a.ts::class::Foo".to_string()),
696            content: "bar() {}".to_string(),
697            content_hash: content_hash("bar() {}"),
698            structural_hash: None,
699            start_line: 3,
700            end_line: 5,
701            metadata: None,
702        };
703
704        let before = vec![class_before, method_before];
705        let after = vec![class_after, method_after];
706        let result = match_entities(&before, &after, "a.ts", None, None, None);
707
708        // Class changed but method didn't, so class should still appear
709        assert_eq!(result.changes.len(), 1);
710        assert_eq!(result.changes[0].entity_name, "Foo");
711        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
712    }
713
714    fn make_entity_at(id: &str, name: &str, content: &str, file_path: &str, line: usize) -> SemanticEntity {
715        SemanticEntity {
716            id: id.to_string(),
717            file_path: file_path.to_string(),
718            entity_type: "function".to_string(),
719            name: name.to_string(),
720            parent_id: None,
721            content: content.to_string(),
722            content_hash: content_hash(content),
723            structural_hash: None,
724            start_line: line,
725            end_line: line + 2,
726            metadata: None,
727        }
728    }
729
730    #[test]
731    fn test_reorder_detection() {
732        let before = vec![
733            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
734            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
735            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 9),
736        ];
737        let after = vec![
738            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
739            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 5),
740            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 9),
741        ];
742        let result = match_entities(&before, &after, "a.rs", None, None, None);
743        assert_eq!(result.changes.len(), 1);
744        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
745        // Either beta or gamma is marked, LIS picks the minimum
746        assert!(result.changes[0].entity_name == "beta" || result.changes[0].entity_name == "gamma");
747    }
748
749    #[test]
750    fn test_no_reorder_when_order_preserved() {
751        let before = vec![
752            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
753            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
754        ];
755        let after = vec![
756            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
757            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 10),
758        ];
759        let result = match_entities(&before, &after, "a.rs", None, None, None);
760        // Lines shifted but relative order is same, no reorder
761        assert_eq!(result.changes.len(), 0);
762    }
763
764    #[test]
765    fn test_default_similarity() {
766        let a = make_entity("a", "a", "the quick brown fox", "a.ts");
767        let b = make_entity("b", "b", "the quick brown dog", "a.ts");
768        let score = default_similarity(&a, &b);
769        assert!(score > 0.5);
770        assert!(score < 1.0);
771    }
772}