Skip to main content

sem_core/model/
identity.rs

1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6fn parent_name(
7    entity: &SemanticEntity,
8    by_id: &HashMap<&str, &SemanticEntity>,
9) -> Option<String> {
10    let mut parts: Vec<&str> = Vec::new();
11    let mut visited: HashSet<&str> = HashSet::new();
12    let mut pid = entity.parent_id.as_deref()?;
13    loop {
14        if !visited.insert(pid) {
15            break;
16        }
17        match by_id.get(pid) {
18            Some(parent) => {
19                // Skip ancestors with empty names (e.g. JSON's empty-string
20                // root-package key in package-lock.json). The full path is
21                // still recoverable from entity_id; the displayed chain is
22                // for human readability.
23                if !parent.name.is_empty() {
24                    parts.push(parent.name.as_str());
25                }
26                match parent.parent_id.as_deref() {
27                    Some(next) => pid = next,
28                    None => break,
29                }
30            }
31            None => break,
32        }
33    }
34    if parts.is_empty() {
35        return None;
36    }
37    parts.reverse();
38    Some(parts.join("::"))
39}
40
41pub struct MatchResult {
42    pub changes: Vec<SemanticChange>,
43}
44
45fn classify_match(before: &SemanticEntity, after: &SemanticEntity) -> ChangeType {
46    if before.file_path != after.file_path {
47        ChangeType::Moved
48    } else if before.parent_id != after.parent_id {
49        ChangeType::Moved // intra-file scope move (e.g. method moved between classes)
50    } else {
51        ChangeType::Renamed
52    }
53}
54
55fn make_change(
56    after_entity: &SemanticEntity,
57    change_type: ChangeType,
58    before_entity: Option<&SemanticEntity>,
59    commit_sha: Option<&str>,
60    author: Option<&str>,
61    by_id: &HashMap<&str, &SemanticEntity>,
62) -> SemanticChange {
63    let prefix = match change_type {
64        ChangeType::Added => "added::",
65        ChangeType::Deleted => "deleted::",
66        ChangeType::Reordered => "reordered::",
67        _ => "",
68    };
69    // For deleted entities, use the before entity as the primary source
70    let primary = if change_type == ChangeType::Deleted {
71        before_entity.unwrap_or(after_entity)
72    } else {
73        after_entity
74    };
75    SemanticChange {
76        id: format!("change::{prefix}{}", primary.id),
77        entity_id: primary.id.clone(),
78        change_type,
79        entity_type: primary.entity_type.clone(),
80        entity_name: primary.name.clone(),
81        entity_line: primary.start_line,
82        parent_name: parent_name(primary, by_id),
83        file_path: primary.file_path.clone(),
84        old_entity_name: before_entity.and_then(|b| {
85            (b.name != after_entity.name).then(|| b.name.clone())
86        }),
87        old_file_path: before_entity.and_then(|b| {
88            (b.file_path != after_entity.file_path).then(|| b.file_path.clone())
89        }),
90        old_parent_id: before_entity.and_then(|b| {
91            (b.parent_id != after_entity.parent_id).then(|| b.parent_id.clone()).flatten()
92        }),
93        before_content: before_entity.map(|b| b.content.clone()),
94        after_content: if change_type == ChangeType::Deleted || change_type == ChangeType::Reordered {
95            None
96        } else {
97            Some(after_entity.content.clone())
98        },
99        commit_sha: commit_sha.map(String::from),
100        author: author.map(String::from),
101        timestamp: None,
102        structural_change: None,
103    }
104}
105
106/// 3-phase entity matching algorithm:
107/// 1. Exact ID match — same entity ID in before/after → modified or unchanged
108/// 2. Content hash match — same hash, different ID → renamed or moved
109/// 3. Fuzzy similarity — >80% content similarity → probable rename
110pub fn match_entities(
111    before: &[SemanticEntity],
112    after: &[SemanticEntity],
113    _file_path: &str,
114    _similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
115    commit_sha: Option<&str>,
116    author: Option<&str>,
117) -> MatchResult {
118    let mut changes: Vec<SemanticChange> = Vec::new();
119    let mut matched_before: HashSet<&str> = HashSet::new();
120    let mut matched_after: HashSet<&str> = HashSet::new();
121
122    let before_by_id: HashMap<&str, &SemanticEntity> =
123        before.iter().map(|e| (e.id.as_str(), e)).collect();
124    let after_by_id: HashMap<&str, &SemanticEntity> =
125        after.iter().map(|e| (e.id.as_str(), e)).collect();
126
127    // Combined map for ancestor-chain lookup: after takes precedence so the
128    // displayed path reflects the post-change tree for non-deleted entities.
129    let combined_by_id: HashMap<&str, &SemanticEntity> = before
130        .iter()
131        .map(|e| (e.id.as_str(), e))
132        .chain(after.iter().map(|e| (e.id.as_str(), e)))
133        .collect();
134
135    // Phase 1: Exact ID match
136    for (&id, after_entity) in &after_by_id {
137        if let Some(before_entity) = before_by_id.get(id) {
138            matched_before.insert(id);
139            matched_after.insert(id);
140
141            if before_entity.content_hash != after_entity.content_hash {
142                let mut change = make_change(after_entity, ChangeType::Modified, Some(before_entity), commit_sha, author, &combined_by_id);
143                change.structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
144                    (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
145                    _ => None,
146                };
147                changes.push(change);
148            }
149        }
150    }
151
152    // Collect unmatched
153    let unmatched_before: Vec<&SemanticEntity> = before
154        .iter()
155        .filter(|e| !matched_before.contains(e.id.as_str()))
156        .collect();
157    let unmatched_after: Vec<&SemanticEntity> = after
158        .iter()
159        .filter(|e| !matched_after.contains(e.id.as_str()))
160        .collect();
161
162    // Phase 2: Content hash match (rename/move detection)
163    let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
164    let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
165    for entity in &unmatched_before {
166        before_by_hash
167            .entry(entity.content_hash.as_str())
168            .or_default()
169            .push(entity);
170        if let Some(ref sh) = entity.structural_hash {
171            before_by_structural
172                .entry(sh.as_str())
173                .or_default()
174                .push(entity);
175        }
176    }
177
178    for after_entity in &unmatched_after {
179        if matched_after.contains(after_entity.id.as_str()) {
180            continue;
181        }
182        // Try exact content_hash first
183        let found = before_by_hash
184            .get_mut(after_entity.content_hash.as_str())
185            .and_then(|c| c.pop());
186        // Fall back to structural_hash (formatting/comment changes don't matter)
187        let found = found.or_else(|| {
188            after_entity.structural_hash.as_ref().and_then(|sh| {
189                before_by_structural.get_mut(sh.as_str()).and_then(|c| {
190                    c.iter()
191                        .position(|e| !matched_before.contains(e.id.as_str()))
192                        .map(|i| c.remove(i))
193                })
194            })
195        });
196
197        if let Some(before_entity) = found {
198            matched_before.insert(&before_entity.id);
199            matched_after.insert(&after_entity.id);
200
201            // If name, file, and parent are the same, only the parent qualifier in the ID changed
202            // (e.g. parent class was renamed). Skip — the entity itself is unchanged.
203            // But if parent_id differs, this is an intra-file move (e.g. method moved between classes).
204            if before_entity.name == after_entity.name
205                && before_entity.file_path == after_entity.file_path
206                && before_entity.content_hash == after_entity.content_hash
207                && before_entity.parent_id == after_entity.parent_id
208            {
209                continue;
210            }
211
212            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
213        }
214    }
215
216    // Phase 3: Fuzzy similarity (>80% threshold)
217    // Optimized: pre-compute token sets once per entity, group by type
218    let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
219        .iter()
220        .filter(|e| !matched_before.contains(e.id.as_str()))
221        .copied()
222        .collect();
223    let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
224        .iter()
225        .filter(|e| !matched_after.contains(e.id.as_str()))
226        .copied()
227        .collect();
228
229    if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
230        const THRESHOLD: f64 = 0.8;
231        const SIZE_RATIO_CUTOFF: f64 = 0.5;
232
233        // Pre-compute token sets once per entity (N+M instead of N×M allocations)
234        let before_sets: Vec<HashSet<&str>> = still_unmatched_before
235            .iter()
236            .map(|e| e.content.split_whitespace().collect())
237            .collect();
238        let after_sets: Vec<HashSet<&str>> = still_unmatched_after
239            .iter()
240            .map(|e| e.content.split_whitespace().collect())
241            .collect();
242
243        // Group before entities by type: O(sum(n_t × m_t)) instead of O(N×M)
244        let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
245        for (i, e) in still_unmatched_before.iter().enumerate() {
246            before_by_type
247                .entry(e.entity_type.as_str())
248                .or_default()
249                .push(i);
250        }
251
252        for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
253            let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
254                Some(indices) => indices,
255                None => continue,
256            };
257
258            let a_set = &after_sets[ai];
259            let a_len = a_set.len();
260            let mut best_idx: Option<usize> = None;
261            let mut best_score: f64 = 0.0;
262
263            for &bi in candidates {
264                if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
265                    continue;
266                }
267
268                let b_set = &before_sets[bi];
269                let b_len = b_set.len();
270
271                // Size ratio filter using pre-computed set lengths
272                let (min_l, max_l) = if a_len < b_len {
273                    (a_len, b_len)
274                } else {
275                    (b_len, a_len)
276                };
277                if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
278                    continue;
279                }
280
281                // Inline Jaccard on pre-computed sets
282                let intersection = a_set.intersection(b_set).count();
283                let union = a_len + b_len - intersection;
284                let score = if union == 0 {
285                    0.0
286                } else {
287                    intersection as f64 / union as f64
288                };
289
290                if score >= THRESHOLD && score > best_score {
291                    best_score = score;
292                    best_idx = Some(bi);
293                }
294            }
295
296            if let Some(bi) = best_idx {
297                let matched = still_unmatched_before[bi];
298                matched_before.insert(&matched.id);
299                matched_after.insert(&after_entity.id);
300
301                // If name, file, and parent are the same, only the parent qualifier changed.
302                if matched.name == after_entity.name
303                    && matched.file_path == after_entity.file_path
304                    && matched.content_hash == after_entity.content_hash
305                    && matched.parent_id == after_entity.parent_id
306                {
307                    continue;
308                }
309
310                changes.push(make_change(after_entity, classify_match(matched, after_entity), Some(matched), commit_sha, author, &combined_by_id));
311            }
312        }
313    }
314
315    // Phase 4: Intra-file reorder detection
316    // For entities that matched by exact ID with identical content (unchanged),
317    // check if their relative ordering changed within the file.
318    detect_reorders(before, after, &matched_before, &matched_after, &mut changes, commit_sha, author, &combined_by_id);
319
320    // Remaining unmatched before = deleted
321    for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
322        changes.push(make_change(entity, ChangeType::Deleted, Some(entity), commit_sha, author, &combined_by_id));
323    }
324
325    // Remaining unmatched after = added
326    for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
327        changes.push(make_change(entity, ChangeType::Added, None, commit_sha, author, &combined_by_id));
328    }
329
330    MatchResult { changes }
331}
332
333/// Default content similarity using Jaccard index on whitespace-split tokens
334pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
335    let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
336    let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
337
338    // Early rejection: if token counts differ too much, Jaccard can't reach 0.8
339    let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
340        (tokens_a.len(), tokens_b.len())
341    } else {
342        (tokens_b.len(), tokens_a.len())
343    };
344    if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
345        return 0.0;
346    }
347
348    let set_a: HashSet<&str> = tokens_a.into_iter().collect();
349    let set_b: HashSet<&str> = tokens_b.into_iter().collect();
350
351    let intersection_size = set_a.intersection(&set_b).count();
352    let union_size = set_a.union(&set_b).count();
353
354    if union_size == 0 {
355        return 0.0;
356    }
357
358    intersection_size as f64 / union_size as f64
359}
360
361/// Detect intra-file reordering of unchanged entities.
362///
363/// Takes entities that matched by exact ID with identical content and checks
364/// if their relative ordering changed. Uses longest increasing subsequence
365/// (LIS) on the "after" positions to find the minimum set of moved entities.
366fn detect_reorders(
367    before: &[SemanticEntity],
368    after: &[SemanticEntity],
369    matched_before: &HashSet<&str>,
370    matched_after: &HashSet<&str>,
371    changes: &mut Vec<SemanticChange>,
372    commit_sha: Option<&str>,
373    author: Option<&str>,
374    by_id: &HashMap<&str, &SemanticEntity>,
375) {
376    // Collect unchanged entities: matched by ID with same content_hash
377    let before_by_id: HashMap<&str, &SemanticEntity> =
378        before.iter().map(|e| (e.id.as_str(), e)).collect();
379
380    // Group by file. For each file, collect unchanged entities in their
381    // before-order, then look up their after-positions.
382    let mut by_file: HashMap<&str, Vec<(&SemanticEntity, &SemanticEntity)>> = HashMap::new();
383    for after_entity in after {
384        if !matched_after.contains(after_entity.id.as_str()) {
385            continue;
386        }
387        if let Some(before_entity) = before_by_id.get(after_entity.id.as_str()) {
388            if !matched_before.contains(before_entity.id.as_str()) {
389                continue;
390            }
391            // Only consider truly unchanged entities (same content)
392            if before_entity.content_hash != after_entity.content_hash {
393                continue;
394            }
395            // Only intra-file
396            if before_entity.file_path != after_entity.file_path {
397                continue;
398            }
399            by_file
400                .entry(after_entity.file_path.as_str())
401                .or_default()
402                .push((before_entity, after_entity));
403        }
404    }
405
406    for (_file, pairs) in &mut by_file {
407        if pairs.len() < 2 {
408            continue;
409        }
410
411        // Sort by before start_line to get the "before" ordering
412        pairs.sort_by_key(|(b, _)| b.start_line);
413
414        // Map to after start_lines in before-order
415        let after_lines: Vec<usize> = pairs.iter().map(|(_, a)| a.start_line).collect();
416
417        // Find LIS indices (entities that stayed in relative order)
418        let lis_set = longest_increasing_subsequence_indices(&after_lines);
419
420        // Entities NOT in LIS were reordered
421        for (i, (_before_entity, after_entity)) in pairs.iter().enumerate() {
422            if lis_set.contains(&i) {
423                continue;
424            }
425            changes.push(make_change(after_entity, ChangeType::Reordered, None, commit_sha, author, by_id));
426        }
427    }
428}
429
430/// Find indices that form the longest increasing subsequence.
431/// Returns a HashSet of indices in the original array that are part of the LIS.
432fn longest_increasing_subsequence_indices(seq: &[usize]) -> HashSet<usize> {
433    let n = seq.len();
434    if n == 0 {
435        return HashSet::new();
436    }
437
438    // tails[i] = index in seq of the smallest tail element for IS of length i+1
439    let mut tails: Vec<usize> = Vec::new();
440    // parent[i] = index of previous element in the LIS ending at seq[i]
441    let mut parent: Vec<Option<usize>> = vec![None; n];
442    // tail_idx[i] = index in seq that tails[i] points to
443    let mut tail_idx: Vec<usize> = Vec::new();
444
445    for i in 0..n {
446        let pos = tails.partition_point(|&t| t < seq[i]);
447        if pos == tails.len() {
448            tails.push(seq[i]);
449            tail_idx.push(i);
450        } else {
451            tails[pos] = seq[i];
452            tail_idx[pos] = i;
453        }
454        parent[i] = if pos > 0 { Some(tail_idx[pos - 1]) } else { None };
455    }
456
457    // Trace back to find actual LIS indices
458    let mut result = HashSet::new();
459    let mut idx = *tail_idx.last().unwrap();
460    result.insert(idx);
461    while let Some(p) = parent[idx] {
462        result.insert(p);
463        idx = p;
464    }
465    result
466}
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471    use crate::utils::hash::content_hash;
472
473    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
474        SemanticEntity {
475            id: id.to_string(),
476            file_path: file_path.to_string(),
477            entity_type: "function".to_string(),
478            name: name.to_string(),
479            parent_id: None,
480            content: content.to_string(),
481            content_hash: content_hash(content),
482            structural_hash: None,
483            start_line: 1,
484            end_line: 1,
485            metadata: None,
486        }
487    }
488
489    #[test]
490    fn test_exact_match_modified() {
491        let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
492        let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
493        let result = match_entities(&before, &after, "a.ts", None, None, None);
494        assert_eq!(result.changes.len(), 1);
495        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
496    }
497
498    #[test]
499    fn test_exact_match_unchanged() {
500        let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
501        let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
502        let result = match_entities(&before, &after, "a.ts", None, None, None);
503        assert_eq!(result.changes.len(), 0);
504    }
505
506    #[test]
507    fn test_added_deleted() {
508        let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
509        let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
510        let result = match_entities(&before, &after, "a.ts", None, None, None);
511        assert_eq!(result.changes.len(), 2);
512        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
513        assert!(types.contains(&ChangeType::Deleted));
514        assert!(types.contains(&ChangeType::Added));
515    }
516
517    #[test]
518    fn test_content_hash_rename() {
519        let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
520        let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
521        let result = match_entities(&before, &after, "a.ts", None, None, None);
522        assert_eq!(result.changes.len(), 1);
523        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
524    }
525
526    #[test]
527    fn test_parent_child_dedup_class_method() {
528        // Class entity contains the method body in its content.
529        // parent_id stores the full entity ID of the parent.
530        let class_before = SemanticEntity {
531            id: "a.ts::class::DataStack".to_string(),
532            file_path: "a.ts".to_string(),
533            entity_type: "class".to_string(),
534            name: "DataStack".to_string(),
535            parent_id: None,
536            content: "class DataStack { constructor() {} genPg() { old } }".to_string(),
537            content_hash: content_hash("class DataStack { constructor() {} genPg() { old } }"),
538            structural_hash: None,
539            start_line: 1,
540            end_line: 10,
541            metadata: None,
542        };
543        let method_before = SemanticEntity {
544            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
545            file_path: "a.ts".to_string(),
546            entity_type: "method".to_string(),
547            name: "genPg".to_string(),
548            parent_id: Some("a.ts::class::DataStack".to_string()),
549            content: "genPg() { old }".to_string(),
550            content_hash: content_hash("genPg() { old }"),
551            structural_hash: None,
552            start_line: 5,
553            end_line: 8,
554            metadata: None,
555        };
556
557        let class_after = SemanticEntity {
558            id: "a.ts::class::DataStack".to_string(),
559            file_path: "a.ts".to_string(),
560            entity_type: "class".to_string(),
561            name: "DataStack".to_string(),
562            parent_id: None,
563            content: "class DataStack { constructor() {} genPg() { new } }".to_string(),
564            content_hash: content_hash("class DataStack { constructor() {} genPg() { new } }"),
565            structural_hash: None,
566            start_line: 1,
567            end_line: 10,
568            metadata: None,
569        };
570        let method_after = SemanticEntity {
571            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
572            file_path: "a.ts".to_string(),
573            entity_type: "method".to_string(),
574            name: "genPg".to_string(),
575            parent_id: Some("a.ts::class::DataStack".to_string()),
576            content: "genPg() { new }".to_string(),
577            content_hash: content_hash("genPg() { new }"),
578            structural_hash: None,
579            start_line: 5,
580            end_line: 8,
581            metadata: None,
582        };
583
584        let before = vec![class_before, method_before];
585        let after = vec![class_after, method_after];
586        let result = match_entities(&before, &after, "a.ts", None, None, None);
587
588        // match_entities no longer deduplicates — suppression happens in differ.rs.
589        // Both the class and the method are Modified here.
590        assert_eq!(result.changes.len(), 2);
591        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
592        assert!(types.iter().all(|t| *t == ChangeType::Modified));
593    }
594
595    #[test]
596    fn test_parent_not_deduped_when_no_child_changes() {
597        // Only the class-level content changes (e.g. a field added), no method changes
598        let class_before = SemanticEntity {
599            id: "a.ts::class::Foo".to_string(),
600            file_path: "a.ts".to_string(),
601            entity_type: "class".to_string(),
602            name: "Foo".to_string(),
603            parent_id: None,
604            content: "class Foo { bar() {} }".to_string(),
605            content_hash: content_hash("class Foo { bar() {} }"),
606            structural_hash: None,
607            start_line: 1,
608            end_line: 5,
609            metadata: None,
610        };
611        let method_before = SemanticEntity {
612            id: "a.ts::a.ts::class::Foo::bar".to_string(),
613            file_path: "a.ts".to_string(),
614            entity_type: "method".to_string(),
615            name: "bar".to_string(),
616            parent_id: Some("a.ts::class::Foo".to_string()),
617            content: "bar() {}".to_string(),
618            content_hash: content_hash("bar() {}"),
619            structural_hash: None,
620            start_line: 2,
621            end_line: 4,
622            metadata: None,
623        };
624
625        let class_after = SemanticEntity {
626            id: "a.ts::class::Foo".to_string(),
627            file_path: "a.ts".to_string(),
628            entity_type: "class".to_string(),
629            name: "Foo".to_string(),
630            parent_id: None,
631            content: "class Foo { x = 1; bar() {} }".to_string(),
632            content_hash: content_hash("class Foo { x = 1; bar() {} }"),
633            structural_hash: None,
634            start_line: 1,
635            end_line: 6,
636            metadata: None,
637        };
638        let method_after = SemanticEntity {
639            id: "a.ts::a.ts::class::Foo::bar".to_string(),
640            file_path: "a.ts".to_string(),
641            entity_type: "method".to_string(),
642            name: "bar".to_string(),
643            parent_id: Some("a.ts::class::Foo".to_string()),
644            content: "bar() {}".to_string(),
645            content_hash: content_hash("bar() {}"),
646            structural_hash: None,
647            start_line: 3,
648            end_line: 5,
649            metadata: None,
650        };
651
652        let before = vec![class_before, method_before];
653        let after = vec![class_after, method_after];
654        let result = match_entities(&before, &after, "a.ts", None, None, None);
655
656        // Class changed but method didn't, so class should still appear
657        assert_eq!(result.changes.len(), 1);
658        assert_eq!(result.changes[0].entity_name, "Foo");
659        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
660    }
661
662    fn make_entity_with_parent(id: &str, name: &str, content: &str, file_path: &str, parent_id: Option<&str>) -> SemanticEntity {
663        SemanticEntity {
664            id: id.to_string(),
665            file_path: file_path.to_string(),
666            entity_type: "method".to_string(),
667            name: name.to_string(),
668            parent_id: parent_id.map(String::from),
669            content: content.to_string(),
670            content_hash: content_hash(content),
671            structural_hash: None,
672            start_line: 1,
673            end_line: 1,
674            metadata: None,
675        }
676    }
677
678    #[test]
679    fn test_intra_file_move_between_classes() {
680        // Method moves from ClassA to ClassB in the same file
681        let before = vec![make_entity_with_parent(
682            "a.rs::class::ClassA::foo", "foo", "fn foo() { do_thing() }",
683            "a.rs", Some("a.rs::class::ClassA"),
684        )];
685        let after = vec![make_entity_with_parent(
686            "a.rs::class::ClassB::foo", "foo", "fn foo() { do_thing() }",
687            "a.rs", Some("a.rs::class::ClassB"),
688        )];
689        let result = match_entities(&before, &after, "a.rs", None, None, None);
690        assert_eq!(result.changes.len(), 1);
691        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
692        assert_eq!(result.changes[0].old_parent_id, Some("a.rs::class::ClassA".to_string()));
693    }
694
695    #[test]
696    fn test_same_parent_is_rename_not_move() {
697        // Same parent, different name = rename (not move)
698        // Content must be identical (same hash) so Phase 2 catches it
699        let body = "fn method(&self) { let x = self.compute(); self.validate(x); self.store(x) }";
700        let before = vec![make_entity_with_parent(
701            "a.rs::class::Foo::old_method", "old_method", body,
702            "a.rs", Some("a.rs::class::Foo"),
703        )];
704        let after = vec![make_entity_with_parent(
705            "a.rs::class::Foo::new_method", "new_method", body,
706            "a.rs", Some("a.rs::class::Foo"),
707        )];
708        let result = match_entities(&before, &after, "a.rs", None, None, None);
709        assert_eq!(result.changes.len(), 1);
710        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
711        assert!(result.changes[0].old_parent_id.is_none());
712    }
713
714    fn make_entity_at(id: &str, name: &str, content: &str, file_path: &str, line: usize) -> SemanticEntity {
715        SemanticEntity {
716            id: id.to_string(),
717            file_path: file_path.to_string(),
718            entity_type: "function".to_string(),
719            name: name.to_string(),
720            parent_id: None,
721            content: content.to_string(),
722            content_hash: content_hash(content),
723            structural_hash: None,
724            start_line: line,
725            end_line: line + 2,
726            metadata: None,
727        }
728    }
729
730    #[test]
731    fn test_reorder_detection() {
732        let before = vec![
733            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
734            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
735            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 9),
736        ];
737        let after = vec![
738            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
739            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 5),
740            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 9),
741        ];
742        let result = match_entities(&before, &after, "a.rs", None, None, None);
743        assert_eq!(result.changes.len(), 1);
744        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
745        // Either beta or gamma is marked, LIS picks the minimum
746        assert!(result.changes[0].entity_name == "beta" || result.changes[0].entity_name == "gamma");
747    }
748
749    #[test]
750    fn test_no_reorder_when_order_preserved() {
751        let before = vec![
752            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
753            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
754        ];
755        let after = vec![
756            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
757            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 10),
758        ];
759        let result = match_entities(&before, &after, "a.rs", None, None, None);
760        // Lines shifted but relative order is same, no reorder
761        assert_eq!(result.changes.len(), 0);
762    }
763
764    #[test]
765    fn test_default_similarity() {
766        let a = make_entity("a", "a", "the quick brown fox", "a.ts");
767        let b = make_entity("b", "b", "the quick brown dog", "a.ts");
768        let score = default_similarity(&a, &b);
769        assert!(score > 0.5);
770        assert!(score < 1.0);
771    }
772
773    #[test]
774    fn parent_name_terminates_on_cyclic_parent_id() {
775        // Two entities whose parent_id chains form a cycle. parent_name
776        // would loop forever without the visited-set guard.
777        let a = make_entity_with_parent("A", "A", "", "f", Some("B"));
778        let b = make_entity_with_parent("B", "B", "", "f", Some("A"));
779        let mut by_id: HashMap<&str, &SemanticEntity> = HashMap::new();
780        by_id.insert("A", &a);
781        by_id.insert("B", &b);
782        // Synthesize a leaf whose parent_id enters the cycle via A.
783        let leaf = make_entity_with_parent("L", "L", "", "f", Some("A"));
784        let chain = parent_name(&leaf, &by_id);
785        // Must terminate. We don't assert exact contents — order/composition
786        // depends on which side of the cycle is reached first; the safety
787        // property is "this returns at all."
788        assert!(chain.is_some());
789    }
790}