Skip to main content

sem_core/model/
identity.rs

1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6fn parent_name(
7    entity: &SemanticEntity,
8    by_id: &HashMap<&str, &SemanticEntity>,
9) -> Option<String> {
10    let mut parts: Vec<&str> = Vec::new();
11    let mut visited: HashSet<&str> = HashSet::new();
12    let mut pid = entity.parent_id.as_deref()?;
13    loop {
14        if !visited.insert(pid) {
15            break;
16        }
17        match by_id.get(pid) {
18            Some(parent) => {
19                // Skip ancestors with empty names (e.g. JSON's empty-string
20                // root-package key in package-lock.json). The full path is
21                // still recoverable from entity_id; the displayed chain is
22                // for human readability.
23                if !parent.name.is_empty() {
24                    parts.push(parent.name.as_str());
25                }
26                match parent.parent_id.as_deref() {
27                    Some(next) => pid = next,
28                    None => break,
29                }
30            }
31            None => break,
32        }
33    }
34    if parts.is_empty() {
35        return None;
36    }
37    parts.reverse();
38    Some(parts.join("::"))
39}
40
41pub struct MatchResult {
42    pub changes: Vec<SemanticChange>,
43}
44
45fn classify_match(before: &SemanticEntity, after: &SemanticEntity) -> ChangeType {
46    if before.file_path != after.file_path {
47        ChangeType::Moved
48    } else if before.parent_id != after.parent_id {
49        ChangeType::Moved // intra-file scope move (e.g. method moved between classes)
50    } else {
51        ChangeType::Renamed
52    }
53}
54
55fn same_signature_across_file_rename(
56    before: &SemanticEntity,
57    after: &SemanticEntity,
58    before_by_id: &HashMap<&str, &SemanticEntity>,
59    after_by_id: &HashMap<&str, &SemanticEntity>,
60) -> bool {
61    before.file_path != after.file_path
62        && before.entity_type == after.entity_type
63        && before.name == after.name
64        && parent_name(before, before_by_id) == parent_name(after, after_by_id)
65}
66
67fn make_change(
68    after_entity: &SemanticEntity,
69    change_type: ChangeType,
70    before_entity: Option<&SemanticEntity>,
71    commit_sha: Option<&str>,
72    author: Option<&str>,
73    by_id: &HashMap<&str, &SemanticEntity>,
74) -> SemanticChange {
75    let prefix = match change_type {
76        ChangeType::Added => "added::",
77        ChangeType::Deleted => "deleted::",
78        ChangeType::Reordered => "reordered::",
79        _ => "",
80    };
81    // For deleted entities, use the before entity as the primary source
82    let primary = if change_type == ChangeType::Deleted {
83        before_entity.unwrap_or(after_entity)
84    } else {
85        after_entity
86    };
87    SemanticChange {
88        id: format!("change::{prefix}{}", primary.id),
89        entity_id: primary.id.clone(),
90        change_type,
91        entity_type: primary.entity_type.clone(),
92        entity_name: primary.name.clone(),
93        entity_line: primary.start_line,
94        parent_name: parent_name(primary, by_id),
95        file_path: primary.file_path.clone(),
96        old_entity_name: before_entity.and_then(|b| {
97            (b.name != after_entity.name).then(|| b.name.clone())
98        }),
99        old_file_path: before_entity.and_then(|b| {
100            (b.file_path != after_entity.file_path).then(|| b.file_path.clone())
101        }),
102        old_parent_id: before_entity.and_then(|b| {
103            (b.parent_id != after_entity.parent_id).then(|| b.parent_id.clone()).flatten()
104        }),
105        before_content: before_entity.map(|b| b.content.clone()),
106        after_content: if change_type == ChangeType::Deleted || change_type == ChangeType::Reordered {
107            None
108        } else {
109            Some(after_entity.content.clone())
110        },
111        commit_sha: commit_sha.map(String::from),
112        author: author.map(String::from),
113        timestamp: None,
114        structural_change: None,
115    }
116}
117
118/// Entity matching algorithm:
119/// 1. Exact ID match — same entity ID in before/after → modified or unchanged
120/// 2. Content hash match — same hash, different ID → renamed or moved
121/// 3. Same signature across file rename → moved, even if content changed
122/// 4. Fuzzy similarity — >80% content similarity → probable rename
123pub fn match_entities(
124    before: &[SemanticEntity],
125    after: &[SemanticEntity],
126    _file_path: &str,
127    similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
128    commit_sha: Option<&str>,
129    author: Option<&str>,
130) -> MatchResult {
131    let mut changes: Vec<SemanticChange> = Vec::new();
132    let mut matched_before: HashSet<&str> = HashSet::new();
133    let mut matched_after: HashSet<&str> = HashSet::new();
134
135    let before_by_id: HashMap<&str, &SemanticEntity> =
136        before.iter().map(|e| (e.id.as_str(), e)).collect();
137    let after_by_id: HashMap<&str, &SemanticEntity> =
138        after.iter().map(|e| (e.id.as_str(), e)).collect();
139
140    // Combined map for ancestor-chain lookup: after takes precedence so the
141    // displayed path reflects the post-change tree for non-deleted entities.
142    let combined_by_id: HashMap<&str, &SemanticEntity> = before
143        .iter()
144        .map(|e| (e.id.as_str(), e))
145        .chain(after.iter().map(|e| (e.id.as_str(), e)))
146        .collect();
147
148    // Phase 1: Exact ID match
149    for (&id, after_entity) in &after_by_id {
150        if let Some(before_entity) = before_by_id.get(id) {
151            matched_before.insert(id);
152            matched_after.insert(id);
153
154            if before_entity.content_hash != after_entity.content_hash {
155                let mut change = make_change(after_entity, ChangeType::Modified, Some(before_entity), commit_sha, author, &combined_by_id);
156                change.structural_change = match (&before_entity.structural_hash, &after_entity.structural_hash) {
157                    (Some(before_sh), Some(after_sh)) => Some(before_sh != after_sh),
158                    _ => None,
159                };
160                changes.push(change);
161            }
162        }
163    }
164
165    // Collect unmatched
166    let unmatched_before: Vec<&SemanticEntity> = before
167        .iter()
168        .filter(|e| !matched_before.contains(e.id.as_str()))
169        .collect();
170    let unmatched_after: Vec<&SemanticEntity> = after
171        .iter()
172        .filter(|e| !matched_after.contains(e.id.as_str()))
173        .collect();
174
175    // Phase 2: Content hash match (rename/move detection)
176    let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
177    let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
178    for entity in &unmatched_before {
179        before_by_hash
180            .entry(entity.content_hash.as_str())
181            .or_default()
182            .push(entity);
183        if let Some(ref sh) = entity.structural_hash {
184            before_by_structural
185                .entry(sh.as_str())
186                .or_default()
187                .push(entity);
188        }
189    }
190
191    for after_entity in &unmatched_after {
192        if matched_after.contains(after_entity.id.as_str()) {
193            continue;
194        }
195        // Try exact content_hash first
196        let found = before_by_hash
197            .get_mut(after_entity.content_hash.as_str())
198            .and_then(|c| c.pop());
199        // Fall back to structural_hash (formatting/comment changes don't matter)
200        let found = found.or_else(|| {
201            after_entity.structural_hash.as_ref().and_then(|sh| {
202                before_by_structural.get_mut(sh.as_str()).and_then(|c| {
203                    c.iter()
204                        .position(|e| !matched_before.contains(e.id.as_str()))
205                        .map(|i| c.remove(i))
206                })
207            })
208        });
209
210        if let Some(before_entity) = found {
211            matched_before.insert(&before_entity.id);
212            matched_after.insert(&after_entity.id);
213
214            // If name, file, and parent are the same, only the parent qualifier in the ID changed
215            // (e.g. parent class was renamed). Skip — the entity itself is unchanged.
216            // But if parent_id differs, this is an intra-file move (e.g. method moved between classes).
217            if before_entity.name == after_entity.name
218                && before_entity.file_path == after_entity.file_path
219                && before_entity.content_hash == after_entity.content_hash
220                && before_entity.parent_id == after_entity.parent_id
221            {
222                continue;
223            }
224
225            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
226        }
227    }
228
229    // Phase 3: Same logical signature across a file rename.
230    // A file path change changes entity IDs, so renamed files with edited
231    // entities need a signature fallback to avoid add/delete pairs.
232    for after_entity in &unmatched_after {
233        if matched_after.contains(after_entity.id.as_str()) {
234            continue;
235        }
236
237        let mut best_match: Option<&SemanticEntity> = None;
238        let mut best_score = f64::NEG_INFINITY;
239
240        for before_entity in &unmatched_before {
241            if matched_before.contains(before_entity.id.as_str()) {
242                continue;
243            }
244            if !same_signature_across_file_rename(before_entity, after_entity, &before_by_id, &after_by_id) {
245                continue;
246            }
247
248            let score = similarity_fn
249                .map(|f| f(before_entity, after_entity))
250                .unwrap_or_else(|| default_similarity(before_entity, after_entity));
251            if score > best_score {
252                best_score = score;
253                best_match = Some(before_entity);
254            }
255        }
256
257        if let Some(before_entity) = best_match {
258            matched_before.insert(&before_entity.id);
259            matched_after.insert(&after_entity.id);
260            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
261        }
262    }
263
264    // Phase 4: Fuzzy similarity (>80% threshold)
265    // Optimized: pre-compute token sets once per entity, group by type
266    let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
267        .iter()
268        .filter(|e| !matched_before.contains(e.id.as_str()))
269        .copied()
270        .collect();
271    let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
272        .iter()
273        .filter(|e| !matched_after.contains(e.id.as_str()))
274        .copied()
275        .collect();
276
277    if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
278        const THRESHOLD: f64 = 0.8;
279        const SIZE_RATIO_CUTOFF: f64 = 0.5;
280
281        // Pre-compute token sets once per entity (N+M instead of N×M allocations)
282        let before_sets: Vec<HashSet<&str>> = still_unmatched_before
283            .iter()
284            .map(|e| e.content.split_whitespace().collect())
285            .collect();
286        let after_sets: Vec<HashSet<&str>> = still_unmatched_after
287            .iter()
288            .map(|e| e.content.split_whitespace().collect())
289            .collect();
290
291        // Group before entities by type: O(sum(n_t × m_t)) instead of O(N×M)
292        let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
293        for (i, e) in still_unmatched_before.iter().enumerate() {
294            before_by_type
295                .entry(e.entity_type.as_str())
296                .or_default()
297                .push(i);
298        }
299
300        for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
301            let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
302                Some(indices) => indices,
303                None => continue,
304            };
305
306            let a_set = &after_sets[ai];
307            let a_len = a_set.len();
308            let mut best_idx: Option<usize> = None;
309            let mut best_score: f64 = 0.0;
310
311            for &bi in candidates {
312                if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
313                    continue;
314                }
315
316                let b_set = &before_sets[bi];
317                let b_len = b_set.len();
318
319                // Size ratio filter using pre-computed set lengths
320                let (min_l, max_l) = if a_len < b_len {
321                    (a_len, b_len)
322                } else {
323                    (b_len, a_len)
324                };
325                if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
326                    continue;
327                }
328
329                // Inline Jaccard on pre-computed sets
330                let intersection = a_set.intersection(b_set).count();
331                let union = a_len + b_len - intersection;
332                let score = if union == 0 {
333                    0.0
334                } else {
335                    intersection as f64 / union as f64
336                };
337
338                if score >= THRESHOLD && score > best_score {
339                    best_score = score;
340                    best_idx = Some(bi);
341                }
342            }
343
344            if let Some(bi) = best_idx {
345                let matched = still_unmatched_before[bi];
346                matched_before.insert(&matched.id);
347                matched_after.insert(&after_entity.id);
348
349                // If name, file, and parent are the same, only the parent qualifier changed.
350                if matched.name == after_entity.name
351                    && matched.file_path == after_entity.file_path
352                    && matched.content_hash == after_entity.content_hash
353                    && matched.parent_id == after_entity.parent_id
354                {
355                    continue;
356                }
357
358                changes.push(make_change(after_entity, classify_match(matched, after_entity), Some(matched), commit_sha, author, &combined_by_id));
359            }
360        }
361    }
362
363    // Phase 5: Intra-file reorder detection
364    // For entities that matched by exact ID with identical content (unchanged),
365    // check if their relative ordering changed within the file.
366    detect_reorders(before, after, &matched_before, &matched_after, &mut changes, commit_sha, author, &combined_by_id);
367
368    // Remaining unmatched before = deleted
369    for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
370        changes.push(make_change(entity, ChangeType::Deleted, Some(entity), commit_sha, author, &combined_by_id));
371    }
372
373    // Remaining unmatched after = added
374    for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
375        changes.push(make_change(entity, ChangeType::Added, None, commit_sha, author, &combined_by_id));
376    }
377
378    MatchResult { changes }
379}
380
381/// Default content similarity using Jaccard index on whitespace-split tokens
382pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
383    let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
384    let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
385
386    // Early rejection: if token counts differ too much, Jaccard can't reach 0.8
387    let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
388        (tokens_a.len(), tokens_b.len())
389    } else {
390        (tokens_b.len(), tokens_a.len())
391    };
392    if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
393        return 0.0;
394    }
395
396    let set_a: HashSet<&str> = tokens_a.into_iter().collect();
397    let set_b: HashSet<&str> = tokens_b.into_iter().collect();
398
399    let intersection_size = set_a.intersection(&set_b).count();
400    let union_size = set_a.union(&set_b).count();
401
402    if union_size == 0 {
403        return 0.0;
404    }
405
406    intersection_size as f64 / union_size as f64
407}
408
409/// Detect intra-file reordering of unchanged entities.
410///
411/// Takes entities that matched by exact ID with identical content and checks
412/// if their relative ordering changed. Uses longest increasing subsequence
413/// (LIS) on the "after" positions to find the minimum set of moved entities.
414fn detect_reorders(
415    before: &[SemanticEntity],
416    after: &[SemanticEntity],
417    matched_before: &HashSet<&str>,
418    matched_after: &HashSet<&str>,
419    changes: &mut Vec<SemanticChange>,
420    commit_sha: Option<&str>,
421    author: Option<&str>,
422    by_id: &HashMap<&str, &SemanticEntity>,
423) {
424    // Collect unchanged entities: matched by ID with same content_hash
425    let before_by_id: HashMap<&str, &SemanticEntity> =
426        before.iter().map(|e| (e.id.as_str(), e)).collect();
427
428    // Group by file. For each file, collect unchanged entities in their
429    // before-order, then look up their after-positions.
430    let mut by_file: HashMap<&str, Vec<(&SemanticEntity, &SemanticEntity)>> = HashMap::new();
431    for after_entity in after {
432        if !matched_after.contains(after_entity.id.as_str()) {
433            continue;
434        }
435        if let Some(before_entity) = before_by_id.get(after_entity.id.as_str()) {
436            if !matched_before.contains(before_entity.id.as_str()) {
437                continue;
438            }
439            // Only consider truly unchanged entities (same content)
440            if before_entity.content_hash != after_entity.content_hash {
441                continue;
442            }
443            // Only intra-file
444            if before_entity.file_path != after_entity.file_path {
445                continue;
446            }
447            by_file
448                .entry(after_entity.file_path.as_str())
449                .or_default()
450                .push((before_entity, after_entity));
451        }
452    }
453
454    for (_file, pairs) in &mut by_file {
455        if pairs.len() < 2 {
456            continue;
457        }
458
459        // Sort by before start_line to get the "before" ordering
460        pairs.sort_by_key(|(b, _)| b.start_line);
461
462        // Map to after start_lines in before-order
463        let after_lines: Vec<usize> = pairs.iter().map(|(_, a)| a.start_line).collect();
464
465        // Find LIS indices (entities that stayed in relative order)
466        let lis_set = longest_increasing_subsequence_indices(&after_lines);
467
468        // Entities NOT in LIS were reordered
469        for (i, (_before_entity, after_entity)) in pairs.iter().enumerate() {
470            if lis_set.contains(&i) {
471                continue;
472            }
473            changes.push(make_change(after_entity, ChangeType::Reordered, None, commit_sha, author, by_id));
474        }
475    }
476}
477
478/// Find indices that form the longest increasing subsequence.
479/// Returns a HashSet of indices in the original array that are part of the LIS.
480fn longest_increasing_subsequence_indices(seq: &[usize]) -> HashSet<usize> {
481    let n = seq.len();
482    if n == 0 {
483        return HashSet::new();
484    }
485
486    // tails[i] = index in seq of the smallest tail element for IS of length i+1
487    let mut tails: Vec<usize> = Vec::new();
488    // parent[i] = index of previous element in the LIS ending at seq[i]
489    let mut parent: Vec<Option<usize>> = vec![None; n];
490    // tail_idx[i] = index in seq that tails[i] points to
491    let mut tail_idx: Vec<usize> = Vec::new();
492
493    for i in 0..n {
494        let pos = tails.partition_point(|&t| t < seq[i]);
495        if pos == tails.len() {
496            tails.push(seq[i]);
497            tail_idx.push(i);
498        } else {
499            tails[pos] = seq[i];
500            tail_idx[pos] = i;
501        }
502        parent[i] = if pos > 0 { Some(tail_idx[pos - 1]) } else { None };
503    }
504
505    // Trace back to find actual LIS indices
506    let mut result = HashSet::new();
507    let mut idx = *tail_idx.last().unwrap();
508    result.insert(idx);
509    while let Some(p) = parent[idx] {
510        result.insert(p);
511        idx = p;
512    }
513    result
514}
515
516#[cfg(test)]
517mod tests {
518    use super::*;
519    use crate::utils::hash::content_hash;
520
521    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
522        SemanticEntity {
523            id: id.to_string(),
524            file_path: file_path.to_string(),
525            entity_type: "function".to_string(),
526            name: name.to_string(),
527            parent_id: None,
528            content: content.to_string(),
529            content_hash: content_hash(content),
530            structural_hash: None,
531            start_line: 1,
532            end_line: 1,
533            metadata: None,
534        }
535    }
536
537    #[test]
538    fn test_exact_match_modified() {
539        let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
540        let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
541        let result = match_entities(&before, &after, "a.ts", None, None, None);
542        assert_eq!(result.changes.len(), 1);
543        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
544    }
545
546    #[test]
547    fn test_exact_match_unchanged() {
548        let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
549        let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
550        let result = match_entities(&before, &after, "a.ts", None, None, None);
551        assert_eq!(result.changes.len(), 0);
552    }
553
554    #[test]
555    fn test_added_deleted() {
556        let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
557        let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
558        let result = match_entities(&before, &after, "a.ts", None, None, None);
559        assert_eq!(result.changes.len(), 2);
560        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
561        assert!(types.contains(&ChangeType::Deleted));
562        assert!(types.contains(&ChangeType::Added));
563    }
564
565    #[test]
566    fn test_content_hash_rename() {
567        let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
568        let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
569        let result = match_entities(&before, &after, "a.ts", None, None, None);
570        assert_eq!(result.changes.len(), 1);
571        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
572    }
573
574    #[test]
575    fn test_same_signature_file_rename_with_content_change_is_moved() {
576        let before = vec![make_entity(
577            "old.ts::function::foo",
578            "foo",
579            "export function foo() { return alpha + beta + gamma; }",
580            "old.ts",
581        )];
582        let after = vec![make_entity(
583            "new.ts::function::foo",
584            "foo",
585            "export function foo() { return one + two + three; }",
586            "new.ts",
587        )];
588
589        let result = match_entities(&before, &after, "new.ts", None, None, None);
590
591        assert_eq!(result.changes.len(), 1);
592        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
593        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
594    }
595
596    #[test]
597    fn test_parent_child_dedup_class_method() {
598        // Class entity contains the method body in its content.
599        // parent_id stores the full entity ID of the parent.
600        let class_before = SemanticEntity {
601            id: "a.ts::class::DataStack".to_string(),
602            file_path: "a.ts".to_string(),
603            entity_type: "class".to_string(),
604            name: "DataStack".to_string(),
605            parent_id: None,
606            content: "class DataStack { constructor() {} genPg() { old } }".to_string(),
607            content_hash: content_hash("class DataStack { constructor() {} genPg() { old } }"),
608            structural_hash: None,
609            start_line: 1,
610            end_line: 10,
611            metadata: None,
612        };
613        let method_before = SemanticEntity {
614            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
615            file_path: "a.ts".to_string(),
616            entity_type: "method".to_string(),
617            name: "genPg".to_string(),
618            parent_id: Some("a.ts::class::DataStack".to_string()),
619            content: "genPg() { old }".to_string(),
620            content_hash: content_hash("genPg() { old }"),
621            structural_hash: None,
622            start_line: 5,
623            end_line: 8,
624            metadata: None,
625        };
626
627        let class_after = SemanticEntity {
628            id: "a.ts::class::DataStack".to_string(),
629            file_path: "a.ts".to_string(),
630            entity_type: "class".to_string(),
631            name: "DataStack".to_string(),
632            parent_id: None,
633            content: "class DataStack { constructor() {} genPg() { new } }".to_string(),
634            content_hash: content_hash("class DataStack { constructor() {} genPg() { new } }"),
635            structural_hash: None,
636            start_line: 1,
637            end_line: 10,
638            metadata: None,
639        };
640        let method_after = SemanticEntity {
641            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
642            file_path: "a.ts".to_string(),
643            entity_type: "method".to_string(),
644            name: "genPg".to_string(),
645            parent_id: Some("a.ts::class::DataStack".to_string()),
646            content: "genPg() { new }".to_string(),
647            content_hash: content_hash("genPg() { new }"),
648            structural_hash: None,
649            start_line: 5,
650            end_line: 8,
651            metadata: None,
652        };
653
654        let before = vec![class_before, method_before];
655        let after = vec![class_after, method_after];
656        let result = match_entities(&before, &after, "a.ts", None, None, None);
657
658        // match_entities no longer deduplicates — suppression happens in differ.rs.
659        // Both the class and the method are Modified here.
660        assert_eq!(result.changes.len(), 2);
661        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
662        assert!(types.iter().all(|t| *t == ChangeType::Modified));
663    }
664
665    #[test]
666    fn test_parent_not_deduped_when_no_child_changes() {
667        // Only the class-level content changes (e.g. a field added), no method changes
668        let class_before = SemanticEntity {
669            id: "a.ts::class::Foo".to_string(),
670            file_path: "a.ts".to_string(),
671            entity_type: "class".to_string(),
672            name: "Foo".to_string(),
673            parent_id: None,
674            content: "class Foo { bar() {} }".to_string(),
675            content_hash: content_hash("class Foo { bar() {} }"),
676            structural_hash: None,
677            start_line: 1,
678            end_line: 5,
679            metadata: None,
680        };
681        let method_before = SemanticEntity {
682            id: "a.ts::a.ts::class::Foo::bar".to_string(),
683            file_path: "a.ts".to_string(),
684            entity_type: "method".to_string(),
685            name: "bar".to_string(),
686            parent_id: Some("a.ts::class::Foo".to_string()),
687            content: "bar() {}".to_string(),
688            content_hash: content_hash("bar() {}"),
689            structural_hash: None,
690            start_line: 2,
691            end_line: 4,
692            metadata: None,
693        };
694
695        let class_after = SemanticEntity {
696            id: "a.ts::class::Foo".to_string(),
697            file_path: "a.ts".to_string(),
698            entity_type: "class".to_string(),
699            name: "Foo".to_string(),
700            parent_id: None,
701            content: "class Foo { x = 1; bar() {} }".to_string(),
702            content_hash: content_hash("class Foo { x = 1; bar() {} }"),
703            structural_hash: None,
704            start_line: 1,
705            end_line: 6,
706            metadata: None,
707        };
708        let method_after = SemanticEntity {
709            id: "a.ts::a.ts::class::Foo::bar".to_string(),
710            file_path: "a.ts".to_string(),
711            entity_type: "method".to_string(),
712            name: "bar".to_string(),
713            parent_id: Some("a.ts::class::Foo".to_string()),
714            content: "bar() {}".to_string(),
715            content_hash: content_hash("bar() {}"),
716            structural_hash: None,
717            start_line: 3,
718            end_line: 5,
719            metadata: None,
720        };
721
722        let before = vec![class_before, method_before];
723        let after = vec![class_after, method_after];
724        let result = match_entities(&before, &after, "a.ts", None, None, None);
725
726        // Class changed but method didn't, so class should still appear
727        assert_eq!(result.changes.len(), 1);
728        assert_eq!(result.changes[0].entity_name, "Foo");
729        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
730    }
731
732    fn make_entity_with_parent(id: &str, name: &str, content: &str, file_path: &str, parent_id: Option<&str>) -> SemanticEntity {
733        SemanticEntity {
734            id: id.to_string(),
735            file_path: file_path.to_string(),
736            entity_type: "method".to_string(),
737            name: name.to_string(),
738            parent_id: parent_id.map(String::from),
739            content: content.to_string(),
740            content_hash: content_hash(content),
741            structural_hash: None,
742            start_line: 1,
743            end_line: 1,
744            metadata: None,
745        }
746    }
747
748    #[test]
749    fn test_intra_file_move_between_classes() {
750        // Method moves from ClassA to ClassB in the same file
751        let before = vec![make_entity_with_parent(
752            "a.rs::class::ClassA::foo", "foo", "fn foo() { do_thing() }",
753            "a.rs", Some("a.rs::class::ClassA"),
754        )];
755        let after = vec![make_entity_with_parent(
756            "a.rs::class::ClassB::foo", "foo", "fn foo() { do_thing() }",
757            "a.rs", Some("a.rs::class::ClassB"),
758        )];
759        let result = match_entities(&before, &after, "a.rs", None, None, None);
760        assert_eq!(result.changes.len(), 1);
761        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
762        assert_eq!(result.changes[0].old_parent_id, Some("a.rs::class::ClassA".to_string()));
763    }
764
765    #[test]
766    fn test_same_parent_is_rename_not_move() {
767        // Same parent, different name = rename (not move)
768        // Content must be identical (same hash) so Phase 2 catches it
769        let body = "fn method(&self) { let x = self.compute(); self.validate(x); self.store(x) }";
770        let before = vec![make_entity_with_parent(
771            "a.rs::class::Foo::old_method", "old_method", body,
772            "a.rs", Some("a.rs::class::Foo"),
773        )];
774        let after = vec![make_entity_with_parent(
775            "a.rs::class::Foo::new_method", "new_method", body,
776            "a.rs", Some("a.rs::class::Foo"),
777        )];
778        let result = match_entities(&before, &after, "a.rs", None, None, None);
779        assert_eq!(result.changes.len(), 1);
780        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
781        assert!(result.changes[0].old_parent_id.is_none());
782    }
783
784    fn make_entity_at(id: &str, name: &str, content: &str, file_path: &str, line: usize) -> SemanticEntity {
785        SemanticEntity {
786            id: id.to_string(),
787            file_path: file_path.to_string(),
788            entity_type: "function".to_string(),
789            name: name.to_string(),
790            parent_id: None,
791            content: content.to_string(),
792            content_hash: content_hash(content),
793            structural_hash: None,
794            start_line: line,
795            end_line: line + 2,
796            metadata: None,
797        }
798    }
799
800    #[test]
801    fn test_reorder_detection() {
802        let before = vec![
803            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
804            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
805            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 9),
806        ];
807        let after = vec![
808            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
809            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 5),
810            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 9),
811        ];
812        let result = match_entities(&before, &after, "a.rs", None, None, None);
813        assert_eq!(result.changes.len(), 1);
814        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
815        // Either beta or gamma is marked, LIS picks the minimum
816        assert!(result.changes[0].entity_name == "beta" || result.changes[0].entity_name == "gamma");
817    }
818
819    #[test]
820    fn test_no_reorder_when_order_preserved() {
821        let before = vec![
822            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
823            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
824        ];
825        let after = vec![
826            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
827            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 10),
828        ];
829        let result = match_entities(&before, &after, "a.rs", None, None, None);
830        // Lines shifted but relative order is same, no reorder
831        assert_eq!(result.changes.len(), 0);
832    }
833
834    #[test]
835    fn test_default_similarity() {
836        let a = make_entity("a", "a", "the quick brown fox", "a.ts");
837        let b = make_entity("b", "b", "the quick brown dog", "a.ts");
838        let score = default_similarity(&a, &b);
839        assert!(score > 0.5);
840        assert!(score < 1.0);
841    }
842
843    #[test]
844    fn parent_name_terminates_on_cyclic_parent_id() {
845        // Two entities whose parent_id chains form a cycle. parent_name
846        // would loop forever without the visited-set guard.
847        let a = make_entity_with_parent("A", "A", "", "f", Some("B"));
848        let b = make_entity_with_parent("B", "B", "", "f", Some("A"));
849        let mut by_id: HashMap<&str, &SemanticEntity> = HashMap::new();
850        by_id.insert("A", &a);
851        by_id.insert("B", &b);
852        // Synthesize a leaf whose parent_id enters the cycle via A.
853        let leaf = make_entity_with_parent("L", "L", "", "f", Some("A"));
854        let chain = parent_name(&leaf, &by_id);
855        // Must terminate. We don't assert exact contents — order/composition
856        // depends on which side of the cycle is reached first; the safety
857        // property is "this returns at all."
858        assert!(chain.is_some());
859    }
860}