Skip to main content

sem_core/model/
identity.rs

1use std::collections::{HashMap, HashSet};
2
3use super::change::{ChangeType, SemanticChange};
4use super::entity::SemanticEntity;
5
6fn parent_name(
7    entity: &SemanticEntity,
8    by_id: &HashMap<&str, &SemanticEntity>,
9) -> Option<String> {
10    let mut parts: Vec<&str> = Vec::new();
11    let mut visited: HashSet<&str> = HashSet::new();
12    let mut pid = entity.parent_id.as_deref()?;
13    loop {
14        if !visited.insert(pid) {
15            break;
16        }
17        match by_id.get(pid) {
18            Some(parent) => {
19                // Skip ancestors with empty names (e.g. JSON's empty-string
20                // root-package key in package-lock.json). The full path is
21                // still recoverable from entity_id; the displayed chain is
22                // for human readability.
23                if !parent.name.is_empty() {
24                    parts.push(parent.name.as_str());
25                }
26                match parent.parent_id.as_deref() {
27                    Some(next) => pid = next,
28                    None => break,
29                }
30            }
31            None => break,
32        }
33    }
34    if parts.is_empty() {
35        return None;
36    }
37    parts.reverse();
38    Some(parts.join("::"))
39}
40
/// Result of [`match_entities`]: every semantic change detected between the
/// `before` and `after` entity snapshots.
pub struct MatchResult {
    /// Detected changes, in the order the matching phases emitted them.
    pub changes: Vec<SemanticChange>,
}
44
45fn classify_match(before: &SemanticEntity, after: &SemanticEntity) -> ChangeType {
46    if before.file_path != after.file_path {
47        ChangeType::Moved
48    } else if before.parent_id != after.parent_id {
49        ChangeType::Moved // intra-file scope move (e.g. method moved between classes)
50    } else {
51        ChangeType::Renamed
52    }
53}
54
55fn same_signature_across_file_rename(
56    before: &SemanticEntity,
57    after: &SemanticEntity,
58    before_by_id: &HashMap<&str, &SemanticEntity>,
59    after_by_id: &HashMap<&str, &SemanticEntity>,
60) -> bool {
61    before.file_path != after.file_path
62        && before.entity_type == after.entity_type
63        && before.name == after.name
64        && parent_name(before, before_by_id) == parent_name(after, after_by_id)
65}
66
67fn structural_change_between(before: &SemanticEntity, after: &SemanticEntity) -> Option<bool> {
68    if before.content_hash == after.content_hash {
69        return None;
70    }
71
72    match (&before.structural_hash, &after.structural_hash) {
73        (Some(before_hash), Some(after_hash)) => Some(before_hash != after_hash),
74        _ => None,
75    }
76}
77
78fn make_change(
79    after_entity: &SemanticEntity,
80    change_type: ChangeType,
81    before_entity: Option<&SemanticEntity>,
82    commit_sha: Option<&str>,
83    author: Option<&str>,
84    by_id: &HashMap<&str, &SemanticEntity>,
85) -> SemanticChange {
86    let prefix = match change_type {
87        ChangeType::Added => "added::",
88        ChangeType::Deleted => "deleted::",
89        ChangeType::Reordered => "reordered::",
90        _ => "",
91    };
92    // For deleted entities, use the before entity as the primary source
93    let primary = if change_type == ChangeType::Deleted {
94        before_entity.unwrap_or(after_entity)
95    } else {
96        after_entity
97    };
98    let structural_change = before_entity.and_then(|before| {
99        if matches!(change_type, ChangeType::Deleted | ChangeType::Reordered) {
100            None
101        } else {
102            structural_change_between(before, after_entity)
103        }
104    });
105    SemanticChange {
106        id: format!("change::{prefix}{}", primary.id),
107        entity_id: primary.id.clone(),
108        change_type,
109        entity_type: primary.entity_type.clone(),
110        entity_name: primary.name.clone(),
111        entity_line: primary.start_line,
112        start_line: primary.start_line,
113        end_line: primary.end_line,
114        old_start_line: before_entity.map(|b| b.start_line),
115        old_end_line: before_entity.map(|b| b.end_line),
116        parent_name: parent_name(primary, by_id),
117        file_path: primary.file_path.clone(),
118        old_entity_name: before_entity.and_then(|b| {
119            (b.name != after_entity.name).then(|| b.name.clone())
120        }),
121        old_file_path: before_entity.and_then(|b| {
122            (b.file_path != after_entity.file_path).then(|| b.file_path.clone())
123        }),
124        old_parent_id: before_entity.and_then(|b| {
125            (b.parent_id != after_entity.parent_id).then(|| b.parent_id.clone()).flatten()
126        }),
127        before_content: if change_type == ChangeType::Reordered {
128            None
129        } else {
130            before_entity.map(|b| b.content.clone())
131        },
132        after_content: if change_type == ChangeType::Deleted || change_type == ChangeType::Reordered {
133            None
134        } else {
135            Some(after_entity.content.clone())
136        },
137        commit_sha: commit_sha.map(String::from),
138        author: author.map(String::from),
139        timestamp: None,
140        structural_change,
141    }
142}
143
144/// Entity matching algorithm:
145/// 1. Exact ID match — same entity ID in before/after → modified or unchanged
146/// 2. Content hash match — same hash, different ID → renamed or moved
147/// 3. Same signature across file rename → moved, even if content changed
148/// 4. Fuzzy similarity — >80% content similarity → probable rename
149pub fn match_entities(
150    before: &[SemanticEntity],
151    after: &[SemanticEntity],
152    _file_path: &str,
153    similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
154    commit_sha: Option<&str>,
155    author: Option<&str>,
156) -> MatchResult {
157    let mut changes: Vec<SemanticChange> = Vec::new();
158    let mut matched_before: HashSet<&str> = HashSet::new();
159    let mut matched_after: HashSet<&str> = HashSet::new();
160
161    let before_by_id: HashMap<&str, &SemanticEntity> =
162        before.iter().map(|e| (e.id.as_str(), e)).collect();
163    let after_by_id: HashMap<&str, &SemanticEntity> =
164        after.iter().map(|e| (e.id.as_str(), e)).collect();
165
166    // Combined map for ancestor-chain lookup: after takes precedence so the
167    // displayed path reflects the post-change tree for non-deleted entities.
168    let combined_by_id: HashMap<&str, &SemanticEntity> = before
169        .iter()
170        .map(|e| (e.id.as_str(), e))
171        .chain(after.iter().map(|e| (e.id.as_str(), e)))
172        .collect();
173
174    // Phase 1: Exact ID match
175    for (&id, after_entity) in &after_by_id {
176        if let Some(before_entity) = before_by_id.get(id) {
177            matched_before.insert(id);
178            matched_after.insert(id);
179
180            if before_entity.content_hash != after_entity.content_hash {
181                changes.push(make_change(
182                    after_entity,
183                    ChangeType::Modified,
184                    Some(before_entity),
185                    commit_sha,
186                    author,
187                    &combined_by_id,
188                ));
189            }
190        }
191    }
192
193    // Collect unmatched
194    let unmatched_before: Vec<&SemanticEntity> = before
195        .iter()
196        .filter(|e| !matched_before.contains(e.id.as_str()))
197        .collect();
198    let unmatched_after: Vec<&SemanticEntity> = after
199        .iter()
200        .filter(|e| !matched_after.contains(e.id.as_str()))
201        .collect();
202
203    // Phase 2: Content hash match (rename/move detection)
204    let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
205    let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
206    for entity in &unmatched_before {
207        before_by_hash
208            .entry(entity.content_hash.as_str())
209            .or_default()
210            .push(entity);
211        if let Some(ref sh) = entity.structural_hash {
212            before_by_structural
213                .entry(sh.as_str())
214                .or_default()
215                .push(entity);
216        }
217    }
218
219    for after_entity in &unmatched_after {
220        if matched_after.contains(after_entity.id.as_str()) {
221            continue;
222        }
223        // Try exact content_hash first
224        let found = before_by_hash
225            .get_mut(after_entity.content_hash.as_str())
226            .and_then(|c| c.pop());
227        // Fall back to structural_hash (formatting/comment changes don't matter)
228        let found = found.or_else(|| {
229            after_entity.structural_hash.as_ref().and_then(|sh| {
230                before_by_structural.get_mut(sh.as_str()).and_then(|c| {
231                    c.iter()
232                        .position(|e| !matched_before.contains(e.id.as_str()))
233                        .map(|i| c.remove(i))
234                })
235            })
236        });
237
238        if let Some(before_entity) = found {
239            matched_before.insert(&before_entity.id);
240            matched_after.insert(&after_entity.id);
241
242            // If name, file, and parent are the same, only the parent qualifier in the ID changed
243            // (e.g. parent class was renamed). Skip — the entity itself is unchanged.
244            // But if parent_id differs, this is an intra-file move (e.g. method moved between classes).
245            if before_entity.name == after_entity.name
246                && before_entity.file_path == after_entity.file_path
247                && before_entity.content_hash == after_entity.content_hash
248                && before_entity.parent_id == after_entity.parent_id
249            {
250                continue;
251            }
252
253            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
254        }
255    }
256
257    // Phase 3: Same logical signature across a file rename.
258    // A file path change changes entity IDs, so renamed files with edited
259    // entities need a signature fallback to avoid add/delete pairs.
260    for after_entity in &unmatched_after {
261        if matched_after.contains(after_entity.id.as_str()) {
262            continue;
263        }
264
265        let mut best_match: Option<&SemanticEntity> = None;
266        let mut best_score = f64::NEG_INFINITY;
267
268        for before_entity in &unmatched_before {
269            if matched_before.contains(before_entity.id.as_str()) {
270                continue;
271            }
272            if !same_signature_across_file_rename(before_entity, after_entity, &before_by_id, &after_by_id) {
273                continue;
274            }
275
276            let score = similarity_fn
277                .map(|f| f(before_entity, after_entity))
278                .unwrap_or_else(|| default_similarity(before_entity, after_entity));
279            if score > best_score {
280                best_score = score;
281                best_match = Some(before_entity);
282            }
283        }
284
285        if let Some(before_entity) = best_match {
286            matched_before.insert(&before_entity.id);
287            matched_after.insert(&after_entity.id);
288            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
289        }
290    }
291
292    // Phase 4: Fuzzy similarity (>80% threshold)
293    // Optimized: pre-compute token sets once per entity, group by type
294    let still_unmatched_before: Vec<&SemanticEntity> = unmatched_before
295        .iter()
296        .filter(|e| !matched_before.contains(e.id.as_str()))
297        .copied()
298        .collect();
299    let still_unmatched_after: Vec<&SemanticEntity> = unmatched_after
300        .iter()
301        .filter(|e| !matched_after.contains(e.id.as_str()))
302        .copied()
303        .collect();
304
305    if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
306        const THRESHOLD: f64 = 0.8;
307        const SIZE_RATIO_CUTOFF: f64 = 0.5;
308
309        // Pre-compute token sets once per entity (N+M instead of N×M allocations)
310        let before_sets: Vec<HashSet<&str>> = still_unmatched_before
311            .iter()
312            .map(|e| e.content.split_whitespace().collect())
313            .collect();
314        let after_sets: Vec<HashSet<&str>> = still_unmatched_after
315            .iter()
316            .map(|e| e.content.split_whitespace().collect())
317            .collect();
318
319        // Group before entities by type: O(sum(n_t × m_t)) instead of O(N×M)
320        let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
321        for (i, e) in still_unmatched_before.iter().enumerate() {
322            before_by_type
323                .entry(e.entity_type.as_str())
324                .or_default()
325                .push(i);
326        }
327
328        for (ai, after_entity) in still_unmatched_after.iter().enumerate() {
329            let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
330                Some(indices) => indices,
331                None => continue,
332            };
333
334            let a_set = &after_sets[ai];
335            let a_len = a_set.len();
336            let mut best_idx: Option<usize> = None;
337            let mut best_score: f64 = 0.0;
338
339            for &bi in candidates {
340                if matched_before.contains(still_unmatched_before[bi].id.as_str()) {
341                    continue;
342                }
343
344                let b_set = &before_sets[bi];
345                let b_len = b_set.len();
346
347                // Size ratio filter using pre-computed set lengths
348                let (min_l, max_l) = if a_len < b_len {
349                    (a_len, b_len)
350                } else {
351                    (b_len, a_len)
352                };
353                if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
354                    continue;
355                }
356
357                // Inline Jaccard on pre-computed sets
358                let intersection = a_set.intersection(b_set).count();
359                let union = a_len + b_len - intersection;
360                let score = if union == 0 {
361                    0.0
362                } else {
363                    intersection as f64 / union as f64
364                };
365
366                if score >= THRESHOLD && score > best_score {
367                    best_score = score;
368                    best_idx = Some(bi);
369                }
370            }
371
372            if let Some(bi) = best_idx {
373                let matched = still_unmatched_before[bi];
374                matched_before.insert(&matched.id);
375                matched_after.insert(&after_entity.id);
376
377                // If name, file, and parent are the same, only the parent qualifier changed.
378                if matched.name == after_entity.name
379                    && matched.file_path == after_entity.file_path
380                    && matched.content_hash == after_entity.content_hash
381                    && matched.parent_id == after_entity.parent_id
382                {
383                    continue;
384                }
385
386                changes.push(make_change(after_entity, classify_match(matched, after_entity), Some(matched), commit_sha, author, &combined_by_id));
387            }
388        }
389    }
390
391    // Phase 5: Intra-file reorder detection
392    // For entities that matched by exact ID with identical content (unchanged),
393    // check if their relative ordering changed within the file.
394    detect_reorders(before, after, &matched_before, &matched_after, &mut changes, commit_sha, author, &combined_by_id);
395
396    // Remaining unmatched before = deleted
397    for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
398        changes.push(make_change(entity, ChangeType::Deleted, Some(entity), commit_sha, author, &combined_by_id));
399    }
400
401    // Remaining unmatched after = added
402    for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
403        changes.push(make_change(entity, ChangeType::Added, None, commit_sha, author, &combined_by_id));
404    }
405
406    MatchResult { changes }
407}
408
409/// Default content similarity using Jaccard index on whitespace-split tokens
410pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
411    let tokens_a: Vec<&str> = a.content.split_whitespace().collect();
412    let tokens_b: Vec<&str> = b.content.split_whitespace().collect();
413
414    // Early rejection: if token counts differ too much, Jaccard can't reach 0.8
415    let (min_c, max_c) = if tokens_a.len() < tokens_b.len() {
416        (tokens_a.len(), tokens_b.len())
417    } else {
418        (tokens_b.len(), tokens_a.len())
419    };
420    if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
421        return 0.0;
422    }
423
424    let set_a: HashSet<&str> = tokens_a.into_iter().collect();
425    let set_b: HashSet<&str> = tokens_b.into_iter().collect();
426
427    let intersection_size = set_a.intersection(&set_b).count();
428    let union_size = set_a.union(&set_b).count();
429
430    if union_size == 0 {
431        return 0.0;
432    }
433
434    intersection_size as f64 / union_size as f64
435}
436
437/// Detect intra-file reordering of unchanged entities.
438///
439/// Takes entities that matched by exact ID with identical content and checks
440/// if their relative ordering changed. Uses longest increasing subsequence
441/// (LIS) on the "after" positions to find the minimum set of moved entities.
442fn detect_reorders(
443    before: &[SemanticEntity],
444    after: &[SemanticEntity],
445    matched_before: &HashSet<&str>,
446    matched_after: &HashSet<&str>,
447    changes: &mut Vec<SemanticChange>,
448    commit_sha: Option<&str>,
449    author: Option<&str>,
450    by_id: &HashMap<&str, &SemanticEntity>,
451) {
452    // Collect unchanged entities: matched by ID with same content_hash
453    let before_by_id: HashMap<&str, &SemanticEntity> =
454        before.iter().map(|e| (e.id.as_str(), e)).collect();
455
456    // Group by file. For each file, collect unchanged entities in their
457    // before-order, then look up their after-positions.
458    let mut by_file: HashMap<&str, Vec<(&SemanticEntity, &SemanticEntity)>> = HashMap::new();
459    for after_entity in after {
460        if !matched_after.contains(after_entity.id.as_str()) {
461            continue;
462        }
463        if let Some(before_entity) = before_by_id.get(after_entity.id.as_str()) {
464            if !matched_before.contains(before_entity.id.as_str()) {
465                continue;
466            }
467            // Only consider truly unchanged entities (same content)
468            if before_entity.content_hash != after_entity.content_hash {
469                continue;
470            }
471            // Only intra-file
472            if before_entity.file_path != after_entity.file_path {
473                continue;
474            }
475            by_file
476                .entry(after_entity.file_path.as_str())
477                .or_default()
478                .push((before_entity, after_entity));
479        }
480    }
481
482    for (_file, pairs) in &mut by_file {
483        if pairs.len() < 2 {
484            continue;
485        }
486
487        // Sort by before start_line to get the "before" ordering
488        pairs.sort_by_key(|(b, _)| b.start_line);
489
490        // Map to after start_lines in before-order
491        let after_lines: Vec<usize> = pairs.iter().map(|(_, a)| a.start_line).collect();
492
493        // Find LIS indices (entities that stayed in relative order)
494        let lis_set = longest_increasing_subsequence_indices(&after_lines);
495
496        // Entities NOT in LIS were reordered
497        for (i, (before_entity, after_entity)) in pairs.iter().enumerate() {
498            if lis_set.contains(&i) {
499                continue;
500            }
501            changes.push(make_change(after_entity, ChangeType::Reordered, Some(before_entity), commit_sha, author, by_id));
502        }
503    }
504}
505
/// Computes one longest strictly-increasing subsequence of `seq` and returns
/// the positions (indices into `seq`) of its elements.
///
/// Equal values never extend a run. An empty input yields an empty set.
fn longest_increasing_subsequence_indices(seq: &[usize]) -> HashSet<usize> {
    // piles[k] = (smallest tail value of any increasing run of length k + 1,
    //             position in `seq` where that tail sits)
    let mut piles: Vec<(usize, usize)> = Vec::new();
    // predecessor[i] = position preceding `seq[i]` in the best run ending at i
    let mut predecessor: Vec<Option<usize>> = Vec::with_capacity(seq.len());

    for (position, &value) in seq.iter().enumerate() {
        // First pile whose tail is not smaller than `value`.
        let slot = piles.partition_point(|&(tail, _)| tail < value);
        predecessor.push(slot.checked_sub(1).map(|shorter| piles[shorter].1));

        match piles.get_mut(slot) {
            Some(pile) => *pile = (value, position),
            None => piles.push((value, position)),
        }
    }

    // Walk the predecessor links back from the tail of the longest run.
    let mut members = HashSet::new();
    let mut cursor = piles.last().map(|&(_, position)| position);
    while let Some(position) = cursor {
        members.insert(position);
        cursor = predecessor[position];
    }
    members
}
543
544#[cfg(test)]
545mod tests {
546    use super::*;
547    use crate::utils::hash::content_hash;
548
549    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
550        SemanticEntity {
551            id: id.to_string(),
552            file_path: file_path.to_string(),
553            entity_type: "function".to_string(),
554            name: name.to_string(),
555            parent_id: None,
556            content: content.to_string(),
557            content_hash: content_hash(content),
558            structural_hash: None,
559            start_line: 1,
560            end_line: 1,
561            metadata: None,
562        }
563    }
564
565    #[test]
566    fn test_exact_match_modified() {
567        let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
568        let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
569        let result = match_entities(&before, &after, "a.ts", None, None, None);
570        assert_eq!(result.changes.len(), 1);
571        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
572    }
573
574    #[test]
575    fn test_change_line_spans_track_current_and_previous_entities() {
576        let before = vec![make_entity_at(
577            "a::f::foo",
578            "foo",
579            "fn foo() { old }",
580            "a.rs",
581            3,
582        )];
583        let after = vec![make_entity_at(
584            "a::f::foo",
585            "foo",
586            "fn foo() { new }",
587            "a.rs",
588            7,
589        )];
590
591        let result = match_entities(&before, &after, "a.rs", None, None, None);
592
593        assert_eq!(result.changes.len(), 1);
594        assert_eq!(result.changes[0].start_line, 7);
595        assert_eq!(result.changes[0].end_line, 9);
596        assert_eq!(result.changes[0].old_start_line, Some(3));
597        assert_eq!(result.changes[0].old_end_line, Some(5));
598    }
599
600    #[test]
601    fn test_exact_match_unchanged() {
602        let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
603        let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
604        let result = match_entities(&before, &after, "a.ts", None, None, None);
605        assert_eq!(result.changes.len(), 0);
606    }
607
608    #[test]
609    fn test_added_deleted() {
610        let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
611        let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
612        let result = match_entities(&before, &after, "a.ts", None, None, None);
613        assert_eq!(result.changes.len(), 2);
614        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
615        assert!(types.contains(&ChangeType::Deleted));
616        assert!(types.contains(&ChangeType::Added));
617    }
618
619    #[test]
620    fn test_content_hash_rename() {
621        let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
622        let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
623        let result = match_entities(&before, &after, "a.ts", None, None, None);
624        assert_eq!(result.changes.len(), 1);
625        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
626    }
627
628    #[test]
629    fn test_same_signature_file_rename_with_content_change_is_moved() {
630        let mut before_entity = make_entity(
631            "old.ts::function::foo",
632            "foo",
633            "export function foo() { return alpha + beta + gamma; }",
634            "old.ts",
635        );
636        before_entity.structural_hash = Some("before-structure".to_string());
637        let mut after_entity = make_entity(
638            "new.ts::function::foo",
639            "foo",
640            "export function foo() { return one + two + three; }",
641            "new.ts",
642        );
643        after_entity.structural_hash = Some("after-structure".to_string());
644        let before = vec![before_entity];
645        let after = vec![after_entity];
646
647        let result = match_entities(&before, &after, "new.ts", None, None, None);
648
649        assert_eq!(result.changes.len(), 1);
650        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
651        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
652        assert_eq!(result.changes[0].structural_change, Some(true));
653    }
654
655    #[test]
656    fn test_moved_content_change_without_structural_hash_is_unknown_structurally() {
657        let before = vec![make_entity(
658            "old.ts::function::foo",
659            "foo",
660            "export function foo() { return alpha + beta + gamma; }",
661            "old.ts",
662        )];
663        let after = vec![make_entity(
664            "new.ts::function::foo",
665            "foo",
666            "export function foo() { return one + two + three; }",
667            "new.ts",
668        )];
669
670        let result = match_entities(&before, &after, "new.ts", None, None, None);
671
672        assert_eq!(result.changes.len(), 1);
673        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
674        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
675        assert_eq!(result.changes[0].structural_change, None);
676    }
677
678    #[test]
679    fn test_parent_child_dedup_class_method() {
680        // Class entity contains the method body in its content.
681        // parent_id stores the full entity ID of the parent.
682        let class_before = SemanticEntity {
683            id: "a.ts::class::DataStack".to_string(),
684            file_path: "a.ts".to_string(),
685            entity_type: "class".to_string(),
686            name: "DataStack".to_string(),
687            parent_id: None,
688            content: "class DataStack { constructor() {} genPg() { old } }".to_string(),
689            content_hash: content_hash("class DataStack { constructor() {} genPg() { old } }"),
690            structural_hash: None,
691            start_line: 1,
692            end_line: 10,
693            metadata: None,
694        };
695        let method_before = SemanticEntity {
696            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
697            file_path: "a.ts".to_string(),
698            entity_type: "method".to_string(),
699            name: "genPg".to_string(),
700            parent_id: Some("a.ts::class::DataStack".to_string()),
701            content: "genPg() { old }".to_string(),
702            content_hash: content_hash("genPg() { old }"),
703            structural_hash: None,
704            start_line: 5,
705            end_line: 8,
706            metadata: None,
707        };
708
709        let class_after = SemanticEntity {
710            id: "a.ts::class::DataStack".to_string(),
711            file_path: "a.ts".to_string(),
712            entity_type: "class".to_string(),
713            name: "DataStack".to_string(),
714            parent_id: None,
715            content: "class DataStack { constructor() {} genPg() { new } }".to_string(),
716            content_hash: content_hash("class DataStack { constructor() {} genPg() { new } }"),
717            structural_hash: None,
718            start_line: 1,
719            end_line: 10,
720            metadata: None,
721        };
722        let method_after = SemanticEntity {
723            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
724            file_path: "a.ts".to_string(),
725            entity_type: "method".to_string(),
726            name: "genPg".to_string(),
727            parent_id: Some("a.ts::class::DataStack".to_string()),
728            content: "genPg() { new }".to_string(),
729            content_hash: content_hash("genPg() { new }"),
730            structural_hash: None,
731            start_line: 5,
732            end_line: 8,
733            metadata: None,
734        };
735
736        let before = vec![class_before, method_before];
737        let after = vec![class_after, method_after];
738        let result = match_entities(&before, &after, "a.ts", None, None, None);
739
740        // match_entities no longer deduplicates — suppression happens in differ.rs.
741        // Both the class and the method are Modified here.
742        assert_eq!(result.changes.len(), 2);
743        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
744        assert!(types.iter().all(|t| *t == ChangeType::Modified));
745    }
746
747    #[test]
748    fn test_parent_not_deduped_when_no_child_changes() {
749        // Only the class-level content changes (e.g. a field added), no method changes
750        let class_before = SemanticEntity {
751            id: "a.ts::class::Foo".to_string(),
752            file_path: "a.ts".to_string(),
753            entity_type: "class".to_string(),
754            name: "Foo".to_string(),
755            parent_id: None,
756            content: "class Foo { bar() {} }".to_string(),
757            content_hash: content_hash("class Foo { bar() {} }"),
758            structural_hash: None,
759            start_line: 1,
760            end_line: 5,
761            metadata: None,
762        };
763        let method_before = SemanticEntity {
764            id: "a.ts::a.ts::class::Foo::bar".to_string(),
765            file_path: "a.ts".to_string(),
766            entity_type: "method".to_string(),
767            name: "bar".to_string(),
768            parent_id: Some("a.ts::class::Foo".to_string()),
769            content: "bar() {}".to_string(),
770            content_hash: content_hash("bar() {}"),
771            structural_hash: None,
772            start_line: 2,
773            end_line: 4,
774            metadata: None,
775        };
776
777        let class_after = SemanticEntity {
778            id: "a.ts::class::Foo".to_string(),
779            file_path: "a.ts".to_string(),
780            entity_type: "class".to_string(),
781            name: "Foo".to_string(),
782            parent_id: None,
783            content: "class Foo { x = 1; bar() {} }".to_string(),
784            content_hash: content_hash("class Foo { x = 1; bar() {} }"),
785            structural_hash: None,
786            start_line: 1,
787            end_line: 6,
788            metadata: None,
789        };
790        let method_after = SemanticEntity {
791            id: "a.ts::a.ts::class::Foo::bar".to_string(),
792            file_path: "a.ts".to_string(),
793            entity_type: "method".to_string(),
794            name: "bar".to_string(),
795            parent_id: Some("a.ts::class::Foo".to_string()),
796            content: "bar() {}".to_string(),
797            content_hash: content_hash("bar() {}"),
798            structural_hash: None,
799            start_line: 3,
800            end_line: 5,
801            metadata: None,
802        };
803
804        let before = vec![class_before, method_before];
805        let after = vec![class_after, method_after];
806        let result = match_entities(&before, &after, "a.ts", None, None, None);
807
808        // Class changed but method didn't, so class should still appear
809        assert_eq!(result.changes.len(), 1);
810        assert_eq!(result.changes[0].entity_name, "Foo");
811        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
812    }
813
814    fn make_entity_with_parent(id: &str, name: &str, content: &str, file_path: &str, parent_id: Option<&str>) -> SemanticEntity {
815        SemanticEntity {
816            id: id.to_string(),
817            file_path: file_path.to_string(),
818            entity_type: "method".to_string(),
819            name: name.to_string(),
820            parent_id: parent_id.map(String::from),
821            content: content.to_string(),
822            content_hash: content_hash(content),
823            structural_hash: None,
824            start_line: 1,
825            end_line: 1,
826            metadata: None,
827        }
828    }
829
830    #[test]
831    fn test_intra_file_move_between_classes() {
832        // Method moves from ClassA to ClassB in the same file
833        let before = vec![make_entity_with_parent(
834            "a.rs::class::ClassA::foo", "foo", "fn foo() { do_thing() }",
835            "a.rs", Some("a.rs::class::ClassA"),
836        )];
837        let after = vec![make_entity_with_parent(
838            "a.rs::class::ClassB::foo", "foo", "fn foo() { do_thing() }",
839            "a.rs", Some("a.rs::class::ClassB"),
840        )];
841        let result = match_entities(&before, &after, "a.rs", None, None, None);
842        assert_eq!(result.changes.len(), 1);
843        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
844        assert_eq!(result.changes[0].old_parent_id, Some("a.rs::class::ClassA".to_string()));
845    }
846
847    #[test]
848    fn test_same_parent_is_rename_not_move() {
849        // Same parent, different name = rename (not move)
850        // Content must be identical (same hash) so Phase 2 catches it
851        let body = "fn method(&self) { let x = self.compute(); self.validate(x); self.store(x) }";
852        let before = vec![make_entity_with_parent(
853            "a.rs::class::Foo::old_method", "old_method", body,
854            "a.rs", Some("a.rs::class::Foo"),
855        )];
856        let after = vec![make_entity_with_parent(
857            "a.rs::class::Foo::new_method", "new_method", body,
858            "a.rs", Some("a.rs::class::Foo"),
859        )];
860        let result = match_entities(&before, &after, "a.rs", None, None, None);
861        assert_eq!(result.changes.len(), 1);
862        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
863        assert!(result.changes[0].old_parent_id.is_none());
864    }
865
866    fn make_entity_at(id: &str, name: &str, content: &str, file_path: &str, line: usize) -> SemanticEntity {
867        SemanticEntity {
868            id: id.to_string(),
869            file_path: file_path.to_string(),
870            entity_type: "function".to_string(),
871            name: name.to_string(),
872            parent_id: None,
873            content: content.to_string(),
874            content_hash: content_hash(content),
875            structural_hash: None,
876            start_line: line,
877            end_line: line + 2,
878            metadata: None,
879        }
880    }
881
882    #[test]
883    fn test_reorder_detection() {
884        let before = vec![
885            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
886            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
887            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 9),
888        ];
889        let after = vec![
890            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
891            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 5),
892            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 9),
893        ];
894        let result = match_entities(&before, &after, "a.rs", None, None, None);
895        assert_eq!(result.changes.len(), 1);
896        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
897        assert!(result.changes[0].before_content.is_none());
898        assert!(result.changes[0].old_start_line.is_some());
899        assert!(result.changes[0].old_end_line.is_some());
900        assert_ne!(result.changes[0].old_start_line, Some(result.changes[0].start_line));
901        // Either beta or gamma is marked, LIS picks the minimum
902        assert!(result.changes[0].entity_name == "beta" || result.changes[0].entity_name == "gamma");
903    }
904
905    #[test]
906    fn test_no_reorder_when_order_preserved() {
907        let before = vec![
908            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
909            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
910        ];
911        let after = vec![
912            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
913            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 10),
914        ];
915        let result = match_entities(&before, &after, "a.rs", None, None, None);
916        // Lines shifted but relative order is same, no reorder
917        assert_eq!(result.changes.len(), 0);
918    }
919
920    #[test]
921    fn test_default_similarity() {
922        let a = make_entity("a", "a", "the quick brown fox", "a.ts");
923        let b = make_entity("b", "b", "the quick brown dog", "a.ts");
924        let score = default_similarity(&a, &b);
925        assert!(score > 0.5);
926        assert!(score < 1.0);
927    }
928
929    #[test]
930    fn parent_name_terminates_on_cyclic_parent_id() {
931        // Two entities whose parent_id chains form a cycle. parent_name
932        // would loop forever without the visited-set guard.
933        let a = make_entity_with_parent("A", "A", "", "f", Some("B"));
934        let b = make_entity_with_parent("B", "B", "", "f", Some("A"));
935        let mut by_id: HashMap<&str, &SemanticEntity> = HashMap::new();
936        by_id.insert("A", &a);
937        by_id.insert("B", &b);
938        // Synthesize a leaf whose parent_id enters the cycle via A.
939        let leaf = make_entity_with_parent("L", "L", "", "f", Some("A"));
940        let chain = parent_name(&leaf, &by_id);
941        // Must terminate. We don't assert exact contents — order/composition
942        // depends on which side of the cycle is reached first; the safety
943        // property is "this returns at all."
944        assert!(chain.is_some());
945    }
946}