Skip to main content

sem_core/model/
identity.rs

1use std::cmp::Ordering;
2use std::collections::{HashMap, HashSet};
3
4use super::change::{ChangeType, SemanticChange};
5use super::entity::SemanticEntity;
6
7fn parent_name(
8    entity: &SemanticEntity,
9    by_id: &HashMap<&str, &SemanticEntity>,
10) -> Option<String> {
11    let mut parts: Vec<&str> = Vec::new();
12    let mut visited: HashSet<&str> = HashSet::new();
13    let mut pid = entity.parent_id.as_deref()?;
14    loop {
15        if !visited.insert(pid) {
16            break;
17        }
18        match by_id.get(pid) {
19            Some(parent) => {
20                // Skip ancestors with empty names (e.g. JSON's empty-string
21                // root-package key in package-lock.json). The full path is
22                // still recoverable from entity_id; the displayed chain is
23                // for human readability.
24                if !parent.name.is_empty() {
25                    parts.push(parent.name.as_str());
26                }
27                match parent.parent_id.as_deref() {
28                    Some(next) => pid = next,
29                    None => break,
30                }
31            }
32            None => break,
33        }
34    }
35    if parts.is_empty() {
36        return None;
37    }
38    parts.reverse();
39    Some(parts.join("::"))
40}
41
42pub struct MatchResult {
43    pub changes: Vec<SemanticChange>,
44}
45
46type SameFileSignatureKey<'a> = (&'a str, &'a str, &'a str, Option<&'a str>);
47type RenameSignatureKey<'a> = (&'a str, &'a str, Option<&'a str>);
48const SAME_FILE_SIGNATURE_MIN_SIMILARITY: f64 = 0.3;
49
50struct ContentTokens<'a> {
51    token_count: usize,
52    unique_tokens: HashSet<&'a str>,
53}
54
55struct TokenCache<'a> {
56    tokens: Vec<Option<ContentTokens<'a>>>,
57}
58
59impl<'a> TokenCache<'a> {
60    fn new(len: usize) -> Self {
61        Self {
62            tokens: std::iter::repeat_with(|| None).take(len).collect(),
63        }
64    }
65
66    fn get(&mut self, entities: &[&'a SemanticEntity], idx: usize) -> &ContentTokens<'a> {
67        if self.tokens[idx].is_none() {
68            let content: &'a str = entities[idx].content.as_str();
69            self.tokens[idx] = Some(tokenize_content(content));
70        }
71        self.tokens[idx].as_ref().unwrap()
72    }
73}
74
75fn tokenize_content(content: &str) -> ContentTokens<'_> {
76    let mut token_count = 0;
77    let mut unique_tokens = HashSet::new();
78    for token in content.split_whitespace() {
79        token_count += 1;
80        unique_tokens.insert(token);
81    }
82    ContentTokens { token_count, unique_tokens }
83}
84
85fn jaccard_similarity(a: &HashSet<&str>, b: &HashSet<&str>) -> f64 {
86    let intersection_size = a.intersection(b).count();
87    let union_size = a.len() + b.len() - intersection_size;
88    if union_size == 0 {
89        return 0.0;
90    }
91    intersection_size as f64 / union_size as f64
92}
93
94fn default_similarity_from_tokens(a: &ContentTokens<'_>, b: &ContentTokens<'_>) -> f64 {
95    let (min_c, max_c) = if a.token_count < b.token_count {
96        (a.token_count, b.token_count)
97    } else {
98        (b.token_count, a.token_count)
99    };
100    if max_c > 0 && (min_c as f64 / max_c as f64) < 0.6 {
101        return 0.0;
102    }
103    jaccard_similarity(&a.unique_tokens, &b.unique_tokens)
104}
105
106fn classify_match(before: &SemanticEntity, after: &SemanticEntity) -> ChangeType {
107    if before.file_path != after.file_path {
108        ChangeType::Moved
109    } else if before.parent_id != after.parent_id {
110        ChangeType::Moved // intra-file scope move (e.g. method moved between classes)
111    } else if before.entity_type != after.entity_type || before.name != after.name {
112        ChangeType::Renamed
113    } else {
114        ChangeType::Modified
115    }
116}
117
118fn structural_change_between(before: &SemanticEntity, after: &SemanticEntity) -> Option<bool> {
119    if before.content_hash == after.content_hash {
120        return None;
121    }
122
123    match (&before.structural_hash, &after.structural_hash) {
124        (Some(before_hash), Some(after_hash)) => Some(before_hash != after_hash),
125        _ => None,
126    }
127}
128
129fn make_change(
130    after_entity: &SemanticEntity,
131    change_type: ChangeType,
132    before_entity: Option<&SemanticEntity>,
133    commit_sha: Option<&str>,
134    author: Option<&str>,
135    by_id: &HashMap<&str, &SemanticEntity>,
136) -> SemanticChange {
137    let prefix = match change_type {
138        ChangeType::Added => "added::",
139        ChangeType::Deleted => "deleted::",
140        ChangeType::Reordered => "reordered::",
141        _ => "",
142    };
143    // For deleted entities, use the before entity as the primary source
144    let primary = if change_type == ChangeType::Deleted {
145        before_entity.unwrap_or(after_entity)
146    } else {
147        after_entity
148    };
149    let structural_change = before_entity.and_then(|before| {
150        if matches!(change_type, ChangeType::Deleted | ChangeType::Reordered) {
151            None
152        } else {
153            structural_change_between(before, after_entity)
154        }
155    });
156    SemanticChange {
157        id: format!("change::{prefix}{}", primary.id),
158        entity_id: primary.id.clone(),
159        change_type,
160        entity_type: primary.entity_type.clone(),
161        entity_name: primary.name.clone(),
162        entity_line: primary.start_line,
163        start_line: primary.start_line,
164        end_line: primary.end_line,
165        old_start_line: before_entity.map(|b| b.start_line),
166        old_end_line: before_entity.map(|b| b.end_line),
167        parent_name: parent_name(primary, by_id),
168        file_path: primary.file_path.clone(),
169        old_entity_name: before_entity.and_then(|b| {
170            (b.name != after_entity.name).then(|| b.name.clone())
171        }),
172        old_file_path: before_entity.and_then(|b| {
173            (b.file_path != after_entity.file_path).then(|| b.file_path.clone())
174        }),
175        old_parent_id: before_entity.and_then(|b| {
176            (b.parent_id != after_entity.parent_id).then(|| b.parent_id.clone()).flatten()
177        }),
178        before_content: if change_type == ChangeType::Reordered {
179            None
180        } else {
181            before_entity.map(|b| b.content.clone())
182        },
183        after_content: if change_type == ChangeType::Deleted || change_type == ChangeType::Reordered {
184            None
185        } else {
186            Some(after_entity.content.clone())
187        },
188        commit_sha: commit_sha.map(String::from),
189        author: author.map(String::from),
190        timestamp: None,
191        structural_change,
192    }
193}
194
195/// Entity matching algorithm:
196/// 1. Exact ID match — same entity ID in before/after → modified or unchanged
197/// 2. Content hash match — same hash, different ID → modified, renamed, or moved
198/// 3. Same signature across file rename → moved, even if content changed
199/// 4. Fuzzy similarity — >80% content similarity → modified, renamed, or moved
200pub fn match_entities(
201    before: &[SemanticEntity],
202    after: &[SemanticEntity],
203    _file_path: &str,
204    similarity_fn: Option<&dyn Fn(&SemanticEntity, &SemanticEntity) -> f64>,
205    commit_sha: Option<&str>,
206    author: Option<&str>,
207) -> MatchResult {
208    let mut changes: Vec<SemanticChange> = Vec::new();
209    let mut matched_before: HashSet<&str> = HashSet::new();
210    let mut matched_after: HashSet<&str> = HashSet::new();
211
212    let before_by_id: HashMap<&str, &SemanticEntity> =
213        before.iter().map(|e| (e.id.as_str(), e)).collect();
214    let after_by_id: HashMap<&str, &SemanticEntity> =
215        after.iter().map(|e| (e.id.as_str(), e)).collect();
216
217    // Combined map for ancestor-chain lookup: after takes precedence so the
218    // displayed path reflects the post-change tree for non-deleted entities.
219    let combined_by_id: HashMap<&str, &SemanticEntity> = before
220        .iter()
221        .map(|e| (e.id.as_str(), e))
222        .chain(after.iter().map(|e| (e.id.as_str(), e)))
223        .collect();
224
225    // Phase 1: Exact ID match
226    for (&id, after_entity) in &after_by_id {
227        if let Some(before_entity) = before_by_id.get(id) {
228            matched_before.insert(id);
229            matched_after.insert(id);
230
231            if before_entity.content_hash != after_entity.content_hash {
232                changes.push(make_change(
233                    after_entity,
234                    ChangeType::Modified,
235                    Some(before_entity),
236                    commit_sha,
237                    author,
238                    &combined_by_id,
239                ));
240            }
241        }
242    }
243
244    // Collect unmatched
245    let unmatched_before: Vec<&SemanticEntity> = before
246        .iter()
247        .filter(|e| !matched_before.contains(e.id.as_str()))
248        .collect();
249    let unmatched_after: Vec<&SemanticEntity> = after
250        .iter()
251        .filter(|e| !matched_after.contains(e.id.as_str()))
252        .collect();
253    let mut unmatched_before_tokens = TokenCache::new(unmatched_before.len());
254    let mut unmatched_after_tokens = TokenCache::new(unmatched_after.len());
255
256    // Phase 2: Content hash match (rename/move detection)
257    let mut before_by_hash: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
258    let mut before_by_structural: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
259    for entity in &unmatched_before {
260        before_by_hash
261            .entry(entity.content_hash.as_str())
262            .or_default()
263            .push(entity);
264        if let Some(ref sh) = entity.structural_hash {
265            before_by_structural
266                .entry(sh.as_str())
267                .or_default()
268                .push(entity);
269        }
270    }
271
272    for after_entity in &unmatched_after {
273        if matched_after.contains(after_entity.id.as_str()) {
274            continue;
275        }
276        // Try exact content_hash first
277        let found = before_by_hash
278            .get_mut(after_entity.content_hash.as_str())
279            .and_then(|c| c.pop());
280        // Fall back to structural_hash (formatting/comment changes don't matter)
281        let found = found.or_else(|| {
282            after_entity.structural_hash.as_ref().and_then(|sh| {
283                before_by_structural.get_mut(sh.as_str()).and_then(|c| {
284                    c.iter()
285                        .position(|e| !matched_before.contains(e.id.as_str()))
286                        .map(|i| c.remove(i))
287                })
288            })
289        });
290
291        if let Some(before_entity) = found {
292            matched_before.insert(&before_entity.id);
293            matched_after.insert(&after_entity.id);
294
295            // If name, file, and parent are the same, only the parent qualifier in the ID changed
296            // (e.g. parent class was renamed). Skip — the entity itself is unchanged.
297            // But if parent_id differs, this is an intra-file move (e.g. method moved between classes).
298            if before_entity.name == after_entity.name
299                && before_entity.file_path == after_entity.file_path
300                && before_entity.content_hash == after_entity.content_hash
301                && before_entity.parent_id == after_entity.parent_id
302            {
303                continue;
304            }
305
306            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
307        }
308    }
309
310    // Phase 3: Same logical signature within a file.
311    // Collision groups can shrink or grow, changing only the disambiguator
312    // portion of an ID. Match those entities before the generic fuzzy pass.
313    let unmatched_before_parent_names: Vec<Option<String>> = unmatched_before
314        .iter()
315        .map(|entity| parent_name(entity, &before_by_id))
316        .collect();
317    let unmatched_after_parent_names: Vec<Option<String>> = unmatched_after
318        .iter()
319        .map(|entity| parent_name(entity, &after_by_id))
320        .collect();
321
322    let mut before_by_same_file_signature: HashMap<SameFileSignatureKey<'_>, Vec<usize>> =
323        HashMap::new();
324    for (before_idx, before_entity) in unmatched_before.iter().enumerate() {
325        if matched_before.contains(before_entity.id.as_str()) {
326            continue;
327        }
328        let key = (
329            before_entity.file_path.as_str(),
330            before_entity.entity_type.as_str(),
331            before_entity.name.as_str(),
332            unmatched_before_parent_names[before_idx].as_deref(),
333        );
334        before_by_same_file_signature
335            .entry(key)
336            .or_default()
337            .push(before_idx);
338    }
339
340    let mut after_by_same_file_signature: HashMap<SameFileSignatureKey<'_>, Vec<usize>> =
341        HashMap::new();
342    for (after_idx, after_entity) in unmatched_after.iter().enumerate() {
343        if matched_after.contains(after_entity.id.as_str()) {
344            continue;
345        }
346        let key = (
347            after_entity.file_path.as_str(),
348            after_entity.entity_type.as_str(),
349            after_entity.name.as_str(),
350            unmatched_after_parent_names[after_idx].as_deref(),
351        );
352        after_by_same_file_signature
353            .entry(key)
354            .or_default()
355            .push(after_idx);
356    }
357
358    let mut same_file_keys: Vec<SameFileSignatureKey<'_>> = after_by_same_file_signature
359        .keys()
360        .copied()
361        .filter(|key| before_by_same_file_signature.contains_key(key))
362        .collect();
363    same_file_keys.sort_unstable();
364
365    for key in same_file_keys {
366        let before_indices = &before_by_same_file_signature[&key];
367        let after_indices = &after_by_same_file_signature[&key];
368        let mut same_file_candidates: Vec<(f64, usize, usize, usize)> = Vec::new();
369
370        for &after_idx in after_indices {
371            let after_entity = unmatched_after[after_idx];
372            if matched_after.contains(after_entity.id.as_str()) {
373                continue;
374            }
375            for &before_idx in before_indices {
376                let before_entity = unmatched_before[before_idx];
377                if matched_before.contains(before_entity.id.as_str()) {
378                    continue;
379                }
380
381                let score = match similarity_fn {
382                    Some(f) => f(before_entity, after_entity),
383                    None => default_similarity_from_tokens(
384                        unmatched_before_tokens.get(&unmatched_before, before_idx),
385                        unmatched_after_tokens.get(&unmatched_after, after_idx),
386                    ),
387                };
388                same_file_candidates.push((
389                    score,
390                    before_entity.start_line.abs_diff(after_entity.start_line),
391                    before_idx,
392                    after_idx,
393                ));
394            }
395        }
396
397        same_file_candidates.sort_by(|a, b| {
398            b.0.partial_cmp(&a.0)
399                .unwrap_or(Ordering::Equal)
400                .then_with(|| a.1.cmp(&b.1))
401                .then_with(|| a.2.cmp(&b.2))
402                .then_with(|| a.3.cmp(&b.3))
403        });
404
405        for (score, _line_distance, before_idx, after_idx) in same_file_candidates {
406            if !score.is_finite() || score < SAME_FILE_SIGNATURE_MIN_SIMILARITY {
407                continue;
408            }
409            let before_entity = unmatched_before[before_idx];
410            let after_entity = unmatched_after[after_idx];
411            if matched_before.contains(before_entity.id.as_str())
412                || matched_after.contains(after_entity.id.as_str())
413            {
414                continue;
415            }
416
417            matched_before.insert(before_entity.id.as_str());
418            matched_after.insert(after_entity.id.as_str());
419
420            if before_entity.content_hash == after_entity.content_hash {
421                continue;
422            }
423
424            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
425        }
426    }
427
428    // Phase 4: Same logical signature across a file rename.
429    // A file path change changes entity IDs, so renamed files with edited
430    // entities need a signature fallback to avoid add/delete pairs.
431    let mut before_by_rename_signature: HashMap<RenameSignatureKey<'_>, Vec<usize>> =
432        HashMap::new();
433    for (before_idx, before_entity) in unmatched_before.iter().enumerate() {
434        if matched_before.contains(before_entity.id.as_str()) {
435            continue;
436        }
437        let key = (
438            before_entity.entity_type.as_str(),
439            before_entity.name.as_str(),
440            unmatched_before_parent_names[before_idx].as_deref(),
441        );
442        before_by_rename_signature
443            .entry(key)
444            .or_default()
445            .push(before_idx);
446    }
447
448    for (after_idx, after_entity) in unmatched_after.iter().enumerate() {
449        if matched_after.contains(after_entity.id.as_str()) {
450            continue;
451        }
452
453        let key = (
454            after_entity.entity_type.as_str(),
455            after_entity.name.as_str(),
456            unmatched_after_parent_names[after_idx].as_deref(),
457        );
458        let Some(before_indices) = before_by_rename_signature.get(&key) else {
459            continue;
460        };
461
462        let mut best_match: Option<&SemanticEntity> = None;
463        let mut best_score = f64::NEG_INFINITY;
464
465        for &before_idx in before_indices {
466            let before_entity = unmatched_before[before_idx];
467            if matched_before.contains(before_entity.id.as_str()) {
468                continue;
469            }
470            if before_entity.file_path == after_entity.file_path {
471                continue;
472            }
473
474            let score = match similarity_fn {
475                Some(f) => f(before_entity, after_entity),
476                None => default_similarity_from_tokens(
477                    unmatched_before_tokens.get(&unmatched_before, before_idx),
478                    unmatched_after_tokens.get(&unmatched_after, after_idx),
479                ),
480            };
481            if score > best_score {
482                best_score = score;
483                best_match = Some(before_entity);
484            }
485        }
486
487        if let Some(before_entity) = best_match {
488            matched_before.insert(before_entity.id.as_str());
489            matched_after.insert(after_entity.id.as_str());
490            changes.push(make_change(after_entity, classify_match(before_entity, after_entity), Some(before_entity), commit_sha, author, &combined_by_id));
491        }
492    }
493
494    // Phase 5: Fuzzy similarity (>80% threshold)
495    // Cache token sets on demand and group by type.
496    let still_unmatched_before: Vec<(usize, &SemanticEntity)> = unmatched_before
497        .iter()
498        .enumerate()
499        .filter(|(_, e)| !matched_before.contains(e.id.as_str()))
500        .map(|(i, e)| (i, *e))
501        .collect();
502    let still_unmatched_after: Vec<(usize, &SemanticEntity)> = unmatched_after
503        .iter()
504        .enumerate()
505        .filter(|(_, e)| !matched_after.contains(e.id.as_str()))
506        .map(|(i, e)| (i, *e))
507        .collect();
508
509    if !still_unmatched_before.is_empty() && !still_unmatched_after.is_empty() {
510        const THRESHOLD: f64 = 0.8;
511        const SIZE_RATIO_CUTOFF: f64 = 0.5;
512
513        // Group before entities by type: O(sum(n_t × m_t)) instead of O(N×M)
514        let mut before_by_type: HashMap<&str, Vec<usize>> = HashMap::new();
515        for (i, (_, e)) in still_unmatched_before.iter().enumerate() {
516            before_by_type
517                .entry(e.entity_type.as_str())
518                .or_default()
519                .push(i);
520        }
521
522        for &(after_unmatched_idx, after_entity) in &still_unmatched_after {
523            let candidates = match before_by_type.get(after_entity.entity_type.as_str()) {
524                Some(indices) => indices,
525                None => continue,
526            };
527
528            let a_len = unmatched_after_tokens
529                .get(&unmatched_after, after_unmatched_idx)
530                .unique_tokens
531                .len();
532            let mut best_idx: Option<usize> = None;
533            let mut best_score: f64 = 0.0;
534
535            for &bi in candidates {
536                let (before_unmatched_idx, before_entity) = still_unmatched_before[bi];
537                if matched_before.contains(before_entity.id.as_str()) {
538                    continue;
539                }
540
541                let b_len = unmatched_before_tokens
542                    .get(&unmatched_before, before_unmatched_idx)
543                    .unique_tokens
544                    .len();
545
546                // Size ratio filter using pre-computed set lengths
547                let (min_l, max_l) = if a_len < b_len {
548                    (a_len, b_len)
549                } else {
550                    (b_len, a_len)
551                };
552                if max_l > 0 && (min_l as f64 / max_l as f64) < SIZE_RATIO_CUTOFF {
553                    continue;
554                }
555
556                // Jaccard on pre-computed sets
557                let score = jaccard_similarity(
558                    &unmatched_after_tokens
559                        .get(&unmatched_after, after_unmatched_idx)
560                        .unique_tokens,
561                    &unmatched_before_tokens
562                        .get(&unmatched_before, before_unmatched_idx)
563                        .unique_tokens,
564                );
565
566                if score >= THRESHOLD && score > best_score {
567                    best_score = score;
568                    best_idx = Some(bi);
569                }
570            }
571
572            if let Some(bi) = best_idx {
573                let matched = still_unmatched_before[bi].1;
574                matched_before.insert(&matched.id);
575                matched_after.insert(&after_entity.id);
576
577                // If name, file, and parent are the same, only the parent qualifier changed.
578                if matched.name == after_entity.name
579                    && matched.file_path == after_entity.file_path
580                    && matched.content_hash == after_entity.content_hash
581                    && matched.parent_id == after_entity.parent_id
582                {
583                    continue;
584                }
585
586                changes.push(make_change(after_entity, classify_match(matched, after_entity), Some(matched), commit_sha, author, &combined_by_id));
587            }
588        }
589    }
590
591    // Phase 6: Intra-file reorder detection
592    // For entities that matched by exact ID with identical content (unchanged),
593    // check if their relative ordering changed within the file.
594    detect_reorders(before, after, &matched_before, &matched_after, &mut changes, commit_sha, author, &combined_by_id);
595
596    // Remaining unmatched before = deleted
597    for entity in before.iter().filter(|e| !matched_before.contains(e.id.as_str())) {
598        changes.push(make_change(entity, ChangeType::Deleted, Some(entity), commit_sha, author, &combined_by_id));
599    }
600
601    // Remaining unmatched after = added
602    for entity in after.iter().filter(|e| !matched_after.contains(e.id.as_str())) {
603        changes.push(make_change(entity, ChangeType::Added, None, commit_sha, author, &combined_by_id));
604    }
605
606    MatchResult { changes }
607}
608
609/// Default content similarity using Jaccard index on whitespace-split tokens
610pub fn default_similarity(a: &SemanticEntity, b: &SemanticEntity) -> f64 {
611    let tokens_a = tokenize_content(&a.content);
612    let tokens_b = tokenize_content(&b.content);
613    default_similarity_from_tokens(&tokens_a, &tokens_b)
614}
615
616/// Detect intra-file reordering of unchanged entities.
617///
618/// Takes entities that matched by exact ID with identical content and checks
619/// if their relative ordering changed. Uses a longest non-decreasing
620/// subsequence on the "after" positions to find the minimum set of moved entities.
621fn detect_reorders(
622    before: &[SemanticEntity],
623    after: &[SemanticEntity],
624    matched_before: &HashSet<&str>,
625    matched_after: &HashSet<&str>,
626    changes: &mut Vec<SemanticChange>,
627    commit_sha: Option<&str>,
628    author: Option<&str>,
629    by_id: &HashMap<&str, &SemanticEntity>,
630) {
631    // Collect unchanged entities: matched by ID with same content_hash
632    let before_by_id: HashMap<&str, &SemanticEntity> =
633        before.iter().map(|e| (e.id.as_str(), e)).collect();
634    let before_index_by_id: HashMap<&str, usize> = before
635        .iter()
636        .enumerate()
637        .map(|(i, e)| (e.id.as_str(), i))
638        .collect();
639    let after_index_by_id: HashMap<&str, usize> = after
640        .iter()
641        .enumerate()
642        .map(|(i, e)| (e.id.as_str(), i))
643        .collect();
644
645    // Group by file. For each file, collect unchanged entities in their
646    // before-order, then look up their after-positions.
647    let mut by_file: HashMap<&str, Vec<(&SemanticEntity, &SemanticEntity, usize, usize)>> =
648        HashMap::new();
649    for after_entity in after {
650        if !matched_after.contains(after_entity.id.as_str()) {
651            continue;
652        }
653        if let Some(before_entity) = before_by_id.get(after_entity.id.as_str()) {
654            if !matched_before.contains(before_entity.id.as_str()) {
655                continue;
656            }
657            // Only consider truly unchanged entities (same content)
658            if before_entity.content_hash != after_entity.content_hash {
659                continue;
660            }
661            // Only intra-file
662            if before_entity.file_path != after_entity.file_path {
663                continue;
664            }
665            let (Some(&before_index), Some(&after_index)) = (
666                before_index_by_id.get(before_entity.id.as_str()),
667                after_index_by_id.get(after_entity.id.as_str()),
668            ) else {
669                continue;
670            };
671            by_file
672                .entry(after_entity.file_path.as_str())
673                .or_default()
674                .push((before_entity, after_entity, before_index, after_index));
675        }
676    }
677
678    for (_file, pairs) in &mut by_file {
679        if pairs.len() < 2 {
680            continue;
681        }
682
683        // Sort by before position to get the "before" ordering.
684        pairs.sort_by_key(|(b, _, before_index, _)| (b.start_line, *before_index));
685
686        // Map to after positions in before-order. The extraction index gives
687        // same-line entities a stable secondary ordering.
688        let after_positions: Vec<(usize, usize)> = pairs
689            .iter()
690            .map(|(_, a, _, after_index)| (a.start_line, *after_index))
691            .collect();
692
693        // Find LNDS indices (entities that stayed in relative order).
694        let lnds_set = longest_non_decreasing_subsequence_indices(&after_positions);
695
696        // Entities outside the LNDS were reordered.
697        for (i, (before_entity, after_entity, _, _)) in pairs.iter().enumerate() {
698            if lnds_set.contains(&i) {
699                continue;
700            }
701            changes.push(make_change(
702                after_entity,
703                ChangeType::Reordered,
704                Some(before_entity),
705                commit_sha,
706                author,
707                by_id,
708            ));
709        }
710    }
711}
712
713/// Find indices that form the longest non-decreasing subsequence.
714/// Returns a HashSet of indices in the original array that are part of the subsequence.
715fn longest_non_decreasing_subsequence_indices(seq: &[(usize, usize)]) -> HashSet<usize> {
716    let n = seq.len();
717    if n == 0 {
718        return HashSet::new();
719    }
720
721    // tails[i] = smallest tail position for a non-decreasing subsequence of length i+1
722    let mut tails: Vec<(usize, usize)> = Vec::new();
723    // parent[i] = index of previous element in the subsequence ending at seq[i]
724    let mut parent: Vec<Option<usize>> = vec![None; n];
725    // tail_idx[i] = index in seq that tails[i] points to
726    let mut tail_idx: Vec<usize> = Vec::new();
727
728    for i in 0..n {
729        // Non-decreasing subsequences use the first tail greater than the
730        // current position, allowing equal positions to extend the sequence.
731        let pos = tails.partition_point(|&t| t <= seq[i]);
732        if pos == tails.len() {
733            tails.push(seq[i]);
734            tail_idx.push(i);
735        } else {
736            tails[pos] = seq[i];
737            tail_idx[pos] = i;
738        }
739        parent[i] = if pos > 0 { Some(tail_idx[pos - 1]) } else { None };
740    }
741
742    // Trace back to find actual LIS indices
743    let mut result = HashSet::new();
744    let mut idx = *tail_idx.last().unwrap();
745    result.insert(idx);
746    while let Some(p) = parent[idx] {
747        result.insert(p);
748        idx = p;
749    }
750    result
751}
752
753#[cfg(test)]
754mod tests {
755    use super::*;
756    use crate::utils::hash::content_hash;
757
758    fn make_entity(id: &str, name: &str, content: &str, file_path: &str) -> SemanticEntity {
759        SemanticEntity {
760            id: id.to_string(),
761            file_path: file_path.to_string(),
762            entity_type: "function".to_string(),
763            name: name.to_string(),
764            parent_id: None,
765            content: content.to_string(),
766            content_hash: content_hash(content),
767            structural_hash: None,
768            start_line: 1,
769            end_line: 1,
770            metadata: None,
771        }
772    }
773
774    #[test]
775    fn test_exact_match_modified() {
776        let before = vec![make_entity("a::f::foo", "foo", "old content", "a.ts")];
777        let after = vec![make_entity("a::f::foo", "foo", "new content", "a.ts")];
778        let result = match_entities(&before, &after, "a.ts", None, None, None);
779        assert_eq!(result.changes.len(), 1);
780        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
781    }
782
783    #[test]
784    fn test_change_line_spans_track_current_and_previous_entities() {
785        let before = vec![make_entity_at(
786            "a::f::foo",
787            "foo",
788            "fn foo() { old }",
789            "a.rs",
790            3,
791        )];
792        let after = vec![make_entity_at(
793            "a::f::foo",
794            "foo",
795            "fn foo() { new }",
796            "a.rs",
797            7,
798        )];
799
800        let result = match_entities(&before, &after, "a.rs", None, None, None);
801
802        assert_eq!(result.changes.len(), 1);
803        assert_eq!(result.changes[0].start_line, 7);
804        assert_eq!(result.changes[0].end_line, 9);
805        assert_eq!(result.changes[0].old_start_line, Some(3));
806        assert_eq!(result.changes[0].old_end_line, Some(5));
807    }
808
809    #[test]
810    fn test_exact_match_unchanged() {
811        let before = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
812        let after = vec![make_entity("a::f::foo", "foo", "same", "a.ts")];
813        let result = match_entities(&before, &after, "a.ts", None, None, None);
814        assert_eq!(result.changes.len(), 0);
815    }
816
817    #[test]
818    fn test_added_deleted() {
819        let before = vec![make_entity("a::f::old", "old", "content", "a.ts")];
820        let after = vec![make_entity("a::f::new", "new", "different", "a.ts")];
821        let result = match_entities(&before, &after, "a.ts", None, None, None);
822        assert_eq!(result.changes.len(), 2);
823        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
824        assert!(types.contains(&ChangeType::Deleted));
825        assert!(types.contains(&ChangeType::Added));
826    }
827
828    #[test]
829    fn test_content_hash_rename() {
830        let before = vec![make_entity("a::f::old", "old", "same content", "a.ts")];
831        let after = vec![make_entity("a::f::new", "new", "same content", "a.ts")];
832        let result = match_entities(&before, &after, "a.ts", None, None, None);
833        assert_eq!(result.changes.len(), 1);
834        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
835    }
836
837    #[test]
838    fn test_same_name_fuzzy_match_is_modified() {
839        let before = vec![make_entity(
840            "a.ts::function::foo@L1",
841            "foo",
842            "function foo() { const value = input + 1; return process(value); }",
843            "a.ts",
844        )];
845        let after = vec![make_entity(
846            "a.ts::function::foo@L2",
847            "foo",
848            "function foo() { const value = input + 2; return process(value); }",
849            "a.ts",
850        )];
851
852        let result = match_entities(&before, &after, "a.ts", None, None, None);
853
854        assert_eq!(result.changes.len(), 1);
855        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
856        assert_eq!(result.changes[0].entity_name, "foo");
857        assert!(result.changes[0].old_entity_name.is_none());
858    }
859
860    #[test]
861    fn test_different_name_fuzzy_match_is_renamed() {
862        let before = vec![make_entity(
863            "a.ts::function::old_name@L1",
864            "old_name",
865            "function old_name(input: number) { const first = input + 1; const second = first * 2; const third = second - 3; return compute(third, first, second); }",
866            "a.ts",
867        )];
868        let after = vec![make_entity(
869            "a.ts::function::new_name@L2",
870            "new_name",
871            "function new_name(input: number) { const first = input + 1; const second = first * 2; const third = second - 3; return compute(third, first, second); }",
872            "a.ts",
873        )];
874
875        let result = match_entities(&before, &after, "a.ts", None, None, None);
876
877        assert_eq!(result.changes.len(), 1);
878        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
879        assert_eq!(result.changes[0].entity_name, "new_name");
880        assert_eq!(
881            result.changes[0].old_entity_name.as_deref(),
882            Some("old_name")
883        );
884    }
885
886    #[test]
887    fn test_same_signature_file_rename_with_content_change_is_moved() {
888        let mut before_entity = make_entity(
889            "old.ts::function::foo",
890            "foo",
891            "export function foo() { return alpha + beta + gamma; }",
892            "old.ts",
893        );
894        before_entity.structural_hash = Some("before-structure".to_string());
895        let mut after_entity = make_entity(
896            "new.ts::function::foo",
897            "foo",
898            "export function foo() { return one + two + three; }",
899            "new.ts",
900        );
901        after_entity.structural_hash = Some("after-structure".to_string());
902        let before = vec![before_entity];
903        let after = vec![after_entity];
904
905        let result = match_entities(&before, &after, "new.ts", None, None, None);
906
907        assert_eq!(result.changes.len(), 1);
908        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
909        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
910        assert_eq!(result.changes[0].structural_change, Some(true));
911    }
912
913    #[test]
914    fn test_same_signature_file_rename_keeps_duplicate_names_with_parents() {
915        let mut before_alpha_class = make_entity(
916            "old.ts::class::Alpha",
917            "Alpha",
918            "class Alpha {}",
919            "old.ts",
920        );
921        before_alpha_class.entity_type = "class".to_string();
922        let mut before_beta_class =
923            make_entity("old.ts::class::Beta", "Beta", "class Beta {}", "old.ts");
924        before_beta_class.entity_type = "class".to_string();
925
926        let mut after_alpha_class = make_entity(
927            "new.ts::class::Alpha",
928            "Alpha",
929            "class Alpha {}",
930            "new.ts",
931        );
932        after_alpha_class.entity_type = "class".to_string();
933        let mut after_beta_class =
934            make_entity("new.ts::class::Beta", "Beta", "class Beta {}", "new.ts");
935        after_beta_class.entity_type = "class".to_string();
936
937        let before = vec![
938            before_alpha_class,
939            before_beta_class,
940            make_entity_with_parent(
941                "old.ts::class::Alpha::run",
942                "run",
943                "run() { return alpha_original_value; }",
944                "old.ts",
945                Some("old.ts::class::Alpha"),
946            ),
947            make_entity_with_parent(
948                "old.ts::class::Beta::run",
949                "run",
950                "run() { return beta_original_value; }",
951                "old.ts",
952                Some("old.ts::class::Beta"),
953            ),
954        ];
955        let after = vec![
956            after_alpha_class,
957            after_beta_class,
958            make_entity_with_parent(
959                "new.ts::class::Alpha::run",
960                "run",
961                "run() { return alpha_changed_value; }",
962                "new.ts",
963                Some("new.ts::class::Alpha"),
964            ),
965            make_entity_with_parent(
966                "new.ts::class::Beta::run",
967                "run",
968                "run() { return beta_changed_value; }",
969                "new.ts",
970                Some("new.ts::class::Beta"),
971            ),
972        ];
973
974        let result = match_entities(&before, &after, "new.ts", None, None, None);
975        let method_added_or_deleted = result
976            .changes
977            .iter()
978            .filter(|change| {
979                change.entity_type == "method"
980                    && matches!(change.change_type, ChangeType::Added | ChangeType::Deleted)
981            })
982            .count();
983        let alpha = result
984            .changes
985            .iter()
986            .find(|change| change.entity_id == "new.ts::class::Alpha::run")
987            .expect("alpha method should be matched across the file rename");
988        let beta = result
989            .changes
990            .iter()
991            .find(|change| change.entity_id == "new.ts::class::Beta::run")
992            .expect("beta method should be matched across the file rename");
993
994        assert_eq!(method_added_or_deleted, 0, "{:?}", result.changes);
995        assert_eq!(alpha.change_type, ChangeType::Moved);
996        assert_eq!(alpha.old_parent_id.as_deref(), Some("old.ts::class::Alpha"));
997        assert_eq!(beta.change_type, ChangeType::Moved);
998        assert_eq!(beta.old_parent_id.as_deref(), Some("old.ts::class::Beta"));
999    }
1000
1001    #[test]
1002    fn test_moved_content_change_without_structural_hash_is_unknown_structurally() {
1003        let before = vec![make_entity(
1004            "old.ts::function::foo",
1005            "foo",
1006            "export function foo() { return alpha + beta + gamma; }",
1007            "old.ts",
1008        )];
1009        let after = vec![make_entity(
1010            "new.ts::function::foo",
1011            "foo",
1012            "export function foo() { return one + two + three; }",
1013            "new.ts",
1014        )];
1015
1016        let result = match_entities(&before, &after, "new.ts", None, None, None);
1017
1018        assert_eq!(result.changes.len(), 1);
1019        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1020        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.ts"));
1021        assert_eq!(result.changes[0].structural_change, None);
1022    }
1023
1024    #[test]
1025    fn test_parent_child_dedup_class_method() {
1026        // Class entity contains the method body in its content.
1027        // parent_id stores the full entity ID of the parent.
1028        let class_before = SemanticEntity {
1029            id: "a.ts::class::DataStack".to_string(),
1030            file_path: "a.ts".to_string(),
1031            entity_type: "class".to_string(),
1032            name: "DataStack".to_string(),
1033            parent_id: None,
1034            content: "class DataStack { constructor() {} genPg() { old } }".to_string(),
1035            content_hash: content_hash("class DataStack { constructor() {} genPg() { old } }"),
1036            structural_hash: None,
1037            start_line: 1,
1038            end_line: 10,
1039            metadata: None,
1040        };
1041        let method_before = SemanticEntity {
1042            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
1043            file_path: "a.ts".to_string(),
1044            entity_type: "method".to_string(),
1045            name: "genPg".to_string(),
1046            parent_id: Some("a.ts::class::DataStack".to_string()),
1047            content: "genPg() { old }".to_string(),
1048            content_hash: content_hash("genPg() { old }"),
1049            structural_hash: None,
1050            start_line: 5,
1051            end_line: 8,
1052            metadata: None,
1053        };
1054
1055        let class_after = SemanticEntity {
1056            id: "a.ts::class::DataStack".to_string(),
1057            file_path: "a.ts".to_string(),
1058            entity_type: "class".to_string(),
1059            name: "DataStack".to_string(),
1060            parent_id: None,
1061            content: "class DataStack { constructor() {} genPg() { new } }".to_string(),
1062            content_hash: content_hash("class DataStack { constructor() {} genPg() { new } }"),
1063            structural_hash: None,
1064            start_line: 1,
1065            end_line: 10,
1066            metadata: None,
1067        };
1068        let method_after = SemanticEntity {
1069            id: "a.ts::a.ts::class::DataStack::genPg".to_string(),
1070            file_path: "a.ts".to_string(),
1071            entity_type: "method".to_string(),
1072            name: "genPg".to_string(),
1073            parent_id: Some("a.ts::class::DataStack".to_string()),
1074            content: "genPg() { new }".to_string(),
1075            content_hash: content_hash("genPg() { new }"),
1076            structural_hash: None,
1077            start_line: 5,
1078            end_line: 8,
1079            metadata: None,
1080        };
1081
1082        let before = vec![class_before, method_before];
1083        let after = vec![class_after, method_after];
1084        let result = match_entities(&before, &after, "a.ts", None, None, None);
1085
1086        // match_entities no longer deduplicates — suppression happens in differ.rs.
1087        // Both the class and the method are Modified here.
1088        assert_eq!(result.changes.len(), 2);
1089        let types: Vec<ChangeType> = result.changes.iter().map(|c| c.change_type).collect();
1090        assert!(types.iter().all(|t| *t == ChangeType::Modified));
1091    }
1092
1093    #[test]
1094    fn test_parent_not_deduped_when_no_child_changes() {
1095        // Only the class-level content changes (e.g. a field added), no method changes
1096        let class_before = SemanticEntity {
1097            id: "a.ts::class::Foo".to_string(),
1098            file_path: "a.ts".to_string(),
1099            entity_type: "class".to_string(),
1100            name: "Foo".to_string(),
1101            parent_id: None,
1102            content: "class Foo { bar() {} }".to_string(),
1103            content_hash: content_hash("class Foo { bar() {} }"),
1104            structural_hash: None,
1105            start_line: 1,
1106            end_line: 5,
1107            metadata: None,
1108        };
1109        let method_before = SemanticEntity {
1110            id: "a.ts::a.ts::class::Foo::bar".to_string(),
1111            file_path: "a.ts".to_string(),
1112            entity_type: "method".to_string(),
1113            name: "bar".to_string(),
1114            parent_id: Some("a.ts::class::Foo".to_string()),
1115            content: "bar() {}".to_string(),
1116            content_hash: content_hash("bar() {}"),
1117            structural_hash: None,
1118            start_line: 2,
1119            end_line: 4,
1120            metadata: None,
1121        };
1122
1123        let class_after = SemanticEntity {
1124            id: "a.ts::class::Foo".to_string(),
1125            file_path: "a.ts".to_string(),
1126            entity_type: "class".to_string(),
1127            name: "Foo".to_string(),
1128            parent_id: None,
1129            content: "class Foo { x = 1; bar() {} }".to_string(),
1130            content_hash: content_hash("class Foo { x = 1; bar() {} }"),
1131            structural_hash: None,
1132            start_line: 1,
1133            end_line: 6,
1134            metadata: None,
1135        };
1136        let method_after = SemanticEntity {
1137            id: "a.ts::a.ts::class::Foo::bar".to_string(),
1138            file_path: "a.ts".to_string(),
1139            entity_type: "method".to_string(),
1140            name: "bar".to_string(),
1141            parent_id: Some("a.ts::class::Foo".to_string()),
1142            content: "bar() {}".to_string(),
1143            content_hash: content_hash("bar() {}"),
1144            structural_hash: None,
1145            start_line: 3,
1146            end_line: 5,
1147            metadata: None,
1148        };
1149
1150        let before = vec![class_before, method_before];
1151        let after = vec![class_after, method_after];
1152        let result = match_entities(&before, &after, "a.ts", None, None, None);
1153
1154        // Class changed but method didn't, so class should still appear
1155        assert_eq!(result.changes.len(), 1);
1156        assert_eq!(result.changes[0].entity_name, "Foo");
1157        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1158    }
1159
1160    fn make_entity_with_parent(id: &str, name: &str, content: &str, file_path: &str, parent_id: Option<&str>) -> SemanticEntity {
1161        SemanticEntity {
1162            id: id.to_string(),
1163            file_path: file_path.to_string(),
1164            entity_type: "method".to_string(),
1165            name: name.to_string(),
1166            parent_id: parent_id.map(String::from),
1167            content: content.to_string(),
1168            content_hash: content_hash(content),
1169            structural_hash: None,
1170            start_line: 1,
1171            end_line: 1,
1172            metadata: None,
1173        }
1174    }
1175
1176    #[test]
1177    fn test_intra_file_move_between_classes() {
1178        // Method moves from ClassA to ClassB in the same file
1179        let before = vec![make_entity_with_parent(
1180            "a.rs::class::ClassA::foo", "foo", "fn foo() { do_thing() }",
1181            "a.rs", Some("a.rs::class::ClassA"),
1182        )];
1183        let after = vec![make_entity_with_parent(
1184            "a.rs::class::ClassB::foo", "foo", "fn foo() { do_thing() }",
1185            "a.rs", Some("a.rs::class::ClassB"),
1186        )];
1187        let result = match_entities(&before, &after, "a.rs", None, None, None);
1188        assert_eq!(result.changes.len(), 1);
1189        assert_eq!(result.changes[0].change_type, ChangeType::Moved);
1190        assert_eq!(result.changes[0].old_parent_id, Some("a.rs::class::ClassA".to_string()));
1191    }
1192
1193    #[test]
1194    fn test_same_parent_is_rename_not_move() {
1195        // Same parent, different name = rename (not move)
1196        // Content must be identical (same hash) so Phase 2 catches it
1197        let body = "fn method(&self) { let x = self.compute(); self.validate(x); self.store(x) }";
1198        let before = vec![make_entity_with_parent(
1199            "a.rs::class::Foo::old_method", "old_method", body,
1200            "a.rs", Some("a.rs::class::Foo"),
1201        )];
1202        let after = vec![make_entity_with_parent(
1203            "a.rs::class::Foo::new_method", "new_method", body,
1204            "a.rs", Some("a.rs::class::Foo"),
1205        )];
1206        let result = match_entities(&before, &after, "a.rs", None, None, None);
1207        assert_eq!(result.changes.len(), 1);
1208        assert_eq!(result.changes[0].change_type, ChangeType::Renamed);
1209        assert!(result.changes[0].old_parent_id.is_none());
1210    }
1211
1212    fn make_entity_at(id: &str, name: &str, content: &str, file_path: &str, line: usize) -> SemanticEntity {
1213        SemanticEntity {
1214            id: id.to_string(),
1215            file_path: file_path.to_string(),
1216            entity_type: "function".to_string(),
1217            name: name.to_string(),
1218            parent_id: None,
1219            content: content.to_string(),
1220            content_hash: content_hash(content),
1221            structural_hash: None,
1222            start_line: line,
1223            end_line: line + 2,
1224            metadata: None,
1225        }
1226    }
1227
1228    #[test]
1229    fn test_reorder_detection() {
1230        let before = vec![
1231            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1232            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
1233            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 9),
1234        ];
1235        let after = vec![
1236            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1237            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 5),
1238            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 9),
1239        ];
1240        let result = match_entities(&before, &after, "a.rs", None, None, None);
1241        assert_eq!(result.changes.len(), 1);
1242        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
1243        assert!(result.changes[0].before_content.is_none());
1244        assert!(result.changes[0].old_start_line.is_some());
1245        assert!(result.changes[0].old_end_line.is_some());
1246        assert_ne!(result.changes[0].old_start_line, Some(result.changes[0].start_line));
1247        // Either beta or gamma is marked, LIS picks the minimum
1248        assert!(result.changes[0].entity_name == "beta" || result.changes[0].entity_name == "gamma");
1249    }
1250
1251    #[test]
1252    fn test_no_reorder_when_order_preserved() {
1253        let before = vec![
1254            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1255            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 5),
1256        ];
1257        let after = vec![
1258            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1259            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 10),
1260        ];
1261        let result = match_entities(&before, &after, "a.rs", None, None, None);
1262        // Lines shifted but relative order is same, no reorder
1263        assert_eq!(result.changes.len(), 0);
1264    }
1265
1266    #[test]
1267    fn test_no_reorder_for_unchanged_entities_on_same_line() {
1268        let before = vec![
1269            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1270            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 1),
1271            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 1),
1272            make_entity_at("a::f::delta", "delta", "fn delta() {}", "a.rs", 1),
1273        ];
1274        let after = vec![
1275            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1276            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 1),
1277            make_entity_at("a::f::gamma", "gamma", "fn gamma() { 999 }", "a.rs", 1),
1278            make_entity_at("a::f::delta", "delta", "fn delta() {}", "a.rs", 1),
1279        ];
1280        let result = match_entities(&before, &after, "a.rs", None, None, None);
1281
1282        assert_eq!(result.changes.len(), 1);
1283        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
1284        assert_eq!(result.changes[0].entity_name, "gamma");
1285    }
1286
1287    #[test]
1288    fn test_collision_group_shrink_with_survivor_edit() {
1289        let before = vec![
1290            make_entity_at(
1291                "a::f::f@L1#1",
1292                "f",
1293                "function f(a: number): void {}",
1294                "a.ts",
1295                1,
1296            ),
1297            make_entity_at(
1298                "a::f::f@L1#2",
1299                "f",
1300                "function f(a: string): void {}",
1301                "a.ts",
1302                1,
1303            ),
1304        ];
1305        let after = vec![make_entity_at(
1306            "a::f::f",
1307            "f",
1308            "function f(a: number): void { console.log(a) }",
1309            "a.ts",
1310            1,
1311        )];
1312        let result = match_entities(&before, &after, "a.ts", None, None, None);
1313        let modified = result
1314            .changes
1315            .iter()
1316            .filter(|change| change.change_type == ChangeType::Modified)
1317            .count();
1318        let deleted = result
1319            .changes
1320            .iter()
1321            .filter(|change| change.change_type == ChangeType::Deleted)
1322            .count();
1323
1324        assert_eq!(modified, 1, "{:?}", result.changes);
1325        assert_eq!(deleted, 1, "{:?}", result.changes);
1326    }
1327
1328    #[test]
1329    fn test_collision_group_growth_matches_edited_survivor() {
1330        let before = vec![make_entity_at(
1331            "a::f::f",
1332            "f",
1333            "function f(): void { return oldValue + stableThing; }",
1334            "a.ts",
1335            1,
1336        )];
1337        let after = vec![
1338            make_entity_at(
1339                "a::f::f@L1#1",
1340                "f",
1341                "function f(): void { totallyDifferentAlphaBetaGamma(); }",
1342                "a.ts",
1343                1,
1344            ),
1345            make_entity_at(
1346                "a::f::f@L1#2",
1347                "f",
1348                "function f(): void { return oldValue + stableThing + changedThing; }",
1349                "a.ts",
1350                1,
1351            ),
1352        ];
1353        let result = match_entities(&before, &after, "a.ts", None, None, None);
1354        let modified = result
1355            .changes
1356            .iter()
1357            .find(|change| change.change_type == ChangeType::Modified)
1358            .expect("edited survivor should be modified");
1359        let added = result
1360            .changes
1361            .iter()
1362            .find(|change| change.change_type == ChangeType::Added)
1363            .expect("new duplicate should be added");
1364
1365        assert_eq!(result.changes.len(), 2, "{:?}", result.changes);
1366        assert_eq!(modified.entity_id, "a::f::f@L1#2");
1367        assert_eq!(added.entity_id, "a::f::f@L1#1");
1368    }
1369
1370    #[test]
1371    fn test_same_file_signature_rejects_unrelated_content() {
1372        let before = vec![
1373            make_entity_at(
1374                "a.ts::function::process@L1",
1375                "process",
1376                "function process(req: Request) { return validateInput(req.body); const result = processData(req.params); return formatResponse(result, req.headers); }",
1377                "a.ts",
1378                1,
1379            ),
1380            make_entity_at(
1381                "a.ts::function::process@L7",
1382                "process",
1383                "function process(socket: WebSocket): void { const conn = establishConnection(socket.url); conn.onMessage(data => parseProtobuf(data)); conn.onClose(() => cleanupResources(conn.id)); }",
1384                "a.ts",
1385                7,
1386            ),
1387        ];
1388        let after = vec![
1389            make_entity_at(
1390                "a.ts::function::process@L1",
1391                "process",
1392                "function process(req: Request) { return validateInput(req.body); const result = processData(req.params); return sendJSON(result); }",
1393                "a.ts",
1394                1,
1395            ),
1396            make_entity_at(
1397                "a.ts::function::process@L9",
1398                "process",
1399                "function process(file: File): Promise<string> { const buffer = await readFileAsBuffer(file); const hash = computeSHA256(buffer); await uploadToS3(hash, buffer); return generateCDNUrl(hash); }",
1400                "a.ts",
1401                9,
1402            ),
1403        ];
1404
1405        let result = match_entities(&before, &after, "a.ts", None, None, None);
1406        let modified = result
1407            .changes
1408            .iter()
1409            .filter(|change| change.change_type == ChangeType::Modified)
1410            .count();
1411        let added = result
1412            .changes
1413            .iter()
1414            .find(|change| change.change_type == ChangeType::Added)
1415            .expect("unrelated upload handler should be added");
1416        let deleted = result
1417            .changes
1418            .iter()
1419            .find(|change| change.change_type == ChangeType::Deleted)
1420            .expect("unrelated websocket handler should be deleted");
1421
1422        assert_eq!(modified, 1, "{:?}", result.changes);
1423        assert_eq!(added.entity_id, "a.ts::function::process@L9");
1424        assert_eq!(deleted.entity_id, "a.ts::function::process@L7");
1425    }
1426
1427    #[test]
1428    fn test_reorder_detection_uses_same_line_extraction_order() {
1429        let before = vec![
1430            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1431            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 1),
1432            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 1),
1433        ];
1434        let after = vec![
1435            make_entity_at("a::f::beta", "beta", "fn beta() {}", "a.rs", 1),
1436            make_entity_at("a::f::alpha", "alpha", "fn alpha() {}", "a.rs", 1),
1437            make_entity_at("a::f::gamma", "gamma", "fn gamma() {}", "a.rs", 1),
1438        ];
1439        let result = match_entities(&before, &after, "a.rs", None, None, None);
1440
1441        assert_eq!(result.changes.len(), 1);
1442        assert_eq!(result.changes[0].change_type, ChangeType::Reordered);
1443        assert!(
1444            result.changes[0].entity_name == "alpha" || result.changes[0].entity_name == "beta"
1445        );
1446    }
1447
1448    #[test]
1449    fn test_default_similarity() {
1450        let a = make_entity("a", "a", "the quick brown fox", "a.ts");
1451        let b = make_entity("b", "b", "the quick brown dog", "a.ts");
1452        let score = default_similarity(&a, &b);
1453        assert!(score > 0.5);
1454        assert!(score < 1.0);
1455    }
1456
1457    #[test]
1458    fn parent_name_terminates_on_cyclic_parent_id() {
1459        // Two entities whose parent_id chains form a cycle. parent_name
1460        // would loop forever without the visited-set guard.
1461        let a = make_entity_with_parent("A", "A", "", "f", Some("B"));
1462        let b = make_entity_with_parent("B", "B", "", "f", Some("A"));
1463        let mut by_id: HashMap<&str, &SemanticEntity> = HashMap::new();
1464        by_id.insert("A", &a);
1465        by_id.insert("B", &b);
1466        // Synthesize a leaf whose parent_id enters the cycle via A.
1467        let leaf = make_entity_with_parent("L", "L", "", "f", Some("A"));
1468        let chain = parent_name(&leaf, &by_id);
1469        // Must terminate. We don't assert exact contents — order/composition
1470        // depends on which side of the cycle is reached first; the safety
1471        // property is "this returns at all."
1472        assert!(chain.is_some());
1473    }
1474}