Skip to main content

sem_core/parser/
differ.rs

1#[cfg(feature = "parallel")]
2use rayon::prelude::*;
3use serde::Serialize;
4
5use crate::git::types::FileChange;
6
/// Expands to an iterator over `$items`.
///
/// With the `parallel` feature enabled this is `$items.par_iter()`;
/// otherwise it is the ordinary sequential `$items.iter()`. Exactly one of
/// the two cfg-gated blocks survives compilation and becomes the value of
/// the expansion.
macro_rules! maybe_par_iter {
    ($items:expr) => {{
        #[cfg(not(feature = "parallel"))]
        {
            $items.iter()
        }
        #[cfg(feature = "parallel")]
        {
            $items.par_iter()
        }
    }};
}
19use crate::model::change::{ChangeType, SemanticChange};
20use crate::model::entity::SemanticEntity;
21use crate::model::identity::match_entities;
22use crate::parser::registry::ParserRegistry;
23use std::collections::{HashMap, HashSet};
24
25#[derive(Debug, Clone, Serialize)]
26#[serde(rename_all = "camelCase")]
27pub struct DiffResult {
28    pub changes: Vec<SemanticChange>,
29    pub file_count: usize,
30    pub added_count: usize,
31    pub modified_count: usize,
32    pub deleted_count: usize,
33    pub moved_count: usize,
34    pub renamed_count: usize,
35    pub reordered_count: usize,
36    pub orphan_count: usize,
37    pub total_entities_before: usize,
38    pub total_entities_after: usize,
39}
40
41pub fn compute_semantic_diff(
42    file_changes: &[FileChange],
43    registry: &ParserRegistry,
44    commit_sha: Option<&str>,
45    author: Option<&str>,
46) -> DiffResult {
47    // Process files in parallel: each file's entity extraction and matching is independent
48    let per_file_changes: Vec<(String, Vec<SemanticChange>, usize, usize)> =
49        maybe_par_iter!(file_changes)
50            .filter_map(|file| {
51                let content_hint = file
52                    .after_content
53                    .as_deref()
54                    .or(file.before_content.as_deref())
55                    .unwrap_or("");
56                let resolved = registry.resolve_file_path(&file.file_path);
57                let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
58                let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
59
60                let before_entities = if let Some(ref content) = file.before_content {
61                    let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
62                    let before_resolved = registry.resolve_file_path(before_path);
63                    let before_detection = before_resolved.as_deref().unwrap_or(before_path);
64                    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
65                        plugin.extract_entities(content, before_detection)
66                    })) {
67                        Ok(entities) => entities,
68                        Err(_) => Vec::new(),
69                    }
70                } else {
71                    Vec::new()
72                };
73
74                let after_entities = if let Some(ref content) = file.after_content {
75                    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
76                        plugin.extract_entities(content, detection_path)
77                    })) {
78                        Ok(entities) => entities,
79                        Err(_) => Vec::new(),
80                    }
81                } else {
82                    Vec::new()
83                };
84
85                let before_count = before_entities.len();
86                let after_count = after_entities.len();
87
88                let sim_fn = |a: &crate::model::entity::SemanticEntity,
89                              b: &crate::model::entity::SemanticEntity|
90                 -> f64 { plugin.compute_similarity(a, b) };
91
92                let mut result = match_entities(
93                    &before_entities,
94                    &after_entities,
95                    &file.file_path,
96                    Some(&sim_fn),
97                    commit_sha,
98                    author,
99                );
100
101                // Suppress parent entities whose modification is already explained
102                // by child entity changes (e.g. impl blocks when methods changed).
103                suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
104
105                // Detect orphan changes (lines that changed outside any entity span).
106                let orphans = detect_orphan_changes(
107                    file,
108                    &before_entities,
109                    &after_entities,
110                    commit_sha,
111                    author,
112                );
113                result.changes.extend(orphans);
114
115                result.changes.sort_by_key(|change| change.entity_line);
116
117                if result.changes.is_empty() {
118                    None
119                } else {
120                    Some((
121                        file.file_path.clone(),
122                        result.changes,
123                        before_count,
124                        after_count,
125                    ))
126                }
127            })
128            .collect();
129
130    let mut all_changes: Vec<SemanticChange> = Vec::new();
131    let mut files_with_changes: HashSet<String> = HashSet::new();
132    let mut total_entities_before: usize = 0;
133    let mut total_entities_after: usize = 0;
134    for (file_path, changes, before_count, after_count) in per_file_changes {
135        files_with_changes.insert(file_path);
136        all_changes.extend(changes);
137        total_entities_before += before_count;
138        total_entities_after += after_count;
139    }
140
141    // Single-pass counting. Orphans are first-class changes for the
142    // change-type buckets, and orphan_count is cross-cutting metadata.
143    let mut added_count = 0;
144    let mut modified_count = 0;
145    let mut deleted_count = 0;
146    let mut moved_count = 0;
147    let mut renamed_count = 0;
148    let mut reordered_count = 0;
149    let mut orphan_count = 0;
150
151    for c in &all_changes {
152        if c.entity_type == "orphan" {
153            orphan_count += 1;
154        }
155        match c.change_type {
156            ChangeType::Added => added_count += 1,
157            ChangeType::Modified => modified_count += 1,
158            ChangeType::Deleted => deleted_count += 1,
159            ChangeType::Moved => {
160                moved_count += 1;
161                if c.has_content_change() {
162                    modified_count += 1;
163                }
164            }
165            ChangeType::Renamed => {
166                renamed_count += 1;
167                if c.has_content_change() {
168                    modified_count += 1;
169                }
170            }
171            ChangeType::Reordered => {
172                reordered_count += 1;
173                if c.has_content_change() {
174                    modified_count += 1;
175                }
176            }
177        }
178    }
179
180    DiffResult {
181        changes: all_changes,
182        file_count: files_with_changes.len(),
183        added_count,
184        modified_count,
185        deleted_count,
186        moved_count,
187        renamed_count,
188        reordered_count,
189        orphan_count,
190        total_entities_before,
191        total_entities_after,
192    }
193}
194
195fn suppress_redundant_parents(
196    changes: &mut Vec<SemanticChange>,
197    before: &[SemanticEntity],
198    after: &[SemanticEntity],
199) {
200    if changes.len() < 2 {
201        return;
202    }
203
204    const CONTAINER_TYPES: &[&str] = &[
205        "impl",
206        "trait",
207        "module",
208        "class",
209        "interface",
210        "mixin",
211        "extension",
212        "namespace",
213        "export",
214        "package",
215        "field",
216        "svelte_instance_script",
217        "svelte_module_script",
218        "object",
219    ];
220
221    let before_by_id: HashMap<&str, &SemanticEntity> =
222        before.iter().map(|e| (e.id.as_str(), e)).collect();
223    let after_by_id: HashMap<&str, &SemanticEntity> =
224        after.iter().map(|e| (e.id.as_str(), e)).collect();
225
226    let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
227    for e in before {
228        if let Some(ref pid) = e.parent_id {
229            before_children.entry(pid.as_str()).or_default().push(e);
230        }
231    }
232    let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
233    for e in after {
234        if let Some(ref pid) = e.parent_id {
235            after_children.entry(pid.as_str()).or_default().push(e);
236        }
237    }
238
239    let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
240
241    let mut suppress: HashSet<String> = HashSet::new();
242    for change in changes.iter() {
243        if !matches!(
244            change.change_type,
245            ChangeType::Modified | ChangeType::Added | ChangeType::Deleted
246        ) {
247            continue;
248        }
249        if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
250            continue;
251        }
252        let eid = change.entity_id.as_str();
253        let b_children = before_children
254            .get(eid)
255            .map(|v| v.as_slice())
256            .unwrap_or(&[]);
257        let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
258
259        let has_changed_child = b_children
260            .iter()
261            .any(|c| changed_ids.contains(c.id.as_str()))
262            || a_children
263                .iter()
264                .any(|c| changed_ids.contains(c.id.as_str()));
265        if !has_changed_child {
266            continue;
267        }
268
269        // Added/Deleted: suppress unconditionally; the children carry the detail.
270        // Modified: only suppress if the container's own declaration is unchanged
271        // and the value type didn't transition.
272        let should_suppress = if change.change_type == ChangeType::Modified {
273            match (before_by_id.get(eid), after_by_id.get(eid)) {
274                (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
275                    let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
276                    let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
277                    before_own == after_own
278                }
279                _ => false,
280            }
281        } else {
282            true
283        };
284
285        if should_suppress {
286            suppress.insert(change.entity_id.clone());
287        }
288    }
289
290    // Suppress an old parent that a Moved child left behind when the old
291    // parent itself appears as a change — handles the parent-rename case
292    // where the parent itself failed to match.
293    for change in changes.iter() {
294        if change.change_type == ChangeType::Moved {
295            if let Some(ref old_pid) = change.old_parent_id {
296                if changed_ids.contains(old_pid.as_str()) {
297                    suppress.insert(old_pid.clone());
298                }
299            }
300        }
301    }
302
303    if !suppress.is_empty() {
304        changes.retain(|c| !suppress.contains(&c.entity_id));
305    }
306
307    // Drop a Moved child whose key is unchanged and whose old parent matches
308    // a Renamed entity — the child only "moved" because the parent renamed.
309    let renamed_before_ids: HashSet<&str> = changes
310        .iter()
311        .filter(|c| c.change_type == ChangeType::Renamed)
312        .filter_map(|c| {
313            let old_name = c.old_entity_name.as_deref()?;
314            let after_entity = after_by_id.get(c.entity_id.as_str())?;
315            before
316                .iter()
317                .find(|e| {
318                    e.name == old_name
319                        && e.entity_type == after_entity.entity_type
320                        && e.parent_id == after_entity.parent_id
321                })
322                .map(|e| e.id.as_str())
323        })
324        .collect();
325
326    if !renamed_before_ids.is_empty() {
327        changes.retain(|c| {
328            !(c.change_type == ChangeType::Moved
329                && c.old_entity_name.is_none()
330                && c.old_parent_id
331                    .as_deref()
332                    .map_or(false, |pid| renamed_before_ids.contains(pid)))
333        });
334    }
335}
336
337fn strip_children_content(
338    content: &str,
339    parent_start_line: usize,
340    children: &[&SemanticEntity],
341) -> String {
342    let lines: Vec<&str> = content.lines().collect();
343    let mut excluded: HashSet<usize> = HashSet::new();
344    for child in children {
345        let start_idx = child.start_line.saturating_sub(parent_start_line);
346        let end_idx = child.end_line.saturating_sub(parent_start_line);
347        for i in start_idx..=end_idx.max(start_idx) {
348            if i < lines.len() {
349                excluded.insert(i);
350            }
351        }
352    }
353    lines
354        .iter()
355        .enumerate()
356        .filter(|(i, _)| !excluded.contains(i))
357        .map(|(_, l)| l.trim())
358        .filter(|l| !l.is_empty())
359        .collect::<Vec<_>>()
360        .join(" ")
361}
362
363/// Detect changes in lines that fall outside any entity span.
364/// These are things like use statements, crate-level attributes, standalone
365/// comments, and macro invocations that aren't tracked as entities.
366fn detect_orphan_changes(
367    file: &FileChange,
368    before_entities: &[SemanticEntity],
369    after_entities: &[SemanticEntity],
370    commit_sha: Option<&str>,
371    author: Option<&str>,
372) -> Vec<SemanticChange> {
373    let before_text = file.before_content.as_deref().unwrap_or("");
374    let after_text = file.after_content.as_deref().unwrap_or("");
375
376    // Build covered line sets from entity spans
377    let before_covered: HashSet<usize> = before_entities
378        .iter()
379        .flat_map(|e| e.start_line..=e.end_line)
380        .collect();
381    let after_covered: HashSet<usize> = after_entities
382        .iter()
383        .flat_map(|e| e.start_line..=e.end_line)
384        .collect();
385
386    let before_orphans = orphan_segments(before_text, &before_covered);
387    let after_orphans = orphan_segments(after_text, &after_covered);
388    let mut changes = Vec::new();
389
390    for (before_idx, after_idx) in orphan_segment_change_pairs(&before_orphans, &after_orphans) {
391        let before_orphan = before_idx.and_then(|idx| before_orphans.get(idx));
392        let after_orphan = after_idx.and_then(|idx| after_orphans.get(idx));
393        let before_content = orphan_content(before_orphan);
394        let after_content = orphan_content(after_orphan);
395
396        // Skip if orphan content is unchanged, including blank-only segments.
397        if before_content == after_content {
398            continue;
399        }
400
401        let change_type = if before_content.is_none() {
402            ChangeType::Added
403        } else if after_content.is_none() {
404            ChangeType::Deleted
405        } else {
406            ChangeType::Modified
407        };
408
409        let current_orphan = match change_type {
410            ChangeType::Deleted => before_orphan,
411            _ => after_orphan.or(before_orphan),
412        };
413        let Some(current_orphan) = current_orphan else {
414            continue;
415        };
416        let span_label = if change_type == ChangeType::Deleted {
417            "oldL"
418        } else {
419            "L"
420        };
421        let orphan_id = format!(
422            "{}::orphan::{}@{}{}-{}",
423            file.file_path,
424            change_type,
425            span_label,
426            current_orphan.start_line,
427            current_orphan.end_line
428        );
429
430        changes.push(SemanticChange {
431            id: format!("change::{orphan_id}"),
432            entity_id: orphan_id,
433            change_type,
434            entity_type: "orphan".to_string(),
435            entity_name: "module-level".to_string(),
436            entity_line: current_orphan.start_line,
437            start_line: current_orphan.start_line,
438            end_line: current_orphan.end_line,
439            old_start_line: before_orphan.map(|orphan| orphan.start_line),
440            old_end_line: before_orphan.map(|orphan| orphan.end_line),
441            parent_name: None,
442            file_path: file.file_path.clone(),
443            old_entity_name: None,
444            old_file_path: None,
445            old_parent_id: None,
446            before_content: before_content.map(str::to_string),
447            after_content: after_content.map(str::to_string),
448            commit_sha: commit_sha.map(String::from),
449            author: author.map(String::from),
450            timestamp: None,
451            structural_change: Some(true),
452        });
453    }
454
455    changes
456}
457
/// A maximal run of consecutive lines that no entity span covers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct OrphanSegment {
    /// 1-based number of the first line in the run.
    start_line: usize,
    /// 1-based number of the last line in the run (inclusive).
    end_line: usize,
    /// The run's lines joined with `\n` (no trailing newline).
    content: String,
}
464
465fn orphan_segments(text: &str, covered_lines: &HashSet<usize>) -> Vec<OrphanSegment> {
466    let mut segments = Vec::new();
467    let mut current_start: Option<usize> = None;
468    let mut current_lines: Vec<&str> = Vec::new();
469    let mut last_line_number = 0;
470
471    for (i, line) in text.lines().enumerate() {
472        let line_number = i + 1;
473        last_line_number = line_number;
474        if covered_lines.contains(&line_number) {
475            if let Some(start_line) = current_start.take() {
476                segments.push(OrphanSegment {
477                    start_line,
478                    end_line: line_number - 1,
479                    content: current_lines.join("\n"),
480                });
481                current_lines.clear();
482            }
483            continue;
484        }
485
486        current_start.get_or_insert(line_number);
487        current_lines.push(line);
488    }
489
490    if let Some(start_line) = current_start {
491        segments.push(OrphanSegment {
492            start_line,
493            end_line: last_line_number.max(start_line),
494            content: current_lines.join("\n"),
495        });
496    }
497
498    segments
499}
500
501fn orphan_content(segment: Option<&OrphanSegment>) -> Option<&str> {
502    segment
503        .map(|segment| segment.content.as_str())
504        .filter(|content| !content.trim().is_empty())
505}
506
507fn orphan_segment_change_pairs(
508    before: &[OrphanSegment],
509    after: &[OrphanSegment],
510) -> Vec<(Option<usize>, Option<usize>)> {
511    let anchors = orphan_segment_lcs(before, after);
512    let mut pairs = Vec::new();
513    let mut before_start = 0;
514    let mut after_start = 0;
515
516    for (before_anchor, after_anchor) in anchors {
517        append_orphan_gap_pairs(
518            &mut pairs,
519            before_start,
520            before_anchor,
521            after_start,
522            after_anchor,
523        );
524        before_start = before_anchor + 1;
525        after_start = after_anchor + 1;
526    }
527
528    append_orphan_gap_pairs(
529        &mut pairs,
530        before_start,
531        before.len(),
532        after_start,
533        after.len(),
534    );
535
536    pairs
537}
538
/// Appends the pairing for one gap: the before-indices
/// `before_start..before_end` against the after-indices
/// `after_start..after_end` (an end below its start means an empty range).
///
/// When both ranges have the same length they are paired positionally.
/// Otherwise nothing is paired: every before-index is emitted as
/// `(Some(i), None)` followed by every after-index as `(None, Some(j))`.
fn append_orphan_gap_pairs(
    pairs: &mut Vec<(Option<usize>, Option<usize>)>,
    before_start: usize,
    before_end: usize,
    after_start: usize,
    after_end: usize,
) {
    let old_gap = before_start..before_end;
    let new_gap = after_start..after_end;

    if old_gap.len() == new_gap.len() {
        pairs.extend(old_gap.zip(new_gap).map(|(old, new)| (Some(old), Some(new))));
    } else {
        pairs.extend(old_gap.map(|old| (Some(old), None)));
        pairs.extend(new_gap.map(|new| (None, Some(new))));
    }
}
563
564fn orphan_segment_lcs(before: &[OrphanSegment], after: &[OrphanSegment]) -> Vec<(usize, usize)> {
565    let mut dp = vec![vec![0; after.len() + 1]; before.len() + 1];
566
567    for i in (0..before.len()).rev() {
568        for j in (0..after.len()).rev() {
569            dp[i][j] = if orphan_segments_equal(&before[i], &after[j]) {
570                dp[i + 1][j + 1] + 1
571            } else {
572                dp[i + 1][j].max(dp[i][j + 1])
573            };
574        }
575    }
576
577    let mut anchors = Vec::new();
578    let mut i = 0;
579    let mut j = 0;
580    while i < before.len() && j < after.len() {
581        if orphan_segments_equal(&before[i], &after[j]) {
582            anchors.push((i, j));
583            i += 1;
584            j += 1;
585        } else if dp[i + 1][j] >= dp[i][j + 1] {
586            i += 1;
587        } else {
588            j += 1;
589        }
590    }
591
592    anchors
593}
594
595fn orphan_segments_equal(before: &OrphanSegment, after: &OrphanSegment) -> bool {
596    match (orphan_content(Some(before)), orphan_content(Some(after))) {
597        (Some(before), Some(after)) => before == after,
598        _ => false,
599    }
600}
601
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use crate::parser::plugins::create_default_registry;

    /// A `Modified` file change at `path` with the given old and new content.
    fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: String::from(path),
            status: FileStatus::Modified,
            old_file_path: None,
            before_content: Some(String::from(before)),
            after_content: Some(String::from(after)),
        }
    }

    /// A `Renamed` file change from `old_path` to `new_path`.
    fn renamed_file(old_path: &str, new_path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: String::from(new_path),
            status: FileStatus::Renamed,
            old_file_path: Some(String::from(old_path)),
            before_content: Some(String::from(before)),
            after_content: Some(String::from(after)),
        }
    }

    /// A content-less "function" entity in `a.rs` that only carries a line span.
    fn entity_span(id: &str, start_line: usize, end_line: usize) -> SemanticEntity {
        SemanticEntity {
            id: String::from(id),
            file_path: String::from("a.rs"),
            entity_type: String::from("function"),
            name: String::from(id),
            parent_id: None,
            content: String::new(),
            content_hash: String::new(),
            structural_hash: None,
            start_line,
            end_line,
            metadata: None,
        }
    }

    /// Diffs a single file with the default registry and no commit metadata.
    fn diff_single_file(file: FileChange) -> DiffResult {
        let registry = create_default_registry();
        compute_semantic_diff(&[file], &registry, None, None)
    }

    #[test]
    fn orphan_only_change_counts_file_and_orphan() {
        let result = diff_single_file(modified_file(
            "app.py",
            "# old module comment\n\ndef value():\n    return 1\n",
            "# new module comment\n\ndef value():\n    return 1\n",
        ));

        assert_eq!(result.changes.len(), 1);
        assert_eq!(result.file_count, 1);
        assert_eq!(result.orphan_count, 1);
        assert_eq!(result.modified_count, 1);

        let only = &result.changes[0];
        assert_eq!(only.entity_type, "orphan");
        assert_eq!(only.change_type, ChangeType::Modified);
    }

    #[test]
    fn test_parent_suppressed_when_only_child_modified() {
        let result = diff_single_file(modified_file(
            "svc.py",
            "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n",
            "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n",
        ));

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        let method_reported = result.changes.iter().any(|c| c.entity_name == "get_user");
        let class_modified = result
            .changes
            .iter()
            .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified);

        assert!(
            method_reported,
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            !class_modified,
            "class should be suppressed when only the method body changed, got: {names:?}"
        );
    }

    #[test]
    fn test_parent_not_suppressed_when_own_declaration_changes() {
        let result = diff_single_file(modified_file(
            "svc.py",
            "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n",
            "class UserService(BaseService):\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n",
        ));

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        let method_reported = result.changes.iter().any(|c| c.entity_name == "get_user");
        let class_modified = result
            .changes
            .iter()
            .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified);

        assert!(
            method_reported,
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            class_modified,
            "class should remain Modified when its own declaration changed, got: {names:?}"
        );
    }

    #[test]
    fn test_nested_typescript_class_field_diff_reports_leaf_method() {
        let before = r#"class L1 {
  L2 = class {
    L3 = class {
      L4 = class {
        method() { return 1; }
      };
    };
  };
}
"#;
        let after = r#"class L1 {
  L2 = class {
    L3 = class {
      L4 = class {
        method() { return 999; }
      };
    };
  };
}
"#;

        let result = diff_single_file(modified_file("a.ts", before, after));

        let changes: Vec<_> = result
            .changes
            .iter()
            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
            .collect();
        let leaf_reported = result
            .changes
            .iter()
            .any(|c| c.entity_id == "a.ts::class::L1::L2::L3::L4::method");
        let field_reported = result.changes.iter().any(|c| c.entity_type == "field");

        assert!(
            leaf_reported,
            "expected method leaf change, got: {changes:?}"
        );
        assert!(
            !field_reported,
            "field containers should be suppressed when only a nested method changed, got: {changes:?}"
        );
    }

    #[test]
    fn renamed_file_with_edited_entity_reports_move_not_add_delete() {
        let result = diff_single_file(renamed_file(
            "old.py",
            "new.py",
            "def foo():\n    return alpha + beta + gamma\n",
            "def foo():\n    return one + two + three\n",
        ));

        assert_eq!(result.added_count, 0);
        assert_eq!(result.deleted_count, 0);
        assert_eq!(result.modified_count, 1);
        assert_eq!(result.moved_count, 1);
        assert_eq!(result.changes.len(), 1);

        let moved = &result.changes[0];
        assert_eq!(moved.entity_name, "foo");
        assert_eq!(moved.old_file_path.as_deref(), Some("old.py"));
        assert_eq!(moved.structural_change, Some(true));
    }

    #[test]
    fn duplicate_markdown_heading_reports_first_section_modification() {
        let result = diff_single_file(modified_file(
            "doc.md",
            "# Same Title\n\noriginal content of section A\n\n# Same Title\n\ncontent of section B\n",
            "# Same Title\n\nMODIFIED content of section A\n\n# Same Title\n\ncontent of section B\n",
        ));

        assert_eq!(result.modified_count, 1, "{:?}", result.changes);
        assert_eq!(result.changes.len(), 1, "{:?}", result.changes);

        let change = &result.changes[0];
        assert_eq!(change.change_type, ChangeType::Modified);
        assert_eq!(change.entity_name, "Same Title");
        assert_eq!(change.entity_line, 1);

        let old_body = change.before_content.as_deref().unwrap_or_default();
        let new_body = change.after_content.as_deref().unwrap_or_default();
        assert!(old_body.contains("original content of section A"));
        assert!(new_body.contains("MODIFIED content of section A"));
    }

    #[test]
    fn orphan_changes_count_toward_change_type_buckets() {
        let result = diff_single_file(modified_file(
            "svc.py",
            "def foo():\n    return 1\n\ndef bar():\n    return 2\n",
            "# just a comment\n",
        ));

        assert_eq!(result.added_count, 1);
        assert_eq!(result.deleted_count, 2);
        assert_eq!(result.modified_count, 0);
        assert_eq!(result.orphan_count, 1);

        let has_added_orphan = result
            .changes
            .iter()
            .any(|c| c.entity_type == "orphan" && c.change_type == ChangeType::Added);
        assert!(has_added_orphan);

        // With no moved/renamed/reordered changes the buckets partition the list.
        let named_bucket_total: usize = [
            result.added_count,
            result.modified_count,
            result.deleted_count,
            result.moved_count,
            result.renamed_count,
            result.reordered_count,
        ]
        .iter()
        .sum();
        assert_eq!(named_bucket_total, result.changes.len());
    }

    #[test]
    fn orphan_changes_use_contiguous_line_spans() {
        let file = modified_file(
            "a.rs",
            "use alpha;\nfn foo() {}\nuse beta;\nfn bar() {}\n",
            "use gamma;\nfn foo() {}\nuse delta;\nfn bar() {}\n",
        );
        let entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];

        let changes = detect_orphan_changes(&file, &entities, &entities, None, None);

        // (line, old content, new content) for each expected single-line orphan.
        let expected = [
            (1, "use alpha;", "use gamma;"),
            (3, "use beta;", "use delta;"),
        ];
        assert_eq!(changes.len(), 2);
        for (change, (line, old, new)) in changes.iter().zip(expected) {
            assert_eq!(change.start_line, line);
            assert_eq!(change.end_line, line);
            assert_eq!(change.old_start_line, Some(line));
            assert_eq!(change.old_end_line, Some(line));
            assert_eq!(change.before_content.as_deref(), Some(old));
            assert_eq!(change.after_content.as_deref(), Some(new));
        }
    }

    #[test]
    fn blank_only_orphan_segments_are_ignored() {
        let file = modified_file("a.rs", "fn foo() {}\n", "\nfn foo() {}\n");
        let old_entities = vec![entity_span("foo", 1, 1)];
        let new_entities = vec![entity_span("foo", 2, 2)];

        let changes = detect_orphan_changes(&file, &old_entities, &new_entities, None, None);

        assert!(changes.is_empty());
    }

    #[test]
    fn inserted_orphan_segment_does_not_modify_unchanged_later_segment() {
        let file = modified_file(
            "a.rs",
            "fn foo() {}\nuse a;\nfn bar() {}\n",
            "use x;\nfn foo() {}\nuse a;\nfn bar() {}\n",
        );
        let old_entities = vec![entity_span("foo", 1, 1), entity_span("bar", 3, 3)];
        let new_entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];

        let changes = detect_orphan_changes(&file, &old_entities, &new_entities, None, None);

        assert_eq!(changes.len(), 1);
        let inserted = &changes[0];
        assert_eq!(inserted.change_type, ChangeType::Added);
        assert_eq!(inserted.start_line, 1);
        assert_eq!(inserted.end_line, 1);
        assert!(inserted.old_start_line.is_none());
        assert_eq!(inserted.before_content, None);
        assert_eq!(inserted.after_content.as_deref(), Some("use x;"));
    }

    #[test]
    fn uneven_orphan_gaps_are_not_forced_into_modifications() {
        let file = modified_file(
            "a.rs",
            "use a;\nfn foo() {}\nuse old;\nfn mid() {}\nuse c;\nfn bar() {}\n",
            "use a;\nfn foo() {}\nuse new1;\nfn mid() {}\nuse new2;\nfn baz() {}\nuse c;\nfn bar() {}\n",
        );
        let old_entities = vec![
            entity_span("foo", 2, 2),
            entity_span("mid", 4, 4),
            entity_span("bar", 6, 6),
        ];
        let new_entities = vec![
            entity_span("foo", 2, 2),
            entity_span("mid", 4, 4),
            entity_span("baz", 6, 6),
            entity_span("bar", 8, 8),
        ];

        let changes = detect_orphan_changes(&file, &old_entities, &new_entities, None, None);

        assert_eq!(changes.len(), 3);

        let (removed, first_added, second_added) = (&changes[0], &changes[1], &changes[2]);
        assert_eq!(removed.change_type, ChangeType::Deleted);
        assert!(removed.entity_id.contains("::deleted@oldL3-3"));
        assert_eq!(removed.before_content.as_deref(), Some("use old;"));
        assert_eq!(first_added.change_type, ChangeType::Added);
        assert_eq!(first_added.after_content.as_deref(), Some("use new1;"));
        assert_eq!(second_added.change_type, ChangeType::Added);
        assert_eq!(second_added.after_content.as_deref(), Some("use new2;"));
    }
}