Skip to main content

sem_core/parser/
differ.rs

1#[cfg(feature = "parallel")]
2use rayon::prelude::*;
3use serde::Serialize;
4
5use crate::git::types::{FileChange, FileStatus};
6
/// Picks the iteration strategy at compile time.
///
/// Expands to `$items.par_iter()` when the `parallel` feature is enabled and to
/// `$items.iter()` otherwise, so a single adaptor chain can serve both builds.
macro_rules! maybe_par_iter {
    ($items:expr) => {{
        #[cfg(feature = "parallel")]
        {
            $items.par_iter()
        }
        #[cfg(not(feature = "parallel"))]
        {
            $items.iter()
        }
    }};
}
19use crate::model::change::{ChangeType, SemanticChange};
20use crate::model::entity::SemanticEntity;
21use crate::model::identity::match_entities;
22use crate::parser::plugin::SemanticParserPlugin;
23use crate::parser::registry::ParserRegistry;
24use std::collections::{HashMap, HashSet};
25
26#[derive(Debug, Clone, Default, Serialize)]
27#[serde(rename_all = "camelCase")]
28pub struct DiffResult {
29    pub changes: Vec<SemanticChange>,
30    pub file_count: usize,
31    pub added_count: usize,
32    pub modified_count: usize,
33    pub deleted_count: usize,
34    pub moved_count: usize,
35    pub renamed_count: usize,
36    pub reordered_count: usize,
37    pub orphan_count: usize,
38    pub total_entities_before: usize,
39    pub total_entities_after: usize,
40}
41
42#[derive(Debug, Clone, Serialize)]
43#[serde(rename_all = "camelCase")]
44pub struct BinaryFileChange {
45    pub file_path: String,
46    pub status: FileStatus,
47    pub old_file_path: Option<String>,
48}
49
50impl From<&FileChange> for BinaryFileChange {
51    fn from(file: &FileChange) -> Self {
52        Self {
53            file_path: file.file_path.clone(),
54            status: file.status.clone(),
55            old_file_path: file.old_file_path.clone(),
56        }
57    }
58}
59
60pub fn collect_binary_file_changes(file_changes: &[FileChange]) -> Vec<BinaryFileChange> {
61    file_changes
62        .iter()
63        .filter(|file| lacks_diffable_content(file))
64        .map(BinaryFileChange::from)
65        .collect()
66}
67
68fn lacks_diffable_content(file: &FileChange) -> bool {
69    match &file.status {
70        FileStatus::Added => file.after_content.is_none(),
71        FileStatus::Deleted => file.before_content.is_none(),
72        FileStatus::Modified | FileStatus::Renamed => {
73            file.before_content.is_none() || file.after_content.is_none()
74        }
75    }
76}
77
78pub fn compute_semantic_diff(
79    file_changes: &[FileChange],
80    registry: &ParserRegistry,
81    commit_sha: Option<&str>,
82    author: Option<&str>,
83) -> DiffResult {
84    // Process files in parallel: each file's entity extraction and matching is independent
85    let per_file_changes: Vec<(String, Vec<SemanticChange>, usize, usize)> =
86        maybe_par_iter!(file_changes)
87            .filter(|file| !lacks_diffable_content(file))
88            .filter_map(|file| {
89                let content_hint = file
90                    .after_content
91                    .as_deref()
92                    .or(file.before_content.as_deref())
93                    .unwrap_or("");
94                let resolved = registry.resolve_file_path(&file.file_path);
95                let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
96                let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
97
98                let before_entities = if let Some(ref content) = file.before_content {
99                    let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
100                    let before_resolved = registry.resolve_file_path(before_path);
101                    let before_detection = before_resolved.as_deref().unwrap_or(before_path);
102                    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
103                        plugin.extract_entities(content, before_detection)
104                    })) {
105                        Ok(entities) => entities,
106                        Err(_) => Vec::new(),
107                    }
108                } else {
109                    Vec::new()
110                };
111
112                let after_entities = if let Some(ref content) = file.after_content {
113                    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
114                        plugin.extract_entities(content, detection_path)
115                    })) {
116                        Ok(entities) => entities,
117                        Err(_) => Vec::new(),
118                    }
119                } else {
120                    Vec::new()
121                };
122
123                let before_count = before_entities.len();
124                let after_count = after_entities.len();
125
126                let mut result = match_entities(
127                    &before_entities,
128                    &after_entities,
129                    &file.file_path,
130                    None,
131                    commit_sha,
132                    author,
133                );
134
135                // Suppress parent entities whose modification is already explained
136                // by child entity changes (e.g. impl blocks when methods changed).
137                suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
138
139                // Detect orphan changes (lines that changed outside any entity span).
140                let orphans = detect_orphan_changes(
141                    file,
142                    &before_entities,
143                    &after_entities,
144                    Some(plugin),
145                    detection_path,
146                    commit_sha,
147                    author,
148                );
149                result.changes.extend(orphans);
150
151                result.changes.sort_by_key(|change| change.entity_line);
152
153                if result.changes.is_empty() {
154                    None
155                } else {
156                    Some((
157                        file.file_path.clone(),
158                        result.changes,
159                        before_count,
160                        after_count,
161                    ))
162                }
163            })
164            .collect();
165
166    let mut all_changes: Vec<SemanticChange> = Vec::new();
167    let mut files_with_changes: HashSet<String> = HashSet::new();
168    let mut total_entities_before: usize = 0;
169    let mut total_entities_after: usize = 0;
170    for (file_path, changes, before_count, after_count) in per_file_changes {
171        files_with_changes.insert(file_path);
172        all_changes.extend(changes);
173        total_entities_before += before_count;
174        total_entities_after += after_count;
175    }
176
177    // Single-pass counting. Orphans are first-class changes for the
178    // change-type buckets, and orphan_count is cross-cutting metadata.
179    let mut added_count = 0;
180    let mut modified_count = 0;
181    let mut deleted_count = 0;
182    let mut moved_count = 0;
183    let mut renamed_count = 0;
184    let mut reordered_count = 0;
185    let mut orphan_count = 0;
186
187    for c in &all_changes {
188        if c.entity_type == "orphan" {
189            orphan_count += 1;
190        }
191        match c.change_type {
192            ChangeType::Added => added_count += 1,
193            ChangeType::Modified => modified_count += 1,
194            ChangeType::Deleted => deleted_count += 1,
195            ChangeType::Moved => {
196                moved_count += 1;
197                if c.has_content_change() {
198                    modified_count += 1;
199                }
200            }
201            ChangeType::Renamed => {
202                renamed_count += 1;
203                if c.has_content_change() {
204                    modified_count += 1;
205                }
206            }
207            ChangeType::Reordered => {
208                reordered_count += 1;
209                if c.has_content_change() {
210                    modified_count += 1;
211                }
212            }
213        }
214    }
215
216    DiffResult {
217        changes: all_changes,
218        file_count: files_with_changes.len(),
219        added_count,
220        modified_count,
221        deleted_count,
222        moved_count,
223        renamed_count,
224        reordered_count,
225        orphan_count,
226        total_entities_before,
227        total_entities_after,
228    }
229}
230
231fn suppress_redundant_parents(
232    changes: &mut Vec<SemanticChange>,
233    before: &[SemanticEntity],
234    after: &[SemanticEntity],
235) {
236    if changes.len() < 2 {
237        return;
238    }
239
240    const CONTAINER_TYPES: &[&str] = &[
241        "impl",
242        "trait",
243        "module",
244        "class",
245        "interface",
246        "protocol",
247        "mixin",
248        "extension",
249        "namespace",
250        "export",
251        "package",
252        "field",
253        "variable",
254        "svelte_instance_script",
255        "svelte_module_script",
256        "object",
257    ];
258
259    let before_by_id: HashMap<&str, &SemanticEntity> =
260        before.iter().map(|e| (e.id.as_str(), e)).collect();
261    let after_by_id: HashMap<&str, &SemanticEntity> =
262        after.iter().map(|e| (e.id.as_str(), e)).collect();
263
264    let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
265    for e in before {
266        if let Some(ref pid) = e.parent_id {
267            before_children.entry(pid.as_str()).or_default().push(e);
268        }
269    }
270    let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
271    for e in after {
272        if let Some(ref pid) = e.parent_id {
273            after_children.entry(pid.as_str()).or_default().push(e);
274        }
275    }
276
277    let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
278
279    let mut suppress: HashSet<String> = HashSet::new();
280    for change in changes.iter() {
281        if !matches!(
282            change.change_type,
283            ChangeType::Modified | ChangeType::Added | ChangeType::Deleted
284        ) {
285            continue;
286        }
287        if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
288            continue;
289        }
290        let eid = change.entity_id.as_str();
291        let b_children = before_children
292            .get(eid)
293            .map(|v| v.as_slice())
294            .unwrap_or(&[]);
295        let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
296
297        let has_changed_child = b_children
298            .iter()
299            .any(|c| changed_ids.contains(c.id.as_str()))
300            || a_children
301                .iter()
302                .any(|c| changed_ids.contains(c.id.as_str()));
303        if !has_changed_child {
304            continue;
305        }
306
307        // Added/Deleted: suppress unconditionally; the children carry the detail.
308        // Modified: only suppress if the container's own declaration is unchanged
309        // and the value type didn't transition.
310        let should_suppress = if change.change_type == ChangeType::Modified {
311            match (before_by_id.get(eid), after_by_id.get(eid)) {
312                (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
313                    let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
314                    let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
315                    before_own == after_own
316                }
317                _ => false,
318            }
319        } else {
320            true
321        };
322
323        if should_suppress {
324            suppress.insert(change.entity_id.clone());
325        }
326    }
327
328    // Suppress an old parent that a Moved child left behind when the old
329    // parent itself appears as a change — handles the parent-rename case
330    // where the parent itself failed to match.
331    for change in changes.iter() {
332        if change.change_type == ChangeType::Moved {
333            if let Some(ref old_pid) = change.old_parent_id {
334                if changed_ids.contains(old_pid.as_str()) {
335                    suppress.insert(old_pid.clone());
336                }
337            }
338        }
339    }
340
341    if !suppress.is_empty() {
342        changes.retain(|c| !suppress.contains(&c.entity_id));
343    }
344
345    // Drop a Moved child whose key is unchanged and whose old parent matches
346    // a Renamed entity — the child only "moved" because the parent renamed.
347    let renamed_before_ids: HashSet<&str> = changes
348        .iter()
349        .filter(|c| c.change_type == ChangeType::Renamed)
350        .filter_map(|c| {
351            let old_name = c.old_entity_name.as_deref()?;
352            let after_entity = after_by_id.get(c.entity_id.as_str())?;
353            before
354                .iter()
355                .find(|e| {
356                    e.name == old_name
357                        && e.entity_type == after_entity.entity_type
358                        && e.parent_id == after_entity.parent_id
359                })
360                .map(|e| e.id.as_str())
361        })
362        .collect();
363
364    if !renamed_before_ids.is_empty() {
365        changes.retain(|c| {
366            !(c.change_type == ChangeType::Moved
367                && c.old_entity_name.is_none()
368                && c.old_parent_id
369                    .as_deref()
370                    .map_or(false, |pid| renamed_before_ids.contains(pid)))
371        });
372    }
373}
374
375fn strip_children_content(
376    content: &str,
377    parent_start_line: usize,
378    children: &[&SemanticEntity],
379) -> String {
380    let mut line_starts = vec![0];
381    for (idx, ch) in content.char_indices() {
382        if ch == '\n' {
383            line_starts.push(idx + ch.len_utf8());
384        }
385    }
386
387    let mut excluded_ranges: Vec<(usize, usize)> = Vec::new();
388    for child in children {
389        let start_idx = child.start_line.saturating_sub(parent_start_line);
390        let end_idx = child.end_line.saturating_sub(parent_start_line);
391        let search_start = line_starts.get(start_idx).copied().unwrap_or(0);
392        let search_end = line_starts
393            .get(end_idx.saturating_add(1))
394            .copied()
395            .unwrap_or(content.len())
396            .min(content.len());
397
398        if !child.content.is_empty() && search_start <= search_end {
399            let search_window = &content[search_start..search_end];
400            if search_window.starts_with(&child.content) {
401                excluded_ranges.push((search_start, search_start + child.content.len()));
402                continue;
403            }
404
405            if let Some(relative_start) = search_window.find(&child.content) {
406                let start = search_start + relative_start;
407                excluded_ranges.push((start, start + child.content.len()));
408                continue;
409            }
410        }
411    }
412
413    if excluded_ranges.is_empty() {
414        return normalize_content_for_parent_suppression(content);
415    }
416
417    excluded_ranges.sort_unstable();
418    let mut merged_ranges: Vec<(usize, usize)> = Vec::new();
419    for (start, end) in excluded_ranges {
420        if let Some((_, merged_end)) = merged_ranges.last_mut() {
421            if start <= *merged_end {
422                *merged_end = (*merged_end).max(end);
423                continue;
424            }
425        }
426        merged_ranges.push((start, end));
427    }
428
429    let mut stripped = String::with_capacity(content.len());
430    let mut cursor = 0;
431    for (start, end) in merged_ranges {
432        if cursor < start {
433            stripped.push_str(&content[cursor..start]);
434        }
435        cursor = end.max(cursor);
436    }
437    if cursor < content.len() {
438        stripped.push_str(&content[cursor..]);
439    }
440
441    normalize_content_for_parent_suppression(&stripped)
442}
443
/// Trims every line of `content`, drops lines that are empty after trimming,
/// and joins the remaining lines with single spaces.
fn normalize_content_for_parent_suppression(content: &str) -> String {
    let mut normalized = String::with_capacity(content.len());
    for line in content.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(trimmed);
    }
    normalized
}
452
453/// Detect changes in lines that fall outside any entity span.
454/// These are things like use statements, crate-level attributes, standalone
455/// comments, and macro invocations that aren't tracked as entities.
456fn detect_orphan_changes(
457    file: &FileChange,
458    before_entities: &[SemanticEntity],
459    after_entities: &[SemanticEntity],
460    plugin: Option<&dyn SemanticParserPlugin>,
461    detection_path: &str,
462    commit_sha: Option<&str>,
463    author: Option<&str>,
464) -> Vec<SemanticChange> {
465    let before_text = file.before_content.as_deref().unwrap_or("");
466    let after_text = file.after_content.as_deref().unwrap_or("");
467
468    // Build covered line sets from entity spans
469    let before_covered: HashSet<usize> = before_entities
470        .iter()
471        .flat_map(|e| e.start_line..=e.end_line)
472        .collect();
473    let after_covered: HashSet<usize> = after_entities
474        .iter()
475        .flat_map(|e| e.start_line..=e.end_line)
476        .collect();
477
478    let before_orphans = orphan_segments(before_text, &before_covered);
479    let after_orphans = orphan_segments(after_text, &after_covered);
480    let mut changes = Vec::new();
481
482    for (before_idx, after_idx) in orphan_segment_change_pairs(&before_orphans, &after_orphans) {
483        let before_orphan = before_idx.and_then(|idx| before_orphans.get(idx));
484        let after_orphan = after_idx.and_then(|idx| after_orphans.get(idx));
485        let before_content = orphan_content(before_orphan);
486        let after_content = orphan_content(after_orphan);
487
488        // Skip if orphan content is unchanged, including blank-only segments.
489        if before_content == after_content {
490            continue;
491        }
492
493        let change_type = if before_content.is_none() {
494            ChangeType::Added
495        } else if after_content.is_none() {
496            ChangeType::Deleted
497        } else {
498            ChangeType::Modified
499        };
500
501        let current_orphan = match change_type {
502            ChangeType::Deleted => before_orphan,
503            _ => after_orphan.or(before_orphan),
504        };
505        let Some(current_orphan) = current_orphan else {
506            continue;
507        };
508        let span_label = if change_type == ChangeType::Deleted {
509            "oldL"
510        } else {
511            "L"
512        };
513        let orphan_id = format!(
514            "{}::orphan::{}@{}{}-{}",
515            file.file_path,
516            change_type,
517            span_label,
518            current_orphan.start_line,
519            current_orphan.end_line
520        );
521
522        changes.push(SemanticChange {
523            id: format!("change::{orphan_id}"),
524            entity_id: orphan_id,
525            change_type,
526            entity_type: "orphan".to_string(),
527            entity_name: "module-level".to_string(),
528            entity_line: current_orphan.start_line,
529            start_line: current_orphan.start_line,
530            end_line: current_orphan.end_line,
531            old_start_line: before_orphan.map(|orphan| orphan.start_line),
532            old_end_line: before_orphan.map(|orphan| orphan.end_line),
533            parent_name: None,
534            file_path: file.file_path.clone(),
535            old_entity_name: None,
536            old_file_path: None,
537            old_parent_id: None,
538            before_content: before_content.map(str::to_string),
539            after_content: after_content.map(str::to_string),
540            commit_sha: commit_sha.map(String::from),
541            author: author.map(String::from),
542            timestamp: None,
543            structural_change: orphan_structural_change(
544                before_content,
545                after_content,
546                plugin,
547                detection_path,
548            ),
549        });
550    }
551
552    changes
553}
554
555fn orphan_structural_change(
556    before_content: Option<&str>,
557    after_content: Option<&str>,
558    plugin: Option<&dyn SemanticParserPlugin>,
559    detection_path: &str,
560) -> Option<bool> {
561    let plugin = plugin?;
562    let before_hash =
563        plugin.structural_hash_content(before_content.unwrap_or_default(), detection_path)?;
564    let after_hash =
565        plugin.structural_hash_content(after_content.unwrap_or_default(), detection_path)?;
566
567    Some(before_hash != after_hash)
568}
569
/// A maximal run of consecutive lines that no entity span covers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct OrphanSegment {
    /// 1-based number of the first line in the run.
    start_line: usize,
    /// 1-based number of the last line in the run (inclusive).
    end_line: usize,
    /// The run's lines joined with `\n`.
    content: String,
}
576
577fn orphan_segments(text: &str, covered_lines: &HashSet<usize>) -> Vec<OrphanSegment> {
578    let mut segments = Vec::new();
579    let mut current_start: Option<usize> = None;
580    let mut current_lines: Vec<&str> = Vec::new();
581    let mut last_line_number = 0;
582
583    for (i, line) in text.lines().enumerate() {
584        let line_number = i + 1;
585        last_line_number = line_number;
586        if covered_lines.contains(&line_number) {
587            if let Some(start_line) = current_start.take() {
588                segments.push(OrphanSegment {
589                    start_line,
590                    end_line: line_number - 1,
591                    content: current_lines.join("\n"),
592                });
593                current_lines.clear();
594            }
595            continue;
596        }
597
598        current_start.get_or_insert(line_number);
599        current_lines.push(line);
600    }
601
602    if let Some(start_line) = current_start {
603        segments.push(OrphanSegment {
604            start_line,
605            end_line: last_line_number.max(start_line),
606            content: current_lines.join("\n"),
607        });
608    }
609
610    segments
611}
612
613fn orphan_content(segment: Option<&OrphanSegment>) -> Option<&str> {
614    segment
615        .map(|segment| segment.content.as_str())
616        .filter(|content| !content.trim().is_empty())
617}
618
619fn orphan_segment_change_pairs(
620    before: &[OrphanSegment],
621    after: &[OrphanSegment],
622) -> Vec<(Option<usize>, Option<usize>)> {
623    let anchors = orphan_segment_lcs(before, after);
624    let mut pairs = Vec::new();
625    let mut before_start = 0;
626    let mut after_start = 0;
627
628    for (before_anchor, after_anchor) in anchors {
629        append_orphan_gap_pairs(
630            &mut pairs,
631            before_start,
632            before_anchor,
633            after_start,
634            after_anchor,
635        );
636        before_start = before_anchor + 1;
637        after_start = after_anchor + 1;
638    }
639
640    append_orphan_gap_pairs(
641        &mut pairs,
642        before_start,
643        before.len(),
644        after_start,
645        after.len(),
646    );
647
648    pairs
649}
650
/// Appends pairs for the half-open index ranges `before_start..before_end`
/// and `after_start..after_end`.
///
/// Ranges of equal length are paired position by position. Otherwise every
/// "before" index is emitted unpaired, followed by every "after" index unpaired.
fn append_orphan_gap_pairs(
    pairs: &mut Vec<(Option<usize>, Option<usize>)>,
    before_start: usize,
    before_end: usize,
    after_start: usize,
    after_end: usize,
) {
    let removed = before_end.saturating_sub(before_start);
    let inserted = after_end.saturating_sub(after_start);

    if removed == inserted {
        pairs.extend(
            (0..removed).map(|offset| (Some(before_start + offset), Some(after_start + offset))),
        );
    } else {
        pairs.extend((0..removed).map(|offset| (Some(before_start + offset), None)));
        pairs.extend((0..inserted).map(|offset| (None, Some(after_start + offset))));
    }
}
675
676fn orphan_segment_lcs(before: &[OrphanSegment], after: &[OrphanSegment]) -> Vec<(usize, usize)> {
677    let mut dp = vec![vec![0; after.len() + 1]; before.len() + 1];
678
679    for i in (0..before.len()).rev() {
680        for j in (0..after.len()).rev() {
681            dp[i][j] = if orphan_segments_equal(&before[i], &after[j]) {
682                dp[i + 1][j + 1] + 1
683            } else {
684                dp[i + 1][j].max(dp[i][j + 1])
685            };
686        }
687    }
688
689    let mut anchors = Vec::new();
690    let mut i = 0;
691    let mut j = 0;
692    while i < before.len() && j < after.len() {
693        if orphan_segments_equal(&before[i], &after[j]) {
694            anchors.push((i, j));
695            i += 1;
696            j += 1;
697        } else if dp[i + 1][j] >= dp[i][j + 1] {
698            i += 1;
699        } else {
700            j += 1;
701        }
702    }
703
704    anchors
705}
706
707fn orphan_segments_equal(before: &OrphanSegment, after: &OrphanSegment) -> bool {
708    match (orphan_content(Some(before)), orphan_content(Some(after))) {
709        (Some(before), Some(after)) => before == after,
710        _ => false,
711    }
712}
713
714#[cfg(test)]
715mod tests {
716    use super::*;
717    use crate::git::types::{FileChange, FileStatus};
718    use crate::parser::plugins::create_default_registry;
719
720    fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
721        FileChange {
722            file_path: path.to_string(),
723            status: FileStatus::Modified,
724            old_file_path: None,
725            before_content: Some(before.to_string()),
726            after_content: Some(after.to_string()),
727        }
728    }
729
730    fn renamed_file(old_path: &str, new_path: &str, before: &str, after: &str) -> FileChange {
731        FileChange {
732            file_path: new_path.to_string(),
733            status: FileStatus::Renamed,
734            old_file_path: Some(old_path.to_string()),
735            before_content: Some(before.to_string()),
736            after_content: Some(after.to_string()),
737        }
738    }
739
740    fn entity_span(id: &str, start_line: usize, end_line: usize) -> SemanticEntity {
741        SemanticEntity {
742            id: id.to_string(),
743            file_path: "a.rs".to_string(),
744            entity_type: "function".to_string(),
745            name: id.to_string(),
746            parent_id: None,
747            content: String::new(),
748            content_hash: String::new(),
749            structural_hash: None,
750            start_line,
751            end_line,
752            metadata: None,
753        }
754    }
755
756    #[test]
757    fn orphan_only_change_counts_file_and_orphan() {
758        let before = "# old module comment\n\ndef value():\n    return 1\n";
759        let after = "# new module comment\n\ndef value():\n    return 1\n";
760
761        let registry = create_default_registry();
762        let result = compute_semantic_diff(
763            &[modified_file("app.py", before, after)],
764            &registry,
765            None,
766            None,
767        );
768
769        assert_eq!(result.changes.len(), 1);
770        assert_eq!(result.file_count, 1);
771        assert_eq!(result.orphan_count, 1);
772        assert_eq!(result.modified_count, 1);
773        assert_eq!(result.changes[0].entity_type, "orphan");
774        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
775        assert_eq!(result.changes[0].structural_change, Some(false));
776    }
777
778    #[test]
779    fn orphan_code_change_is_structural() {
780        let before = "import os\n\ndef value():\n    return 1\n";
781        let after = "import sys\n\ndef value():\n    return 1\n";
782
783        let registry = create_default_registry();
784        let result = compute_semantic_diff(
785            &[modified_file("app.py", before, after)],
786            &registry,
787            None,
788            None,
789        );
790
791        assert_eq!(result.changes.len(), 1);
792        assert_eq!(result.changes[0].entity_type, "orphan");
793        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
794        assert_eq!(result.changes[0].structural_change, Some(true));
795    }
796
797    #[test]
798    fn orphan_shebang_change_is_structural() {
799        let before = "#!/usr/bin/env python3\ndef value():\n    return 1\n";
800        let after = "#!/usr/bin/env python\ndef value():\n    return 1\n";
801
802        let registry = create_default_registry();
803        let result = compute_semantic_diff(
804            &[modified_file("script", before, after)],
805            &registry,
806            None,
807            None,
808        );
809
810        assert_eq!(result.changes.len(), 1);
811        assert_eq!(result.changes[0].entity_type, "orphan");
812        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
813        assert_eq!(result.changes[0].structural_change, Some(true));
814    }
815
816    #[test]
817    fn test_parent_suppressed_when_only_child_modified() {
818        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
819        let after  = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";
820
821        let registry = create_default_registry();
822        let result = compute_semantic_diff(
823            &[modified_file("svc.py", before, after)],
824            &registry,
825            None,
826            None,
827        );
828
829        let names: Vec<&str> = result
830            .changes
831            .iter()
832            .map(|c| c.entity_name.as_str())
833            .collect();
834        assert!(
835            result.changes.iter().any(|c| c.entity_name == "get_user"),
836            "expected method get_user in changes, got: {names:?}"
837        );
838        assert!(
839            !result
840                .changes
841                .iter()
842                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
843            "class should be suppressed when only the method body changed, got: {names:?}"
844        );
845    }
846
847    #[test]
848    fn test_protocol_parent_suppressed_when_only_associatedtype_renamed() {
849        let before = "protocol Repository {\n    associatedtype Item\n}\n";
850        let after = "protocol Repository {\n    associatedtype Canvas\n}\n";
851
852        let registry = create_default_registry();
853        let result = compute_semantic_diff(
854            &[modified_file("Repository.swift", before, after)],
855            &registry,
856            None,
857            None,
858        );
859
860        let names: Vec<&str> = result
861            .changes
862            .iter()
863            .map(|c| c.entity_name.as_str())
864            .collect();
865        assert!(
866            result
867                .changes
868                .iter()
869                .any(|c| c.entity_type == "associatedtype"),
870            "expected associatedtype change, got: {names:?}"
871        );
872        assert!(
873            !result
874                .changes
875                .iter()
876                .any(|c| c.entity_name == "Repository" && c.change_type == ChangeType::Modified),
877            "protocol should be suppressed when only the associatedtype changed, got: {names:?}"
878        );
879    }
880
881    #[test]
882    fn test_protocol_parent_not_suppressed_when_own_declaration_changes() {
883        let before = "protocol Repository {\n    associatedtype Item\n}\n";
884        let after = "protocol Repository: Sendable {\n    associatedtype Canvas\n}\n";
885
886        let registry = create_default_registry();
887        let result = compute_semantic_diff(
888            &[modified_file("Repository.swift", before, after)],
889            &registry,
890            None,
891            None,
892        );
893
894        let names: Vec<&str> = result
895            .changes
896            .iter()
897            .map(|c| c.entity_name.as_str())
898            .collect();
899        assert!(
900            result
901                .changes
902                .iter()
903                .any(|c| c.entity_type == "associatedtype"),
904            "expected associatedtype change, got: {names:?}"
905        );
906        assert!(
907            result
908                .changes
909                .iter()
910                .any(|c| c.entity_name == "Repository" && c.change_type == ChangeType::Modified),
911            "protocol should remain Modified when its own declaration changed, got: {names:?}"
912        );
913    }
914
915    #[test]
916    fn test_parent_not_suppressed_when_own_declaration_changes() {
917        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
918        let after  = "class UserService(BaseService):\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";
919
920        let registry = create_default_registry();
921        let result = compute_semantic_diff(
922            &[modified_file("svc.py", before, after)],
923            &registry,
924            None,
925            None,
926        );
927
928        let names: Vec<&str> = result
929            .changes
930            .iter()
931            .map(|c| c.entity_name.as_str())
932            .collect();
933        assert!(
934            result.changes.iter().any(|c| c.entity_name == "get_user"),
935            "expected method get_user in changes, got: {names:?}"
936        );
937        assert!(
938            result
939                .changes
940                .iter()
941                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
942            "class should remain Modified when its own declaration changed, got: {names:?}"
943        );
944    }
945
946    #[test]
947    fn test_nested_typescript_class_field_diff_reports_leaf_method() {
948        let before = r#"class L1 {
949  L2 = class {
950    L3 = class {
951      L4 = class {
952        method() { return 1; }
953      };
954    };
955  };
956}
957"#;
958        let after = r#"class L1 {
959  L2 = class {
960    L3 = class {
961      L4 = class {
962        method() { return 999; }
963      };
964    };
965  };
966}
967"#;
968
969        let registry = create_default_registry();
970        let result = compute_semantic_diff(
971            &[modified_file("a.ts", before, after)],
972            &registry,
973            None,
974            None,
975        );
976
977        let changes: Vec<_> = result
978            .changes
979            .iter()
980            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
981            .collect();
982        assert!(
983            result
984                .changes
985                .iter()
986                .any(|c| c.entity_id == "a.ts::class::L1::L2::L3::L4::method"),
987            "expected method leaf change, got: {changes:?}"
988        );
989        assert!(
990            !result.changes.iter().any(|c| c.entity_type == "field"),
991            "field containers should be suppressed when only a nested method changed, got: {changes:?}"
992        );
993    }
994
995    #[test]
996    fn test_nested_typescript_object_literal_diff_reports_leaf_method() {
997        let before = r#"export const svc = {
998  open(): number { return 1; },
999  close(): number { return 0; },
1000};
1001"#;
1002        let after = r#"export const svc = {
1003  open(): number { return 2; },
1004  close(): number { return 0; },
1005};
1006"#;
1007
1008        let registry = create_default_registry();
1009        let result = compute_semantic_diff(
1010            &[modified_file("service.ts", before, after)],
1011            &registry,
1012            None,
1013            None,
1014        );
1015
1016        let changes: Vec<_> = result
1017            .changes
1018            .iter()
1019            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1020            .collect();
1021        assert!(
1022            result
1023                .changes
1024                .iter()
1025                .any(|c| c.entity_id == "service.ts::variable::svc::open"),
1026            "expected object-literal method leaf change, got: {changes:?}"
1027        );
1028        assert!(
1029            !result
1030                .changes
1031                .iter()
1032                .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1033            "variable container should be suppressed when only a nested method changed, got: {changes:?}"
1034        );
1035    }
1036
1037    #[test]
1038    fn test_nested_typescript_object_literal_pair_diff_reports_leaf_methods() {
1039        let before = r#"export const svc = {
1040  reset: () => 1,
1041  flush: function() { return 0; },
1042};
1043"#;
1044        let after = r#"export const svc = {
1045  reset: () => 2,
1046  flush: function() { return 3; },
1047};
1048"#;
1049
1050        let registry = create_default_registry();
1051        let result = compute_semantic_diff(
1052            &[modified_file("service.ts", before, after)],
1053            &registry,
1054            None,
1055            None,
1056        );
1057
1058        let changes: Vec<_> = result
1059            .changes
1060            .iter()
1061            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1062            .collect();
1063        assert!(
1064            result
1065                .changes
1066                .iter()
1067                .any(|c| c.entity_id == "service.ts::variable::svc::reset"),
1068            "expected arrow-valued object method change, got: {changes:?}"
1069        );
1070        assert!(
1071            result
1072                .changes
1073                .iter()
1074                .any(|c| c.entity_id == "service.ts::variable::svc::flush"),
1075            "expected function-valued object method change, got: {changes:?}"
1076        );
1077        assert!(
1078            !result
1079                .changes
1080                .iter()
1081                .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1082            "variable container should be suppressed when only nested function-valued properties changed, got: {changes:?}"
1083        );
1084    }
1085
1086    #[test]
1087    fn test_inline_typescript_object_literal_keeps_parent_variable_changes() {
1088        let before = "export const svc = { open() { return 1; }, enabled: true };\n";
1089        let after = "export let svc = { open() { return 2; }, enabled: false };\n";
1090
1091        let registry = create_default_registry();
1092        let result = compute_semantic_diff(
1093            &[modified_file("service.ts", before, after)],
1094            &registry,
1095            None,
1096            None,
1097        );
1098
1099        let changes: Vec<_> = result
1100            .changes
1101            .iter()
1102            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1103            .collect();
1104        assert!(
1105            result
1106                .changes
1107                .iter()
1108                .any(|c| c.entity_id == "service.ts::variable::svc::open"),
1109            "expected nested method change, got: {changes:?}"
1110        );
1111        assert!(
1112            result
1113                .changes
1114                .iter()
1115                .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1116            "parent variable change should remain visible, got: {changes:?}"
1117        );
1118    }
1119
1120    #[test]
1121    fn renamed_file_with_edited_entity_reports_move_not_add_delete() {
1122        let before = "def foo():\n    return alpha + beta + gamma\n";
1123        let after = "def foo():\n    return one + two + three\n";
1124
1125        let registry = create_default_registry();
1126        let result = compute_semantic_diff(
1127            &[renamed_file("old.py", "new.py", before, after)],
1128            &registry,
1129            None,
1130            None,
1131        );
1132
1133        assert_eq!(result.added_count, 0);
1134        assert_eq!(result.deleted_count, 0);
1135        assert_eq!(result.modified_count, 1);
1136        assert_eq!(result.moved_count, 1);
1137        assert_eq!(result.changes.len(), 1);
1138        assert_eq!(result.changes[0].entity_name, "foo");
1139        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.py"));
1140        assert_eq!(result.changes[0].structural_change, Some(true));
1141    }
1142
1143    #[test]
1144    fn duplicate_markdown_heading_reports_first_section_modification() {
1145        let before = "# Same Title\n\noriginal content of section A\n\n# Same Title\n\ncontent of section B\n";
1146        let after = "# Same Title\n\nMODIFIED content of section A\n\n# Same Title\n\ncontent of section B\n";
1147
1148        let registry = create_default_registry();
1149        let result = compute_semantic_diff(
1150            &[modified_file("doc.md", before, after)],
1151            &registry,
1152            None,
1153            None,
1154        );
1155
1156        assert_eq!(result.modified_count, 1, "{:?}", result.changes);
1157        assert_eq!(result.changes.len(), 1, "{:?}", result.changes);
1158
1159        let change = &result.changes[0];
1160        assert_eq!(change.change_type, ChangeType::Modified);
1161        assert_eq!(change.entity_name, "Same Title");
1162        assert_eq!(change.entity_line, 1);
1163        assert!(change
1164            .before_content
1165            .as_deref()
1166            .unwrap_or_default()
1167            .contains("original content of section A"));
1168        assert!(change
1169            .after_content
1170            .as_deref()
1171            .unwrap_or_default()
1172            .contains("MODIFIED content of section A"));
1173    }
1174
1175    #[test]
1176    fn orphan_changes_count_toward_change_type_buckets() {
1177        let before = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
1178        let after = "# just a comment\n";
1179
1180        let registry = create_default_registry();
1181        let result = compute_semantic_diff(
1182            &[modified_file("svc.py", before, after)],
1183            &registry,
1184            None,
1185            None,
1186        );
1187
1188        assert_eq!(result.added_count, 1);
1189        assert_eq!(result.deleted_count, 2);
1190        assert_eq!(result.modified_count, 0);
1191        assert_eq!(result.orphan_count, 1);
1192        assert!(result
1193            .changes
1194            .iter()
1195            .any(|c| c.entity_type == "orphan" && c.change_type == ChangeType::Added));
1196        assert!(result.changes.iter().any(|c| {
1197            c.entity_type == "orphan"
1198                && c.change_type == ChangeType::Added
1199                && c.structural_change == Some(false)
1200        }));
1201
1202        let named_bucket_total = result.added_count
1203            + result.modified_count
1204            + result.deleted_count
1205            + result.moved_count
1206            + result.renamed_count
1207            + result.reordered_count;
1208        assert_eq!(named_bucket_total, result.changes.len());
1209    }
1210
1211    #[test]
1212    fn orphan_changes_use_contiguous_line_spans() {
1213        let file = modified_file(
1214            "a.rs",
1215            "use alpha;\nfn foo() {}\nuse beta;\nfn bar() {}\n",
1216            "use gamma;\nfn foo() {}\nuse delta;\nfn bar() {}\n",
1217        );
1218        let entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];
1219
1220        let changes = detect_orphan_changes(&file, &entities, &entities, None, "a.rs", None, None);
1221
1222        assert_eq!(changes.len(), 2);
1223        assert_eq!(changes[0].start_line, 1);
1224        assert_eq!(changes[0].end_line, 1);
1225        assert_eq!(changes[0].old_start_line, Some(1));
1226        assert_eq!(changes[0].old_end_line, Some(1));
1227        assert_eq!(changes[0].before_content.as_deref(), Some("use alpha;"));
1228        assert_eq!(changes[0].after_content.as_deref(), Some("use gamma;"));
1229        assert_eq!(changes[1].start_line, 3);
1230        assert_eq!(changes[1].end_line, 3);
1231        assert_eq!(changes[1].old_start_line, Some(3));
1232        assert_eq!(changes[1].old_end_line, Some(3));
1233        assert_eq!(changes[1].before_content.as_deref(), Some("use beta;"));
1234        assert_eq!(changes[1].after_content.as_deref(), Some("use delta;"));
1235    }
1236
1237    #[test]
1238    fn blank_only_orphan_segments_are_ignored() {
1239        let file = modified_file("a.rs", "fn foo() {}\n", "\nfn foo() {}\n");
1240        let before_entities = vec![entity_span("foo", 1, 1)];
1241        let after_entities = vec![entity_span("foo", 2, 2)];
1242
1243        let changes = detect_orphan_changes(
1244            &file,
1245            &before_entities,
1246            &after_entities,
1247            None,
1248            "a.rs",
1249            None,
1250            None,
1251        );
1252
1253        assert!(changes.is_empty());
1254    }
1255
1256    #[test]
1257    fn inserted_orphan_segment_does_not_modify_unchanged_later_segment() {
1258        let file = modified_file(
1259            "a.rs",
1260            "fn foo() {}\nuse a;\nfn bar() {}\n",
1261            "use x;\nfn foo() {}\nuse a;\nfn bar() {}\n",
1262        );
1263        let before_entities = vec![entity_span("foo", 1, 1), entity_span("bar", 3, 3)];
1264        let after_entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];
1265
1266        let changes = detect_orphan_changes(
1267            &file,
1268            &before_entities,
1269            &after_entities,
1270            None,
1271            "a.rs",
1272            None,
1273            None,
1274        );
1275
1276        assert_eq!(changes.len(), 1);
1277        assert_eq!(changes[0].change_type, ChangeType::Added);
1278        assert_eq!(changes[0].start_line, 1);
1279        assert_eq!(changes[0].end_line, 1);
1280        assert!(changes[0].old_start_line.is_none());
1281        assert_eq!(changes[0].before_content, None);
1282        assert_eq!(changes[0].after_content.as_deref(), Some("use x;"));
1283    }
1284
1285    #[test]
1286    fn uneven_orphan_gaps_are_not_forced_into_modifications() {
1287        let file = modified_file(
1288            "a.rs",
1289            "use a;\nfn foo() {}\nuse old;\nfn mid() {}\nuse c;\nfn bar() {}\n",
1290            "use a;\nfn foo() {}\nuse new1;\nfn mid() {}\nuse new2;\nfn baz() {}\nuse c;\nfn bar() {}\n",
1291        );
1292        let before_entities = vec![
1293            entity_span("foo", 2, 2),
1294            entity_span("mid", 4, 4),
1295            entity_span("bar", 6, 6),
1296        ];
1297        let after_entities = vec![
1298            entity_span("foo", 2, 2),
1299            entity_span("mid", 4, 4),
1300            entity_span("baz", 6, 6),
1301            entity_span("bar", 8, 8),
1302        ];
1303
1304        let changes = detect_orphan_changes(
1305            &file,
1306            &before_entities,
1307            &after_entities,
1308            None,
1309            "a.rs",
1310            None,
1311            None,
1312        );
1313
1314        assert_eq!(changes.len(), 3);
1315        assert_eq!(changes[0].change_type, ChangeType::Deleted);
1316        assert!(changes[0].entity_id.contains("::deleted@oldL3-3"));
1317        assert_eq!(changes[0].before_content.as_deref(), Some("use old;"));
1318        assert_eq!(changes[1].change_type, ChangeType::Added);
1319        assert_eq!(changes[1].after_content.as_deref(), Some("use new1;"));
1320        assert_eq!(changes[2].change_type, ChangeType::Added);
1321        assert_eq!(changes[2].after_content.as_deref(), Some("use new2;"));
1322    }
1323}