1#[cfg(feature = "parallel")]
2use rayon::prelude::*;
3use serde::Serialize;
4
5use crate::git::types::FileChange;
6
/// Iterates over `$slice` with rayon's `par_iter()` when the `parallel`
/// feature is enabled, and with the sequential `iter()` otherwise.
///
/// Exactly one of the two bindings survives `cfg` evaluation, so `$slice`
/// is expanded (and evaluated) once.
macro_rules! maybe_par_iter {
    ($slice:expr) => {{
        #[cfg(feature = "parallel")]
        let iter = $slice.par_iter();
        #[cfg(not(feature = "parallel"))]
        let iter = $slice.iter();
        iter
    }};
}
19use crate::model::change::{ChangeType, SemanticChange};
20use crate::model::entity::SemanticEntity;
21use crate::model::identity::match_entities;
22use crate::parser::registry::ParserRegistry;
23use std::collections::{HashMap, HashSet};
24
/// Aggregated outcome of a semantic diff over a set of changed files.
///
/// Field names are serialized in camelCase.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DiffResult {
    /// Every reported change, grouped file by file and ordered by entity
    /// line within each file.
    pub changes: Vec<SemanticChange>,
    /// Number of distinct file paths that produced at least one change.
    pub file_count: usize,
    /// Number of changes whose type is `Added`.
    pub added_count: usize,
    /// Number of `Modified` changes, plus every moved, renamed or reordered
    /// change that also reports a content change.
    pub modified_count: usize,
    /// Number of changes whose type is `Deleted`.
    pub deleted_count: usize,
    /// Number of changes whose type is `Moved`.
    pub moved_count: usize,
    /// Number of changes whose type is `Renamed`.
    pub renamed_count: usize,
    /// Number of changes whose type is `Reordered`.
    pub reordered_count: usize,
    /// Number of changes whose entity type is `"orphan"`; these are also
    /// counted in the bucket matching their change type.
    pub orphan_count: usize,
    /// Entities extracted from the "before" side, summed over the files
    /// that produced at least one change.
    pub total_entities_before: usize,
    /// Entities extracted from the "after" side, summed over the files
    /// that produced at least one change.
    pub total_entities_after: usize,
}
40
41pub fn compute_semantic_diff(
42 file_changes: &[FileChange],
43 registry: &ParserRegistry,
44 commit_sha: Option<&str>,
45 author: Option<&str>,
46) -> DiffResult {
47 let per_file_changes: Vec<(String, Vec<SemanticChange>, usize, usize)> =
49 maybe_par_iter!(file_changes)
50 .filter_map(|file| {
51 let content_hint = file
52 .after_content
53 .as_deref()
54 .or(file.before_content.as_deref())
55 .unwrap_or("");
56 let resolved = registry.resolve_file_path(&file.file_path);
57 let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
58 let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
59
60 let before_entities = if let Some(ref content) = file.before_content {
61 let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
62 let before_resolved = registry.resolve_file_path(before_path);
63 let before_detection = before_resolved.as_deref().unwrap_or(before_path);
64 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
65 plugin.extract_entities(content, before_detection)
66 })) {
67 Ok(entities) => entities,
68 Err(_) => Vec::new(),
69 }
70 } else {
71 Vec::new()
72 };
73
74 let after_entities = if let Some(ref content) = file.after_content {
75 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
76 plugin.extract_entities(content, detection_path)
77 })) {
78 Ok(entities) => entities,
79 Err(_) => Vec::new(),
80 }
81 } else {
82 Vec::new()
83 };
84
85 let before_count = before_entities.len();
86 let after_count = after_entities.len();
87
88 let sim_fn = |a: &crate::model::entity::SemanticEntity,
89 b: &crate::model::entity::SemanticEntity|
90 -> f64 { plugin.compute_similarity(a, b) };
91
92 let mut result = match_entities(
93 &before_entities,
94 &after_entities,
95 &file.file_path,
96 Some(&sim_fn),
97 commit_sha,
98 author,
99 );
100
101 suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
104
105 let orphans = detect_orphan_changes(
107 file,
108 &before_entities,
109 &after_entities,
110 commit_sha,
111 author,
112 );
113 result.changes.extend(orphans);
114
115 result.changes.sort_by_key(|change| change.entity_line);
116
117 if result.changes.is_empty() {
118 None
119 } else {
120 Some((
121 file.file_path.clone(),
122 result.changes,
123 before_count,
124 after_count,
125 ))
126 }
127 })
128 .collect();
129
130 let mut all_changes: Vec<SemanticChange> = Vec::new();
131 let mut files_with_changes: HashSet<String> = HashSet::new();
132 let mut total_entities_before: usize = 0;
133 let mut total_entities_after: usize = 0;
134 for (file_path, changes, before_count, after_count) in per_file_changes {
135 files_with_changes.insert(file_path);
136 all_changes.extend(changes);
137 total_entities_before += before_count;
138 total_entities_after += after_count;
139 }
140
141 let mut added_count = 0;
144 let mut modified_count = 0;
145 let mut deleted_count = 0;
146 let mut moved_count = 0;
147 let mut renamed_count = 0;
148 let mut reordered_count = 0;
149 let mut orphan_count = 0;
150
151 for c in &all_changes {
152 if c.entity_type == "orphan" {
153 orphan_count += 1;
154 }
155 match c.change_type {
156 ChangeType::Added => added_count += 1,
157 ChangeType::Modified => modified_count += 1,
158 ChangeType::Deleted => deleted_count += 1,
159 ChangeType::Moved => {
160 moved_count += 1;
161 if c.has_content_change() {
162 modified_count += 1;
163 }
164 }
165 ChangeType::Renamed => {
166 renamed_count += 1;
167 if c.has_content_change() {
168 modified_count += 1;
169 }
170 }
171 ChangeType::Reordered => {
172 reordered_count += 1;
173 if c.has_content_change() {
174 modified_count += 1;
175 }
176 }
177 }
178 }
179
180 DiffResult {
181 changes: all_changes,
182 file_count: files_with_changes.len(),
183 added_count,
184 modified_count,
185 deleted_count,
186 moved_count,
187 renamed_count,
188 reordered_count,
189 orphan_count,
190 total_entities_before,
191 total_entities_after,
192 }
193}
194
195fn suppress_redundant_parents(
196 changes: &mut Vec<SemanticChange>,
197 before: &[SemanticEntity],
198 after: &[SemanticEntity],
199) {
200 if changes.len() < 2 {
201 return;
202 }
203
204 const CONTAINER_TYPES: &[&str] = &[
205 "impl",
206 "trait",
207 "module",
208 "class",
209 "interface",
210 "mixin",
211 "extension",
212 "namespace",
213 "export",
214 "package",
215 "field",
216 "svelte_instance_script",
217 "svelte_module_script",
218 "object",
219 ];
220
221 let before_by_id: HashMap<&str, &SemanticEntity> =
222 before.iter().map(|e| (e.id.as_str(), e)).collect();
223 let after_by_id: HashMap<&str, &SemanticEntity> =
224 after.iter().map(|e| (e.id.as_str(), e)).collect();
225
226 let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
227 for e in before {
228 if let Some(ref pid) = e.parent_id {
229 before_children.entry(pid.as_str()).or_default().push(e);
230 }
231 }
232 let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
233 for e in after {
234 if let Some(ref pid) = e.parent_id {
235 after_children.entry(pid.as_str()).or_default().push(e);
236 }
237 }
238
239 let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
240
241 let mut suppress: HashSet<String> = HashSet::new();
242 for change in changes.iter() {
243 if !matches!(
244 change.change_type,
245 ChangeType::Modified | ChangeType::Added | ChangeType::Deleted
246 ) {
247 continue;
248 }
249 if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
250 continue;
251 }
252 let eid = change.entity_id.as_str();
253 let b_children = before_children
254 .get(eid)
255 .map(|v| v.as_slice())
256 .unwrap_or(&[]);
257 let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
258
259 let has_changed_child = b_children
260 .iter()
261 .any(|c| changed_ids.contains(c.id.as_str()))
262 || a_children
263 .iter()
264 .any(|c| changed_ids.contains(c.id.as_str()));
265 if !has_changed_child {
266 continue;
267 }
268
269 let should_suppress = if change.change_type == ChangeType::Modified {
273 match (before_by_id.get(eid), after_by_id.get(eid)) {
274 (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
275 let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
276 let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
277 before_own == after_own
278 }
279 _ => false,
280 }
281 } else {
282 true
283 };
284
285 if should_suppress {
286 suppress.insert(change.entity_id.clone());
287 }
288 }
289
290 for change in changes.iter() {
294 if change.change_type == ChangeType::Moved {
295 if let Some(ref old_pid) = change.old_parent_id {
296 if changed_ids.contains(old_pid.as_str()) {
297 suppress.insert(old_pid.clone());
298 }
299 }
300 }
301 }
302
303 if !suppress.is_empty() {
304 changes.retain(|c| !suppress.contains(&c.entity_id));
305 }
306
307 let renamed_before_ids: HashSet<&str> = changes
310 .iter()
311 .filter(|c| c.change_type == ChangeType::Renamed)
312 .filter_map(|c| {
313 let old_name = c.old_entity_name.as_deref()?;
314 let after_entity = after_by_id.get(c.entity_id.as_str())?;
315 before
316 .iter()
317 .find(|e| {
318 e.name == old_name
319 && e.entity_type == after_entity.entity_type
320 && e.parent_id == after_entity.parent_id
321 })
322 .map(|e| e.id.as_str())
323 })
324 .collect();
325
326 if !renamed_before_ids.is_empty() {
327 changes.retain(|c| {
328 !(c.change_type == ChangeType::Moved
329 && c.old_entity_name.is_none()
330 && c.old_parent_id
331 .as_deref()
332 .map_or(false, |pid| renamed_before_ids.contains(pid)))
333 });
334 }
335}
336
337fn strip_children_content(
338 content: &str,
339 parent_start_line: usize,
340 children: &[&SemanticEntity],
341) -> String {
342 let lines: Vec<&str> = content.lines().collect();
343 let mut excluded: HashSet<usize> = HashSet::new();
344 for child in children {
345 let start_idx = child.start_line.saturating_sub(parent_start_line);
346 let end_idx = child.end_line.saturating_sub(parent_start_line);
347 for i in start_idx..=end_idx.max(start_idx) {
348 if i < lines.len() {
349 excluded.insert(i);
350 }
351 }
352 }
353 lines
354 .iter()
355 .enumerate()
356 .filter(|(i, _)| !excluded.contains(i))
357 .map(|(_, l)| l.trim())
358 .filter(|l| !l.is_empty())
359 .collect::<Vec<_>>()
360 .join(" ")
361}
362
363fn detect_orphan_changes(
367 file: &FileChange,
368 before_entities: &[SemanticEntity],
369 after_entities: &[SemanticEntity],
370 commit_sha: Option<&str>,
371 author: Option<&str>,
372) -> Vec<SemanticChange> {
373 let before_text = file.before_content.as_deref().unwrap_or("");
374 let after_text = file.after_content.as_deref().unwrap_or("");
375
376 let before_covered: HashSet<usize> = before_entities
378 .iter()
379 .flat_map(|e| e.start_line..=e.end_line)
380 .collect();
381 let after_covered: HashSet<usize> = after_entities
382 .iter()
383 .flat_map(|e| e.start_line..=e.end_line)
384 .collect();
385
386 let before_orphans = orphan_segments(before_text, &before_covered);
387 let after_orphans = orphan_segments(after_text, &after_covered);
388 let mut changes = Vec::new();
389
390 for (before_idx, after_idx) in orphan_segment_change_pairs(&before_orphans, &after_orphans) {
391 let before_orphan = before_idx.and_then(|idx| before_orphans.get(idx));
392 let after_orphan = after_idx.and_then(|idx| after_orphans.get(idx));
393 let before_content = orphan_content(before_orphan);
394 let after_content = orphan_content(after_orphan);
395
396 if before_content == after_content {
398 continue;
399 }
400
401 let change_type = if before_content.is_none() {
402 ChangeType::Added
403 } else if after_content.is_none() {
404 ChangeType::Deleted
405 } else {
406 ChangeType::Modified
407 };
408
409 let current_orphan = match change_type {
410 ChangeType::Deleted => before_orphan,
411 _ => after_orphan.or(before_orphan),
412 };
413 let Some(current_orphan) = current_orphan else {
414 continue;
415 };
416 let span_label = if change_type == ChangeType::Deleted {
417 "oldL"
418 } else {
419 "L"
420 };
421 let orphan_id = format!(
422 "{}::orphan::{}@{}{}-{}",
423 file.file_path,
424 change_type,
425 span_label,
426 current_orphan.start_line,
427 current_orphan.end_line
428 );
429
430 changes.push(SemanticChange {
431 id: format!("change::{orphan_id}"),
432 entity_id: orphan_id,
433 change_type,
434 entity_type: "orphan".to_string(),
435 entity_name: "module-level".to_string(),
436 entity_line: current_orphan.start_line,
437 start_line: current_orphan.start_line,
438 end_line: current_orphan.end_line,
439 old_start_line: before_orphan.map(|orphan| orphan.start_line),
440 old_end_line: before_orphan.map(|orphan| orphan.end_line),
441 parent_name: None,
442 file_path: file.file_path.clone(),
443 old_entity_name: None,
444 old_file_path: None,
445 old_parent_id: None,
446 before_content: before_content.map(str::to_string),
447 after_content: after_content.map(str::to_string),
448 commit_sha: commit_sha.map(String::from),
449 author: author.map(String::from),
450 timestamp: None,
451 structural_change: Some(true),
452 });
453 }
454
455 changes
456}
457
/// A maximal run of consecutive lines that no entity covers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct OrphanSegment {
    /// 1-based number of the first line in the run.
    start_line: usize,
    /// 1-based number of the last line in the run (inclusive).
    end_line: usize,
    /// The run's lines joined with `\n`.
    content: String,
}
464
465fn orphan_segments(text: &str, covered_lines: &HashSet<usize>) -> Vec<OrphanSegment> {
466 let mut segments = Vec::new();
467 let mut current_start: Option<usize> = None;
468 let mut current_lines: Vec<&str> = Vec::new();
469 let mut last_line_number = 0;
470
471 for (i, line) in text.lines().enumerate() {
472 let line_number = i + 1;
473 last_line_number = line_number;
474 if covered_lines.contains(&line_number) {
475 if let Some(start_line) = current_start.take() {
476 segments.push(OrphanSegment {
477 start_line,
478 end_line: line_number - 1,
479 content: current_lines.join("\n"),
480 });
481 current_lines.clear();
482 }
483 continue;
484 }
485
486 current_start.get_or_insert(line_number);
487 current_lines.push(line);
488 }
489
490 if let Some(start_line) = current_start {
491 segments.push(OrphanSegment {
492 start_line,
493 end_line: last_line_number.max(start_line),
494 content: current_lines.join("\n"),
495 });
496 }
497
498 segments
499}
500
501fn orphan_content(segment: Option<&OrphanSegment>) -> Option<&str> {
502 segment
503 .map(|segment| segment.content.as_str())
504 .filter(|content| !content.trim().is_empty())
505}
506
507fn orphan_segment_change_pairs(
508 before: &[OrphanSegment],
509 after: &[OrphanSegment],
510) -> Vec<(Option<usize>, Option<usize>)> {
511 let anchors = orphan_segment_lcs(before, after);
512 let mut pairs = Vec::new();
513 let mut before_start = 0;
514 let mut after_start = 0;
515
516 for (before_anchor, after_anchor) in anchors {
517 append_orphan_gap_pairs(
518 &mut pairs,
519 before_start,
520 before_anchor,
521 after_start,
522 after_anchor,
523 );
524 before_start = before_anchor + 1;
525 after_start = after_anchor + 1;
526 }
527
528 append_orphan_gap_pairs(
529 &mut pairs,
530 before_start,
531 before.len(),
532 after_start,
533 after.len(),
534 );
535
536 pairs
537}
538
/// Appends pairings for one gap: the half-open index ranges
/// `before_start..before_end` and `after_start..after_end`.
///
/// When both ranges have the same length, segments are paired positionally.
/// Otherwise every "before" index is emitted alone, followed by every
/// "after" index alone. A range whose end precedes its start is treated as
/// empty.
fn append_orphan_gap_pairs(
    pairs: &mut Vec<(Option<usize>, Option<usize>)>,
    before_start: usize,
    before_end: usize,
    after_start: usize,
    after_end: usize,
) {
    let removed = before_end.saturating_sub(before_start);
    let inserted = after_end.saturating_sub(after_start);

    if removed == inserted {
        pairs.extend(
            (0..removed).map(|offset| (Some(before_start + offset), Some(after_start + offset))),
        );
    } else {
        pairs.extend((0..removed).map(|offset| (Some(before_start + offset), None)));
        pairs.extend((0..inserted).map(|offset| (None, Some(after_start + offset))));
    }
}
563
564fn orphan_segment_lcs(before: &[OrphanSegment], after: &[OrphanSegment]) -> Vec<(usize, usize)> {
565 let mut dp = vec![vec![0; after.len() + 1]; before.len() + 1];
566
567 for i in (0..before.len()).rev() {
568 for j in (0..after.len()).rev() {
569 dp[i][j] = if orphan_segments_equal(&before[i], &after[j]) {
570 dp[i + 1][j + 1] + 1
571 } else {
572 dp[i + 1][j].max(dp[i][j + 1])
573 };
574 }
575 }
576
577 let mut anchors = Vec::new();
578 let mut i = 0;
579 let mut j = 0;
580 while i < before.len() && j < after.len() {
581 if orphan_segments_equal(&before[i], &after[j]) {
582 anchors.push((i, j));
583 i += 1;
584 j += 1;
585 } else if dp[i + 1][j] >= dp[i][j + 1] {
586 i += 1;
587 } else {
588 j += 1;
589 }
590 }
591
592 anchors
593}
594
595fn orphan_segments_equal(before: &OrphanSegment, after: &OrphanSegment) -> bool {
596 match (orphan_content(Some(before)), orphan_content(Some(after))) {
597 (Some(before), Some(after)) => before == after,
598 _ => false,
599 }
600}
601
/// Tests for the diff pipeline: end-to-end cases run through
/// `compute_semantic_diff` with the default registry, and unit cases call
/// `detect_orphan_changes` directly with hand-built entity spans.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use crate::parser::plugins::create_default_registry;

    /// Builds a `Modified` file change with both sides present.
    fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: path.to_string(),
            status: FileStatus::Modified,
            old_file_path: None,
            before_content: Some(before.to_string()),
            after_content: Some(after.to_string()),
        }
    }

    /// Builds a `Renamed` file change from `old_path` to `new_path`.
    fn renamed_file(old_path: &str, new_path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: new_path.to_string(),
            status: FileStatus::Renamed,
            old_file_path: Some(old_path.to_string()),
            before_content: Some(before.to_string()),
            after_content: Some(after.to_string()),
        }
    }

    /// Builds a content-less function entity that only carries a line span.
    fn entity_span(id: &str, start_line: usize, end_line: usize) -> SemanticEntity {
        SemanticEntity {
            id: id.to_string(),
            file_path: "a.rs".to_string(),
            entity_type: "function".to_string(),
            name: id.to_string(),
            parent_id: None,
            content: String::new(),
            content_hash: String::new(),
            structural_hash: None,
            start_line,
            end_line,
            metadata: None,
        }
    }

    #[test]
    fn orphan_only_change_counts_file_and_orphan() {
        let before = "# old module comment\n\ndef value():\n    return 1\n";
        let after = "# new module comment\n\ndef value():\n    return 1\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("app.py", before, after)],
            &registry,
            None,
            None,
        );

        assert_eq!(result.changes.len(), 1);
        assert_eq!(result.file_count, 1);
        assert_eq!(result.orphan_count, 1);
        assert_eq!(result.modified_count, 1);
        assert_eq!(result.changes[0].entity_type, "orphan");
        assert_eq!(result.changes[0].change_type, ChangeType::Modified);
    }

    #[test]
    fn test_parent_suppressed_when_only_child_modified() {
        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("svc.py", before, after)],
            &registry,
            None,
            None,
        );

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            !result
                .changes
                .iter()
                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should be suppressed when only the method body changed, got: {names:?}"
        );
    }

    #[test]
    fn test_parent_not_suppressed_when_own_declaration_changes() {
        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService(BaseService):\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("svc.py", before, after)],
            &registry,
            None,
            None,
        );

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            result
                .changes
                .iter()
                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should remain Modified when its own declaration changed, got: {names:?}"
        );
    }

    #[test]
    fn test_nested_typescript_class_field_diff_reports_leaf_method() {
        let before = r#"class L1 {
    L2 = class {
        L3 = class {
            L4 = class {
                method() { return 1; }
            };
        };
    };
}
"#;
        let after = r#"class L1 {
    L2 = class {
        L3 = class {
            L4 = class {
                method() { return 999; }
            };
        };
    };
}
"#;

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("a.ts", before, after)],
            &registry,
            None,
            None,
        );

        let changes: Vec<_> = result
            .changes
            .iter()
            .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
            .collect();
        assert!(
            result
                .changes
                .iter()
                .any(|c| c.entity_id == "a.ts::class::L1::L2::L3::L4::method"),
            "expected method leaf change, got: {changes:?}"
        );
        assert!(
            !result.changes.iter().any(|c| c.entity_type == "field"),
            "field containers should be suppressed when only a nested method changed, got: {changes:?}"
        );
    }

    #[test]
    fn renamed_file_with_edited_entity_reports_move_not_add_delete() {
        let before = "def foo():\n    return alpha + beta + gamma\n";
        let after = "def foo():\n    return one + two + three\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[renamed_file("old.py", "new.py", before, after)],
            &registry,
            None,
            None,
        );

        assert_eq!(result.added_count, 0);
        assert_eq!(result.deleted_count, 0);
        assert_eq!(result.modified_count, 1);
        assert_eq!(result.moved_count, 1);
        assert_eq!(result.changes.len(), 1);
        assert_eq!(result.changes[0].entity_name, "foo");
        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.py"));
        assert_eq!(result.changes[0].structural_change, Some(true));
    }

    #[test]
    fn duplicate_markdown_heading_reports_first_section_modification() {
        let before = "# Same Title\n\noriginal content of section A\n\n# Same Title\n\ncontent of section B\n";
        let after = "# Same Title\n\nMODIFIED content of section A\n\n# Same Title\n\ncontent of section B\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("doc.md", before, after)],
            &registry,
            None,
            None,
        );

        assert_eq!(result.modified_count, 1, "{:?}", result.changes);
        assert_eq!(result.changes.len(), 1, "{:?}", result.changes);

        let change = &result.changes[0];
        assert_eq!(change.change_type, ChangeType::Modified);
        assert_eq!(change.entity_name, "Same Title");
        assert_eq!(change.entity_line, 1);
        assert!(change
            .before_content
            .as_deref()
            .unwrap_or_default()
            .contains("original content of section A"));
        assert!(change
            .after_content
            .as_deref()
            .unwrap_or_default()
            .contains("MODIFIED content of section A"));
    }

    #[test]
    fn orphan_changes_count_toward_change_type_buckets() {
        let before = "def foo():\n    return 1\n\ndef bar():\n    return 2\n";
        let after = "# just a comment\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("svc.py", before, after)],
            &registry,
            None,
            None,
        );

        assert_eq!(result.added_count, 1);
        assert_eq!(result.deleted_count, 2);
        assert_eq!(result.modified_count, 0);
        assert_eq!(result.orphan_count, 1);
        assert!(result
            .changes
            .iter()
            .any(|c| c.entity_type == "orphan" && c.change_type == ChangeType::Added));

        // With no relocations in play, every change lands in exactly one bucket.
        let named_bucket_total = result.added_count
            + result.modified_count
            + result.deleted_count
            + result.moved_count
            + result.renamed_count
            + result.reordered_count;
        assert_eq!(named_bucket_total, result.changes.len());
    }

    #[test]
    fn orphan_changes_use_contiguous_line_spans() {
        let file = modified_file(
            "a.rs",
            "use alpha;\nfn foo() {}\nuse beta;\nfn bar() {}\n",
            "use gamma;\nfn foo() {}\nuse delta;\nfn bar() {}\n",
        );
        let entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];

        let changes = detect_orphan_changes(&file, &entities, &entities, None, None);

        assert_eq!(changes.len(), 2);
        assert_eq!(changes[0].start_line, 1);
        assert_eq!(changes[0].end_line, 1);
        assert_eq!(changes[0].old_start_line, Some(1));
        assert_eq!(changes[0].old_end_line, Some(1));
        assert_eq!(changes[0].before_content.as_deref(), Some("use alpha;"));
        assert_eq!(changes[0].after_content.as_deref(), Some("use gamma;"));
        assert_eq!(changes[1].start_line, 3);
        assert_eq!(changes[1].end_line, 3);
        assert_eq!(changes[1].old_start_line, Some(3));
        assert_eq!(changes[1].old_end_line, Some(3));
        assert_eq!(changes[1].before_content.as_deref(), Some("use beta;"));
        assert_eq!(changes[1].after_content.as_deref(), Some("use delta;"));
    }

    #[test]
    fn blank_only_orphan_segments_are_ignored() {
        let file = modified_file("a.rs", "fn foo() {}\n", "\nfn foo() {}\n");
        let before_entities = vec![entity_span("foo", 1, 1)];
        let after_entities = vec![entity_span("foo", 2, 2)];

        let changes =
            detect_orphan_changes(&file, &before_entities, &after_entities, None, None);

        assert!(changes.is_empty());
    }

    #[test]
    fn inserted_orphan_segment_does_not_modify_unchanged_later_segment() {
        let file = modified_file(
            "a.rs",
            "fn foo() {}\nuse a;\nfn bar() {}\n",
            "use x;\nfn foo() {}\nuse a;\nfn bar() {}\n",
        );
        let before_entities = vec![entity_span("foo", 1, 1), entity_span("bar", 3, 3)];
        let after_entities = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];

        let changes =
            detect_orphan_changes(&file, &before_entities, &after_entities, None, None);

        assert_eq!(changes.len(), 1);
        assert_eq!(changes[0].change_type, ChangeType::Added);
        assert_eq!(changes[0].start_line, 1);
        assert_eq!(changes[0].end_line, 1);
        assert!(changes[0].old_start_line.is_none());
        assert_eq!(changes[0].before_content, None);
        assert_eq!(changes[0].after_content.as_deref(), Some("use x;"));
    }

    #[test]
    fn uneven_orphan_gaps_are_not_forced_into_modifications() {
        let file = modified_file(
            "a.rs",
            "use a;\nfn foo() {}\nuse old;\nfn mid() {}\nuse c;\nfn bar() {}\n",
            "use a;\nfn foo() {}\nuse new1;\nfn mid() {}\nuse new2;\nfn baz() {}\nuse c;\nfn bar() {}\n",
        );
        let before_entities = vec![
            entity_span("foo", 2, 2),
            entity_span("mid", 4, 4),
            entity_span("bar", 6, 6),
        ];
        let after_entities = vec![
            entity_span("foo", 2, 2),
            entity_span("mid", 4, 4),
            entity_span("baz", 6, 6),
            entity_span("bar", 8, 8),
        ];

        let changes =
            detect_orphan_changes(&file, &before_entities, &after_entities, None, None);

        assert_eq!(changes.len(), 3);
        assert_eq!(changes[0].change_type, ChangeType::Deleted);
        assert!(changes[0].entity_id.contains("::deleted@oldL3-3"));
        assert_eq!(changes[0].before_content.as_deref(), Some("use old;"));
        assert_eq!(changes[1].change_type, ChangeType::Added);
        assert_eq!(changes[1].after_content.as_deref(), Some("use new1;"));
        assert_eq!(changes[2].change_type, ChangeType::Added);
        assert_eq!(changes[2].after_content.as_deref(), Some("use new2;"));
    }
}