1#[cfg(feature = "parallel")]
2use rayon::prelude::*;
3use serde::Serialize;
4
5use crate::git::types::{FileChange, FileStatus};
6
/// Iterates `$slice` in parallel (via `par_iter`) when the `parallel` feature
/// is enabled.
#[cfg(feature = "parallel")]
macro_rules! maybe_par_iter {
    ($slice:expr) => {
        $slice.par_iter()
    };
}

/// Iterates `$slice` sequentially (via `iter`) when the `parallel` feature is
/// disabled.
#[cfg(not(feature = "parallel"))]
macro_rules! maybe_par_iter {
    ($slice:expr) => {
        $slice.iter()
    };
}
19use crate::model::change::{ChangeType, SemanticChange};
20use crate::model::entity::SemanticEntity;
21use crate::model::identity::match_entities;
22use crate::parser::plugin::SemanticParserPlugin;
23use crate::parser::registry::ParserRegistry;
24use std::collections::{HashMap, HashSet};
25
/// Aggregated outcome of a semantic diff over a set of file changes.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Default, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DiffResult {
    /// All semantic changes found across the diffed files.
    pub changes: Vec<SemanticChange>,
    /// Number of distinct file paths that produced at least one change.
    pub file_count: usize,
    /// Number of changes whose type is `Added`.
    pub added_count: usize,
    /// Number of `Modified` changes, plus every moved, renamed, or reordered
    /// change that also reports a content change.
    pub modified_count: usize,
    /// Number of changes whose type is `Deleted`.
    pub deleted_count: usize,
    /// Number of changes whose type is `Moved`.
    pub moved_count: usize,
    /// Number of changes whose type is `Renamed`.
    pub renamed_count: usize,
    /// Number of changes whose type is `Reordered`.
    pub reordered_count: usize,
    /// Number of changes whose entity type is `"orphan"`; these are also
    /// counted in the change-type buckets above.
    pub orphan_count: usize,
    /// Entities extracted from the "before" content, summed over the files
    /// that produced at least one change.
    pub total_entities_before: usize,
    /// Entities extracted from the "after" content, summed over the files
    /// that produced at least one change.
    pub total_entities_after: usize,
}
41
/// A file change reported without semantic detail because the content needed
/// to diff it is missing (presumably a binary file — confirm with the
/// producer of `FileChange`).
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct BinaryFileChange {
    /// Path of the file, copied from the originating `FileChange`.
    pub file_path: String,
    /// Status of the file, copied from the originating `FileChange`.
    pub status: FileStatus,
    /// Previous path of the file, if the originating `FileChange` carries one.
    pub old_file_path: Option<String>,
}
49
50impl From<&FileChange> for BinaryFileChange {
51 fn from(file: &FileChange) -> Self {
52 Self {
53 file_path: file.file_path.clone(),
54 status: file.status.clone(),
55 old_file_path: file.old_file_path.clone(),
56 }
57 }
58}
59
60pub fn collect_binary_file_changes(file_changes: &[FileChange]) -> Vec<BinaryFileChange> {
61 file_changes
62 .iter()
63 .filter(|file| lacks_diffable_content(file))
64 .map(BinaryFileChange::from)
65 .collect()
66}
67
68fn lacks_diffable_content(file: &FileChange) -> bool {
69 match &file.status {
70 FileStatus::Added => file.after_content.is_none(),
71 FileStatus::Deleted => file.before_content.is_none(),
72 FileStatus::Modified | FileStatus::Renamed => {
73 file.before_content.is_none() || file.after_content.is_none()
74 }
75 }
76}
77
78pub fn compute_semantic_diff(
79 file_changes: &[FileChange],
80 registry: &ParserRegistry,
81 commit_sha: Option<&str>,
82 author: Option<&str>,
83) -> DiffResult {
84 let per_file_changes: Vec<(String, Vec<SemanticChange>, usize, usize)> =
86 maybe_par_iter!(file_changes)
87 .filter(|file| !lacks_diffable_content(file))
88 .filter_map(|file| {
89 let content_hint = file
90 .after_content
91 .as_deref()
92 .or(file.before_content.as_deref())
93 .unwrap_or("");
94 let resolved = registry.resolve_file_path(&file.file_path);
95 let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
96 let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
97
98 let before_entities = if let Some(ref content) = file.before_content {
99 let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
100 let before_resolved = registry.resolve_file_path(before_path);
101 let before_detection = before_resolved.as_deref().unwrap_or(before_path);
102 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
103 plugin.extract_entities(content, before_detection)
104 })) {
105 Ok(entities) => entities,
106 Err(_) => Vec::new(),
107 }
108 } else {
109 Vec::new()
110 };
111
112 let after_entities = if let Some(ref content) = file.after_content {
113 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
114 plugin.extract_entities(content, detection_path)
115 })) {
116 Ok(entities) => entities,
117 Err(_) => Vec::new(),
118 }
119 } else {
120 Vec::new()
121 };
122
123 let before_count = before_entities.len();
124 let after_count = after_entities.len();
125
126 let mut result = match_entities(
127 &before_entities,
128 &after_entities,
129 &file.file_path,
130 None,
131 commit_sha,
132 author,
133 );
134
135 suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
138
139 let orphans = detect_orphan_changes(
141 file,
142 &before_entities,
143 &after_entities,
144 Some(plugin),
145 detection_path,
146 commit_sha,
147 author,
148 );
149 result.changes.extend(orphans);
150
151 result.changes.sort_by_key(|change| change.entity_line);
152
153 if result.changes.is_empty() {
154 None
155 } else {
156 Some((
157 file.file_path.clone(),
158 result.changes,
159 before_count,
160 after_count,
161 ))
162 }
163 })
164 .collect();
165
166 let mut all_changes: Vec<SemanticChange> = Vec::new();
167 let mut files_with_changes: HashSet<String> = HashSet::new();
168 let mut total_entities_before: usize = 0;
169 let mut total_entities_after: usize = 0;
170 for (file_path, changes, before_count, after_count) in per_file_changes {
171 files_with_changes.insert(file_path);
172 all_changes.extend(changes);
173 total_entities_before += before_count;
174 total_entities_after += after_count;
175 }
176
177 let mut added_count = 0;
180 let mut modified_count = 0;
181 let mut deleted_count = 0;
182 let mut moved_count = 0;
183 let mut renamed_count = 0;
184 let mut reordered_count = 0;
185 let mut orphan_count = 0;
186
187 for c in &all_changes {
188 if c.entity_type == "orphan" {
189 orphan_count += 1;
190 }
191 match c.change_type {
192 ChangeType::Added => added_count += 1,
193 ChangeType::Modified => modified_count += 1,
194 ChangeType::Deleted => deleted_count += 1,
195 ChangeType::Moved => {
196 moved_count += 1;
197 if c.has_content_change() {
198 modified_count += 1;
199 }
200 }
201 ChangeType::Renamed => {
202 renamed_count += 1;
203 if c.has_content_change() {
204 modified_count += 1;
205 }
206 }
207 ChangeType::Reordered => {
208 reordered_count += 1;
209 if c.has_content_change() {
210 modified_count += 1;
211 }
212 }
213 }
214 }
215
216 DiffResult {
217 changes: all_changes,
218 file_count: files_with_changes.len(),
219 added_count,
220 modified_count,
221 deleted_count,
222 moved_count,
223 renamed_count,
224 reordered_count,
225 orphan_count,
226 total_entities_before,
227 total_entities_after,
228 }
229}
230
231fn suppress_redundant_parents(
232 changes: &mut Vec<SemanticChange>,
233 before: &[SemanticEntity],
234 after: &[SemanticEntity],
235) {
236 if changes.len() < 2 {
237 return;
238 }
239
240 const CONTAINER_TYPES: &[&str] = &[
241 "impl",
242 "trait",
243 "module",
244 "class",
245 "interface",
246 "protocol",
247 "mixin",
248 "extension",
249 "namespace",
250 "export",
251 "package",
252 "field",
253 "variable",
254 "svelte_instance_script",
255 "svelte_module_script",
256 "object",
257 ];
258
259 let before_by_id: HashMap<&str, &SemanticEntity> =
260 before.iter().map(|e| (e.id.as_str(), e)).collect();
261 let after_by_id: HashMap<&str, &SemanticEntity> =
262 after.iter().map(|e| (e.id.as_str(), e)).collect();
263
264 let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
265 for e in before {
266 if let Some(ref pid) = e.parent_id {
267 before_children.entry(pid.as_str()).or_default().push(e);
268 }
269 }
270 let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
271 for e in after {
272 if let Some(ref pid) = e.parent_id {
273 after_children.entry(pid.as_str()).or_default().push(e);
274 }
275 }
276
277 let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
278
279 let mut suppress: HashSet<String> = HashSet::new();
280 for change in changes.iter() {
281 if !matches!(
282 change.change_type,
283 ChangeType::Modified | ChangeType::Added | ChangeType::Deleted
284 ) {
285 continue;
286 }
287 if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
288 continue;
289 }
290 let eid = change.entity_id.as_str();
291 let b_children = before_children
292 .get(eid)
293 .map(|v| v.as_slice())
294 .unwrap_or(&[]);
295 let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
296
297 let has_changed_child = b_children
298 .iter()
299 .any(|c| changed_ids.contains(c.id.as_str()))
300 || a_children
301 .iter()
302 .any(|c| changed_ids.contains(c.id.as_str()));
303 if !has_changed_child {
304 continue;
305 }
306
307 let should_suppress = if change.change_type == ChangeType::Modified {
311 match (before_by_id.get(eid), after_by_id.get(eid)) {
312 (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
313 let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
314 let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
315 before_own == after_own
316 }
317 _ => false,
318 }
319 } else {
320 true
321 };
322
323 if should_suppress {
324 suppress.insert(change.entity_id.clone());
325 }
326 }
327
328 for change in changes.iter() {
332 if change.change_type == ChangeType::Moved {
333 if let Some(ref old_pid) = change.old_parent_id {
334 if changed_ids.contains(old_pid.as_str()) {
335 suppress.insert(old_pid.clone());
336 }
337 }
338 }
339 }
340
341 if !suppress.is_empty() {
342 changes.retain(|c| !suppress.contains(&c.entity_id));
343 }
344
345 let renamed_before_ids: HashSet<&str> = changes
348 .iter()
349 .filter(|c| c.change_type == ChangeType::Renamed)
350 .filter_map(|c| {
351 let old_name = c.old_entity_name.as_deref()?;
352 let after_entity = after_by_id.get(c.entity_id.as_str())?;
353 before
354 .iter()
355 .find(|e| {
356 e.name == old_name
357 && e.entity_type == after_entity.entity_type
358 && e.parent_id == after_entity.parent_id
359 })
360 .map(|e| e.id.as_str())
361 })
362 .collect();
363
364 if !renamed_before_ids.is_empty() {
365 changes.retain(|c| {
366 !(c.change_type == ChangeType::Moved
367 && c.old_entity_name.is_none()
368 && c.old_parent_id
369 .as_deref()
370 .map_or(false, |pid| renamed_before_ids.contains(pid)))
371 });
372 }
373}
374
375fn strip_children_content(
376 content: &str,
377 parent_start_line: usize,
378 children: &[&SemanticEntity],
379) -> String {
380 let mut line_starts = vec![0];
381 for (idx, ch) in content.char_indices() {
382 if ch == '\n' {
383 line_starts.push(idx + ch.len_utf8());
384 }
385 }
386
387 let mut excluded_ranges: Vec<(usize, usize)> = Vec::new();
388 for child in children {
389 let start_idx = child.start_line.saturating_sub(parent_start_line);
390 let end_idx = child.end_line.saturating_sub(parent_start_line);
391 let search_start = line_starts.get(start_idx).copied().unwrap_or(0);
392 let search_end = line_starts
393 .get(end_idx.saturating_add(1))
394 .copied()
395 .unwrap_or(content.len())
396 .min(content.len());
397
398 if !child.content.is_empty() && search_start <= search_end {
399 let search_window = &content[search_start..search_end];
400 if search_window.starts_with(&child.content) {
401 excluded_ranges.push((search_start, search_start + child.content.len()));
402 continue;
403 }
404
405 if let Some(relative_start) = search_window.find(&child.content) {
406 let start = search_start + relative_start;
407 excluded_ranges.push((start, start + child.content.len()));
408 continue;
409 }
410 }
411 }
412
413 if excluded_ranges.is_empty() {
414 return normalize_content_for_parent_suppression(content);
415 }
416
417 excluded_ranges.sort_unstable();
418 let mut merged_ranges: Vec<(usize, usize)> = Vec::new();
419 for (start, end) in excluded_ranges {
420 if let Some((_, merged_end)) = merged_ranges.last_mut() {
421 if start <= *merged_end {
422 *merged_end = (*merged_end).max(end);
423 continue;
424 }
425 }
426 merged_ranges.push((start, end));
427 }
428
429 let mut stripped = String::with_capacity(content.len());
430 let mut cursor = 0;
431 for (start, end) in merged_ranges {
432 if cursor < start {
433 stripped.push_str(&content[cursor..start]);
434 }
435 cursor = end.max(cursor);
436 }
437 if cursor < content.len() {
438 stripped.push_str(&content[cursor..]);
439 }
440
441 normalize_content_for_parent_suppression(&stripped)
442}
443
/// Trims every line of `content`, drops the lines that end up empty, and
/// joins the remaining lines with a single space.
fn normalize_content_for_parent_suppression(content: &str) -> String {
    let mut normalized = String::with_capacity(content.len());
    for line in content.lines().map(str::trim) {
        if line.is_empty() {
            continue;
        }
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(line);
    }
    normalized
}
452
453fn detect_orphan_changes(
457 file: &FileChange,
458 before_entities: &[SemanticEntity],
459 after_entities: &[SemanticEntity],
460 plugin: Option<&dyn SemanticParserPlugin>,
461 detection_path: &str,
462 commit_sha: Option<&str>,
463 author: Option<&str>,
464) -> Vec<SemanticChange> {
465 let before_text = file.before_content.as_deref().unwrap_or("");
466 let after_text = file.after_content.as_deref().unwrap_or("");
467
468 let before_covered: HashSet<usize> = before_entities
470 .iter()
471 .flat_map(|e| e.start_line..=e.end_line)
472 .collect();
473 let after_covered: HashSet<usize> = after_entities
474 .iter()
475 .flat_map(|e| e.start_line..=e.end_line)
476 .collect();
477
478 let before_orphans = orphan_segments(before_text, &before_covered);
479 let after_orphans = orphan_segments(after_text, &after_covered);
480 let mut changes = Vec::new();
481
482 for (before_idx, after_idx) in orphan_segment_change_pairs(&before_orphans, &after_orphans) {
483 let before_orphan = before_idx.and_then(|idx| before_orphans.get(idx));
484 let after_orphan = after_idx.and_then(|idx| after_orphans.get(idx));
485 let before_content = orphan_content(before_orphan);
486 let after_content = orphan_content(after_orphan);
487
488 if before_content == after_content {
490 continue;
491 }
492
493 let change_type = if before_content.is_none() {
494 ChangeType::Added
495 } else if after_content.is_none() {
496 ChangeType::Deleted
497 } else {
498 ChangeType::Modified
499 };
500
501 let current_orphan = match change_type {
502 ChangeType::Deleted => before_orphan,
503 _ => after_orphan.or(before_orphan),
504 };
505 let Some(current_orphan) = current_orphan else {
506 continue;
507 };
508 let span_label = if change_type == ChangeType::Deleted {
509 "oldL"
510 } else {
511 "L"
512 };
513 let orphan_id = format!(
514 "{}::orphan::{}@{}{}-{}",
515 file.file_path,
516 change_type,
517 span_label,
518 current_orphan.start_line,
519 current_orphan.end_line
520 );
521
522 changes.push(SemanticChange {
523 id: format!("change::{orphan_id}"),
524 entity_id: orphan_id,
525 change_type,
526 entity_type: "orphan".to_string(),
527 entity_name: "module-level".to_string(),
528 entity_line: current_orphan.start_line,
529 start_line: current_orphan.start_line,
530 end_line: current_orphan.end_line,
531 old_start_line: before_orphan.map(|orphan| orphan.start_line),
532 old_end_line: before_orphan.map(|orphan| orphan.end_line),
533 parent_name: None,
534 file_path: file.file_path.clone(),
535 old_entity_name: None,
536 old_file_path: None,
537 old_parent_id: None,
538 before_content: before_content.map(str::to_string),
539 after_content: after_content.map(str::to_string),
540 commit_sha: commit_sha.map(String::from),
541 author: author.map(String::from),
542 timestamp: None,
543 structural_change: orphan_structural_change(
544 before_content,
545 after_content,
546 plugin,
547 detection_path,
548 ),
549 });
550 }
551
552 changes
553}
554
555fn orphan_structural_change(
556 before_content: Option<&str>,
557 after_content: Option<&str>,
558 plugin: Option<&dyn SemanticParserPlugin>,
559 detection_path: &str,
560) -> Option<bool> {
561 let plugin = plugin?;
562 let before_hash =
563 plugin.structural_hash_content(before_content.unwrap_or_default(), detection_path)?;
564 let after_hash =
565 plugin.structural_hash_content(after_content.unwrap_or_default(), detection_path)?;
566
567 Some(before_hash != after_hash)
568}
569
/// A contiguous run of lines that no entity covers.
#[derive(Debug, Clone, PartialEq, Eq)]
struct OrphanSegment {
    /// 1-based number of the first line in the run.
    start_line: usize,
    /// 1-based number of the last line in the run (inclusive).
    end_line: usize,
    /// The lines of the run joined with `"\n"`.
    content: String,
}
576
577fn orphan_segments(text: &str, covered_lines: &HashSet<usize>) -> Vec<OrphanSegment> {
578 let mut segments = Vec::new();
579 let mut current_start: Option<usize> = None;
580 let mut current_lines: Vec<&str> = Vec::new();
581 let mut last_line_number = 0;
582
583 for (i, line) in text.lines().enumerate() {
584 let line_number = i + 1;
585 last_line_number = line_number;
586 if covered_lines.contains(&line_number) {
587 if let Some(start_line) = current_start.take() {
588 segments.push(OrphanSegment {
589 start_line,
590 end_line: line_number - 1,
591 content: current_lines.join("\n"),
592 });
593 current_lines.clear();
594 }
595 continue;
596 }
597
598 current_start.get_or_insert(line_number);
599 current_lines.push(line);
600 }
601
602 if let Some(start_line) = current_start {
603 segments.push(OrphanSegment {
604 start_line,
605 end_line: last_line_number.max(start_line),
606 content: current_lines.join("\n"),
607 });
608 }
609
610 segments
611}
612
613fn orphan_content(segment: Option<&OrphanSegment>) -> Option<&str> {
614 segment
615 .map(|segment| segment.content.as_str())
616 .filter(|content| !content.trim().is_empty())
617}
618
619fn orphan_segment_change_pairs(
620 before: &[OrphanSegment],
621 after: &[OrphanSegment],
622) -> Vec<(Option<usize>, Option<usize>)> {
623 let anchors = orphan_segment_lcs(before, after);
624 let mut pairs = Vec::new();
625 let mut before_start = 0;
626 let mut after_start = 0;
627
628 for (before_anchor, after_anchor) in anchors {
629 append_orphan_gap_pairs(
630 &mut pairs,
631 before_start,
632 before_anchor,
633 after_start,
634 after_anchor,
635 );
636 before_start = before_anchor + 1;
637 after_start = after_anchor + 1;
638 }
639
640 append_orphan_gap_pairs(
641 &mut pairs,
642 before_start,
643 before.len(),
644 after_start,
645 after.len(),
646 );
647
648 pairs
649}
650
/// Appends pairs for the half-open index gaps `before_start..before_end` and
/// `after_start..after_end`.
///
/// Gaps of equal length are paired positionally. Otherwise every "before"
/// index is emitted unpaired, followed by every "after" index unpaired. A gap
/// whose end does not exceed its start is treated as empty.
fn append_orphan_gap_pairs(
    pairs: &mut Vec<(Option<usize>, Option<usize>)>,
    before_start: usize,
    before_end: usize,
    after_start: usize,
    after_end: usize,
) {
    let before_indices = before_start..before_end;
    let after_indices = after_start..after_end;

    if before_indices.len() == after_indices.len() {
        pairs.extend(
            before_indices
                .zip(after_indices)
                .map(|(before_idx, after_idx)| (Some(before_idx), Some(after_idx))),
        );
    } else {
        pairs.extend(before_indices.map(|before_idx| (Some(before_idx), None)));
        pairs.extend(after_indices.map(|after_idx| (None, Some(after_idx))));
    }
}
675
676fn orphan_segment_lcs(before: &[OrphanSegment], after: &[OrphanSegment]) -> Vec<(usize, usize)> {
677 let mut dp = vec![vec![0; after.len() + 1]; before.len() + 1];
678
679 for i in (0..before.len()).rev() {
680 for j in (0..after.len()).rev() {
681 dp[i][j] = if orphan_segments_equal(&before[i], &after[j]) {
682 dp[i + 1][j + 1] + 1
683 } else {
684 dp[i + 1][j].max(dp[i][j + 1])
685 };
686 }
687 }
688
689 let mut anchors = Vec::new();
690 let mut i = 0;
691 let mut j = 0;
692 while i < before.len() && j < after.len() {
693 if orphan_segments_equal(&before[i], &after[j]) {
694 anchors.push((i, j));
695 i += 1;
696 j += 1;
697 } else if dp[i + 1][j] >= dp[i][j + 1] {
698 i += 1;
699 } else {
700 j += 1;
701 }
702 }
703
704 anchors
705}
706
707fn orphan_segments_equal(before: &OrphanSegment, after: &OrphanSegment) -> bool {
708 match (orphan_content(Some(before)), orphan_content(Some(after))) {
709 (Some(before), Some(after)) => before == after,
710 _ => false,
711 }
712}
713
714#[cfg(test)]
715mod tests {
716 use super::*;
717 use crate::git::types::{FileChange, FileStatus};
718 use crate::parser::plugins::create_default_registry;
719
720 fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
721 FileChange {
722 file_path: path.to_string(),
723 status: FileStatus::Modified,
724 old_file_path: None,
725 before_content: Some(before.to_string()),
726 after_content: Some(after.to_string()),
727 }
728 }
729
730 fn renamed_file(old_path: &str, new_path: &str, before: &str, after: &str) -> FileChange {
731 FileChange {
732 file_path: new_path.to_string(),
733 status: FileStatus::Renamed,
734 old_file_path: Some(old_path.to_string()),
735 before_content: Some(before.to_string()),
736 after_content: Some(after.to_string()),
737 }
738 }
739
740 fn entity_span(id: &str, start_line: usize, end_line: usize) -> SemanticEntity {
741 SemanticEntity {
742 id: id.to_string(),
743 file_path: "a.rs".to_string(),
744 entity_type: "function".to_string(),
745 name: id.to_string(),
746 parent_id: None,
747 content: String::new(),
748 content_hash: String::new(),
749 structural_hash: None,
750 start_line,
751 end_line,
752 metadata: None,
753 }
754 }
755
756 #[test]
757 fn orphan_only_change_counts_file_and_orphan() {
758 let before = "# old module comment\n\ndef value():\n return 1\n";
759 let after = "# new module comment\n\ndef value():\n return 1\n";
760
761 let registry = create_default_registry();
762 let result = compute_semantic_diff(
763 &[modified_file("app.py", before, after)],
764 ®istry,
765 None,
766 None,
767 );
768
769 assert_eq!(result.changes.len(), 1);
770 assert_eq!(result.file_count, 1);
771 assert_eq!(result.orphan_count, 1);
772 assert_eq!(result.modified_count, 1);
773 assert_eq!(result.changes[0].entity_type, "orphan");
774 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
775 assert_eq!(result.changes[0].structural_change, Some(false));
776 }
777
778 #[test]
779 fn orphan_code_change_is_structural() {
780 let before = "import os\n\ndef value():\n return 1\n";
781 let after = "import sys\n\ndef value():\n return 1\n";
782
783 let registry = create_default_registry();
784 let result = compute_semantic_diff(
785 &[modified_file("app.py", before, after)],
786 ®istry,
787 None,
788 None,
789 );
790
791 assert_eq!(result.changes.len(), 1);
792 assert_eq!(result.changes[0].entity_type, "orphan");
793 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
794 assert_eq!(result.changes[0].structural_change, Some(true));
795 }
796
797 #[test]
798 fn orphan_shebang_change_is_structural() {
799 let before = "#!/usr/bin/env python3\ndef value():\n return 1\n";
800 let after = "#!/usr/bin/env python\ndef value():\n return 1\n";
801
802 let registry = create_default_registry();
803 let result = compute_semantic_diff(
804 &[modified_file("script", before, after)],
805 ®istry,
806 None,
807 None,
808 );
809
810 assert_eq!(result.changes.len(), 1);
811 assert_eq!(result.changes[0].entity_type, "orphan");
812 assert_eq!(result.changes[0].change_type, ChangeType::Modified);
813 assert_eq!(result.changes[0].structural_change, Some(true));
814 }
815
816 #[test]
817 fn test_parent_suppressed_when_only_child_modified() {
818 let before = "class UserService:\n def get_user(self, user_id):\n return db.find(user_id)\n";
819 let after = "class UserService:\n def get_user(self, user_id):\n return db.find(user_id, include_deleted=False)\n";
820
821 let registry = create_default_registry();
822 let result = compute_semantic_diff(
823 &[modified_file("svc.py", before, after)],
824 ®istry,
825 None,
826 None,
827 );
828
829 let names: Vec<&str> = result
830 .changes
831 .iter()
832 .map(|c| c.entity_name.as_str())
833 .collect();
834 assert!(
835 result.changes.iter().any(|c| c.entity_name == "get_user"),
836 "expected method get_user in changes, got: {names:?}"
837 );
838 assert!(
839 !result
840 .changes
841 .iter()
842 .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
843 "class should be suppressed when only the method body changed, got: {names:?}"
844 );
845 }
846
847 #[test]
848 fn test_protocol_parent_suppressed_when_only_associatedtype_renamed() {
849 let before = "protocol Repository {\n associatedtype Item\n}\n";
850 let after = "protocol Repository {\n associatedtype Canvas\n}\n";
851
852 let registry = create_default_registry();
853 let result = compute_semantic_diff(
854 &[modified_file("Repository.swift", before, after)],
855 ®istry,
856 None,
857 None,
858 );
859
860 let names: Vec<&str> = result
861 .changes
862 .iter()
863 .map(|c| c.entity_name.as_str())
864 .collect();
865 assert!(
866 result
867 .changes
868 .iter()
869 .any(|c| c.entity_type == "associatedtype"),
870 "expected associatedtype change, got: {names:?}"
871 );
872 assert!(
873 !result
874 .changes
875 .iter()
876 .any(|c| c.entity_name == "Repository" && c.change_type == ChangeType::Modified),
877 "protocol should be suppressed when only the associatedtype changed, got: {names:?}"
878 );
879 }
880
881 #[test]
882 fn test_protocol_parent_not_suppressed_when_own_declaration_changes() {
883 let before = "protocol Repository {\n associatedtype Item\n}\n";
884 let after = "protocol Repository: Sendable {\n associatedtype Canvas\n}\n";
885
886 let registry = create_default_registry();
887 let result = compute_semantic_diff(
888 &[modified_file("Repository.swift", before, after)],
889 ®istry,
890 None,
891 None,
892 );
893
894 let names: Vec<&str> = result
895 .changes
896 .iter()
897 .map(|c| c.entity_name.as_str())
898 .collect();
899 assert!(
900 result
901 .changes
902 .iter()
903 .any(|c| c.entity_type == "associatedtype"),
904 "expected associatedtype change, got: {names:?}"
905 );
906 assert!(
907 result
908 .changes
909 .iter()
910 .any(|c| c.entity_name == "Repository" && c.change_type == ChangeType::Modified),
911 "protocol should remain Modified when its own declaration changed, got: {names:?}"
912 );
913 }
914
915 #[test]
916 fn test_parent_not_suppressed_when_own_declaration_changes() {
917 let before = "class UserService:\n def get_user(self, user_id):\n return db.find(user_id)\n";
918 let after = "class UserService(BaseService):\n def get_user(self, user_id):\n return db.find(user_id, include_deleted=False)\n";
919
920 let registry = create_default_registry();
921 let result = compute_semantic_diff(
922 &[modified_file("svc.py", before, after)],
923 ®istry,
924 None,
925 None,
926 );
927
928 let names: Vec<&str> = result
929 .changes
930 .iter()
931 .map(|c| c.entity_name.as_str())
932 .collect();
933 assert!(
934 result.changes.iter().any(|c| c.entity_name == "get_user"),
935 "expected method get_user in changes, got: {names:?}"
936 );
937 assert!(
938 result
939 .changes
940 .iter()
941 .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
942 "class should remain Modified when its own declaration changed, got: {names:?}"
943 );
944 }
945
946 #[test]
947 fn test_nested_typescript_class_field_diff_reports_leaf_method() {
948 let before = r#"class L1 {
949 L2 = class {
950 L3 = class {
951 L4 = class {
952 method() { return 1; }
953 };
954 };
955 };
956}
957"#;
958 let after = r#"class L1 {
959 L2 = class {
960 L3 = class {
961 L4 = class {
962 method() { return 999; }
963 };
964 };
965 };
966}
967"#;
968
969 let registry = create_default_registry();
970 let result = compute_semantic_diff(
971 &[modified_file("a.ts", before, after)],
972 ®istry,
973 None,
974 None,
975 );
976
977 let changes: Vec<_> = result
978 .changes
979 .iter()
980 .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
981 .collect();
982 assert!(
983 result
984 .changes
985 .iter()
986 .any(|c| c.entity_id == "a.ts::class::L1::L2::L3::L4::method"),
987 "expected method leaf change, got: {changes:?}"
988 );
989 assert!(
990 !result.changes.iter().any(|c| c.entity_type == "field"),
991 "field containers should be suppressed when only a nested method changed, got: {changes:?}"
992 );
993 }
994
995 #[test]
996 fn test_nested_typescript_object_literal_diff_reports_leaf_method() {
997 let before = r#"export const svc = {
998 open(): number { return 1; },
999 close(): number { return 0; },
1000};
1001"#;
1002 let after = r#"export const svc = {
1003 open(): number { return 2; },
1004 close(): number { return 0; },
1005};
1006"#;
1007
1008 let registry = create_default_registry();
1009 let result = compute_semantic_diff(
1010 &[modified_file("service.ts", before, after)],
1011 ®istry,
1012 None,
1013 None,
1014 );
1015
1016 let changes: Vec<_> = result
1017 .changes
1018 .iter()
1019 .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1020 .collect();
1021 assert!(
1022 result
1023 .changes
1024 .iter()
1025 .any(|c| c.entity_id == "service.ts::variable::svc::open"),
1026 "expected object-literal method leaf change, got: {changes:?}"
1027 );
1028 assert!(
1029 !result
1030 .changes
1031 .iter()
1032 .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1033 "variable container should be suppressed when only a nested method changed, got: {changes:?}"
1034 );
1035 }
1036
1037 #[test]
1038 fn test_nested_typescript_object_literal_pair_diff_reports_leaf_methods() {
1039 let before = r#"export const svc = {
1040 reset: () => 1,
1041 flush: function() { return 0; },
1042};
1043"#;
1044 let after = r#"export const svc = {
1045 reset: () => 2,
1046 flush: function() { return 3; },
1047};
1048"#;
1049
1050 let registry = create_default_registry();
1051 let result = compute_semantic_diff(
1052 &[modified_file("service.ts", before, after)],
1053 ®istry,
1054 None,
1055 None,
1056 );
1057
1058 let changes: Vec<_> = result
1059 .changes
1060 .iter()
1061 .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1062 .collect();
1063 assert!(
1064 result
1065 .changes
1066 .iter()
1067 .any(|c| c.entity_id == "service.ts::variable::svc::reset"),
1068 "expected arrow-valued object method change, got: {changes:?}"
1069 );
1070 assert!(
1071 result
1072 .changes
1073 .iter()
1074 .any(|c| c.entity_id == "service.ts::variable::svc::flush"),
1075 "expected function-valued object method change, got: {changes:?}"
1076 );
1077 assert!(
1078 !result
1079 .changes
1080 .iter()
1081 .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1082 "variable container should be suppressed when only nested function-valued properties changed, got: {changes:?}"
1083 );
1084 }
1085
1086 #[test]
1087 fn test_inline_typescript_object_literal_keeps_parent_variable_changes() {
1088 let before = "export const svc = { open() { return 1; }, enabled: true };\n";
1089 let after = "export let svc = { open() { return 2; }, enabled: false };\n";
1090
1091 let registry = create_default_registry();
1092 let result = compute_semantic_diff(
1093 &[modified_file("service.ts", before, after)],
1094 ®istry,
1095 None,
1096 None,
1097 );
1098
1099 let changes: Vec<_> = result
1100 .changes
1101 .iter()
1102 .map(|c| (c.entity_name.as_str(), c.entity_type.as_str()))
1103 .collect();
1104 assert!(
1105 result
1106 .changes
1107 .iter()
1108 .any(|c| c.entity_id == "service.ts::variable::svc::open"),
1109 "expected nested method change, got: {changes:?}"
1110 );
1111 assert!(
1112 result
1113 .changes
1114 .iter()
1115 .any(|c| c.entity_name == "svc" && c.entity_type == "variable"),
1116 "parent variable change should remain visible, got: {changes:?}"
1117 );
1118 }
1119
1120 #[test]
1121 fn renamed_file_with_edited_entity_reports_move_not_add_delete() {
1122 let before = "def foo():\n return alpha + beta + gamma\n";
1123 let after = "def foo():\n return one + two + three\n";
1124
1125 let registry = create_default_registry();
1126 let result = compute_semantic_diff(
1127 &[renamed_file("old.py", "new.py", before, after)],
1128 ®istry,
1129 None,
1130 None,
1131 );
1132
1133 assert_eq!(result.added_count, 0);
1134 assert_eq!(result.deleted_count, 0);
1135 assert_eq!(result.modified_count, 1);
1136 assert_eq!(result.moved_count, 1);
1137 assert_eq!(result.changes.len(), 1);
1138 assert_eq!(result.changes[0].entity_name, "foo");
1139 assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.py"));
1140 assert_eq!(result.changes[0].structural_change, Some(true));
1141 }
1142
// Two markdown sections share the heading "Same Title"; only the body of the
// first one is edited. The diff must report exactly one modification, anchored
// at line 1, carrying the first section's old and new text.
#[test]
fn duplicate_markdown_heading_reports_first_section_modification() {
    let before = "# Same Title\n\noriginal content of section A\n\n# Same Title\n\ncontent of section B\n";
    let after = "# Same Title\n\nMODIFIED content of section A\n\n# Same Title\n\ncontent of section B\n";

    let registry = create_default_registry();
    let result = compute_semantic_diff(
        &[modified_file("doc.md", before, after)],
        &registry,
        None,
        None,
    );

    // Dump the full change list on failure so a mis-paired section is visible.
    assert_eq!(result.modified_count, 1, "{:?}", result.changes);
    assert_eq!(result.changes.len(), 1, "{:?}", result.changes);

    let change = &result.changes[0];
    assert_eq!(change.change_type, ChangeType::Modified);
    assert_eq!(change.entity_name, "Same Title");
    // Line 1 is the first of the two identically named headings.
    assert_eq!(change.entity_line, 1);
    assert!(change
        .before_content
        .as_deref()
        .unwrap_or_default()
        .contains("original content of section A"));
    assert!(change
        .after_content
        .as_deref()
        .unwrap_or_default()
        .contains("MODIFIED content of section A"));
}
1174
// Replacing two Python functions with a lone comment yields two entity
// deletions plus one added orphan change. The orphan must be included in the
// per-change-type counters, so those counters sum to the number of changes.
#[test]
fn orphan_changes_count_toward_change_type_buckets() {
    let before = "def foo():\n return 1\n\ndef bar():\n return 2\n";
    let after = "# just a comment\n";

    let registry = create_default_registry();
    let result = compute_semantic_diff(
        &[modified_file("svc.py", before, after)],
        &registry,
        None,
        None,
    );

    assert_eq!(result.added_count, 1);
    assert_eq!(result.deleted_count, 2);
    assert_eq!(result.modified_count, 0);
    assert_eq!(result.orphan_count, 1);
    // First confirm an added orphan exists at all ...
    assert!(result
        .changes
        .iter()
        .any(|c| c.entity_type == "orphan" && c.change_type == ChangeType::Added));
    // ... then that it is flagged as a non-structural change.
    assert!(result.changes.iter().any(|c| {
        c.entity_type == "orphan"
            && c.change_type == ChangeType::Added
            && c.structural_change == Some(false)
    }));

    // `orphan_count` is deliberately left out of this sum: the equality below
    // only holds if orphans are already counted in the change-type buckets.
    let named_bucket_total = result.added_count
        + result.modified_count
        + result.deleted_count
        + result.moved_count
        + result.renamed_count
        + result.reordered_count;
    assert_eq!(named_bucket_total, result.changes.len());
}
1210
// Two separate one-line gaps between entities are edited. Each gap must be
// reported as its own orphan change with matching old/new single-line spans.
#[test]
fn orphan_changes_use_contiguous_line_spans() {
    // The entity layout is identical on both sides; only the `use` lines differ.
    let spans = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];
    let file = modified_file(
        "a.rs",
        "use alpha;\nfn foo() {}\nuse beta;\nfn bar() {}\n",
        "use gamma;\nfn foo() {}\nuse delta;\nfn bar() {}\n",
    );

    let changes = detect_orphan_changes(&file, &spans, &spans, None, "a.rs", None, None);
    assert_eq!(changes.len(), 2);
    let (leading_gap, inner_gap) = (&changes[0], &changes[1]);

    // Gap before `foo`: line 1 on both sides.
    assert_eq!((leading_gap.start_line, leading_gap.end_line), (1, 1));
    assert_eq!(
        (leading_gap.old_start_line, leading_gap.old_end_line),
        (Some(1), Some(1))
    );
    assert_eq!(
        (
            leading_gap.before_content.as_deref(),
            leading_gap.after_content.as_deref()
        ),
        (Some("use alpha;"), Some("use gamma;"))
    );

    // Gap between `foo` and `bar`: line 3 on both sides.
    assert_eq!((inner_gap.start_line, inner_gap.end_line), (3, 3));
    assert_eq!(
        (inner_gap.old_start_line, inner_gap.old_end_line),
        (Some(3), Some(3))
    );
    assert_eq!(
        (
            inner_gap.before_content.as_deref(),
            inner_gap.after_content.as_deref()
        ),
        (Some("use beta;"), Some("use delta;"))
    );
}
1236
// Inserting nothing but a blank line ahead of an entity must not be reported
// as an orphan change.
#[test]
fn blank_only_orphan_segments_are_ignored() {
    // `foo` shifts from line 1 to line 2 because of the new leading blank line.
    let old_spans = vec![entity_span("foo", 1, 1)];
    let new_spans = vec![entity_span("foo", 2, 2)];
    let file = modified_file("a.rs", "fn foo() {}\n", "\nfn foo() {}\n");

    let orphan_changes =
        detect_orphan_changes(&file, &old_spans, &new_spans, None, "a.rs", None, None);

    assert!(orphan_changes.is_empty());
}
1255
// A new orphan line is inserted at the top of the file while a later orphan
// line (`use a;`) is untouched. Only the insertion may be reported, as a pure
// addition with no old-side span or content.
#[test]
fn inserted_orphan_segment_does_not_modify_unchanged_later_segment() {
    // Every entity moves down one line because of the inserted `use x;`.
    let old_spans = vec![entity_span("foo", 1, 1), entity_span("bar", 3, 3)];
    let new_spans = vec![entity_span("foo", 2, 2), entity_span("bar", 4, 4)];
    let file = modified_file(
        "a.rs",
        "fn foo() {}\nuse a;\nfn bar() {}\n",
        "use x;\nfn foo() {}\nuse a;\nfn bar() {}\n",
    );

    let changes = detect_orphan_changes(&file, &old_spans, &new_spans, None, "a.rs", None, None);

    assert_eq!(changes.len(), 1);
    let inserted = &changes[0];
    assert_eq!(inserted.change_type, ChangeType::Added);
    assert_eq!((inserted.start_line, inserted.end_line), (1, 1));
    // An addition has nothing on the old side.
    assert_eq!(inserted.old_start_line, None);
    assert!(inserted.before_content.is_none());
    assert_eq!(inserted.after_content.as_deref(), Some("use x;"));
}
1284
// The after side gains an entity (`baz`) and therefore has one more orphan gap
// than the before side. The changed gaps must be reported as one deletion and
// two additions rather than being paired up as modifications.
#[test]
fn uneven_orphan_gaps_are_not_forced_into_modifications() {
    let old_spans = vec![
        entity_span("foo", 2, 2),
        entity_span("mid", 4, 4),
        entity_span("bar", 6, 6),
    ];
    let new_spans = vec![
        entity_span("foo", 2, 2),
        entity_span("mid", 4, 4),
        entity_span("baz", 6, 6),
        entity_span("bar", 8, 8),
    ];
    let file = modified_file(
        "a.rs",
        "use a;\nfn foo() {}\nuse old;\nfn mid() {}\nuse c;\nfn bar() {}\n",
        "use a;\nfn foo() {}\nuse new1;\nfn mid() {}\nuse new2;\nfn baz() {}\nuse c;\nfn bar() {}\n",
    );

    let changes = detect_orphan_changes(&file, &old_spans, &new_spans, None, "a.rs", None, None);
    assert_eq!(changes.len(), 3);

    // Expected order: the removed line first, then the two inserted lines.
    let kinds: Vec<_> = changes.iter().map(|change| &change.change_type).collect();
    assert_eq!(
        kinds,
        [&ChangeType::Deleted, &ChangeType::Added, &ChangeType::Added]
    );

    let removed = &changes[0];
    // The deletion's id records the old-side line range it came from.
    assert!(removed.entity_id.contains("::deleted@oldL3-3"));
    assert_eq!(removed.before_content.as_deref(), Some("use old;"));

    let inserted_text: Vec<_> = changes[1..]
        .iter()
        .map(|change| change.after_content.as_deref())
        .collect();
    assert_eq!(inserted_text, [Some("use new1;"), Some("use new2;")]);
}
1323
// Un-commenting a `:published` entry inside an EDN map must surface as exactly
// one added key. The keys that follow it (`:slug`, `:title`) must not appear
// as changes, which they would if the comment shifted key/value pairing.
#[test]
#[cfg(feature = "lang-edn")]
fn edn_comment_inside_map_does_not_displace_key_value_pairing() {
    // Before: the `:published` entry is commented out.
    let before = r#"{:body [:div]
 ; :published #inst "2025-12-14T14:05:00Z"
 :slug :my-post
 :title "Hello"}"#;

    // After: the same entry is live.
    let after = r#"{:body [:div]
 :published #inst "2025-12-14T14:05:00Z"
 :slug :my-post
 :title "Hello"}"#;

    let registry = create_default_registry();
    let result = compute_semantic_diff(
        &[modified_file("post.edn", before, after)],
        &registry,
        None,
        None,
    );

    // Orphan changes are excluded; this test is only about the map's keys.
    let non_orphan: Vec<_> = result
        .changes
        .iter()
        .filter(|c| c.entity_type != "orphan")
        .collect();

    assert_eq!(
        non_orphan.len(),
        1,
        "expected only :published to be added, got: {:?}",
        non_orphan
            .iter()
            .map(|c| (&c.entity_name, &c.change_type))
            .collect::<Vec<_>>()
    );
    assert_eq!(non_orphan[0].entity_name, ":published");
    assert_eq!(non_orphan[0].change_type, ChangeType::Added);
}
1372}