1#[cfg(feature = "parallel")]
2use rayon::prelude::*;
3use serde::Serialize;
4
5use crate::git::types::FileChange;
6
// Chooses the iteration strategy for `$slice` at compile time.
//
// With the `parallel` feature enabled the expression expands to
// `$slice.par_iter()`; without it, to the sequential `$slice.iter()`.
// The two inner blocks carry opposite `cfg` attributes, so exactly one of
// them survives and becomes the value of the outer block.
macro_rules! maybe_par_iter {
    ($slice:expr) => {{
        #[cfg(feature = "parallel")]
        {
            $slice.par_iter()
        }
        #[cfg(not(feature = "parallel"))]
        {
            $slice.iter()
        }
    }};
}
19use crate::model::change::{ChangeType, SemanticChange};
20use crate::model::entity::SemanticEntity;
21use crate::model::identity::match_entities;
22use crate::parser::registry::ParserRegistry;
23use std::collections::{HashMap, HashSet};
24
/// Aggregated outcome of [`compute_semantic_diff`].
///
/// Serialized with camelCase field names (e.g. `fileCount`).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DiffResult {
    /// Every change produced for the processed files, concatenated file by file.
    pub changes: Vec<SemanticChange>,
    /// Number of distinct file paths that produced at least one change.
    pub file_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Added`.
    pub added_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Modified`.
    pub modified_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Deleted`.
    pub deleted_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Moved`.
    pub moved_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Renamed`.
    pub renamed_count: usize,
    /// Non-orphan changes whose type is `ChangeType::Reordered`.
    pub reordered_count: usize,
    /// Changes whose `entity_type` is `"orphan"`; these are counted here
    /// regardless of their change type and excluded from the counters above.
    pub orphan_count: usize,
    /// Entities extracted from the "before" content, summed over the files
    /// that produced at least one change.
    pub total_entities_before: usize,
    /// Entities extracted from the "after" content, summed over the files
    /// that produced at least one change.
    pub total_entities_after: usize,
}
40
41pub fn compute_semantic_diff(
42 file_changes: &[FileChange],
43 registry: &ParserRegistry,
44 commit_sha: Option<&str>,
45 author: Option<&str>,
46) -> DiffResult {
47 let per_file_changes: Vec<(String, Vec<SemanticChange>, usize, usize)> =
49 maybe_par_iter!(file_changes)
50 .filter_map(|file| {
51 let content_hint = file
52 .after_content
53 .as_deref()
54 .or(file.before_content.as_deref())
55 .unwrap_or("");
56 let resolved = registry.resolve_file_path(&file.file_path);
57 let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
58 let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
59
60 let before_entities = if let Some(ref content) = file.before_content {
61 let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
62 let before_resolved = registry.resolve_file_path(before_path);
63 let before_detection = before_resolved.as_deref().unwrap_or(before_path);
64 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
65 plugin.extract_entities(content, before_detection)
66 })) {
67 Ok(entities) => entities,
68 Err(_) => Vec::new(),
69 }
70 } else {
71 Vec::new()
72 };
73
74 let after_entities = if let Some(ref content) = file.after_content {
75 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
76 plugin.extract_entities(content, detection_path)
77 })) {
78 Ok(entities) => entities,
79 Err(_) => Vec::new(),
80 }
81 } else {
82 Vec::new()
83 };
84
85 let before_count = before_entities.len();
86 let after_count = after_entities.len();
87
88 let sim_fn = |a: &crate::model::entity::SemanticEntity,
89 b: &crate::model::entity::SemanticEntity|
90 -> f64 { plugin.compute_similarity(a, b) };
91
92 let mut result = match_entities(
93 &before_entities,
94 &after_entities,
95 &file.file_path,
96 Some(&sim_fn),
97 commit_sha,
98 author,
99 );
100
101 suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
104
105 let orphans = detect_orphan_changes(
107 file,
108 &before_entities,
109 &after_entities,
110 commit_sha,
111 author,
112 );
113 result.changes.extend(orphans);
114
115 result.changes.sort_by_key(|change| change.entity_line);
116
117 if result.changes.is_empty() {
118 None
119 } else {
120 Some((
121 file.file_path.clone(),
122 result.changes,
123 before_count,
124 after_count,
125 ))
126 }
127 })
128 .collect();
129
130 let mut all_changes: Vec<SemanticChange> = Vec::new();
131 let mut files_with_changes: HashSet<String> = HashSet::new();
132 let mut total_entities_before: usize = 0;
133 let mut total_entities_after: usize = 0;
134 for (file_path, changes, before_count, after_count) in per_file_changes {
135 files_with_changes.insert(file_path);
136 all_changes.extend(changes);
137 total_entities_before += before_count;
138 total_entities_after += after_count;
139 }
140
141 let mut added_count = 0;
143 let mut modified_count = 0;
144 let mut deleted_count = 0;
145 let mut moved_count = 0;
146 let mut renamed_count = 0;
147 let mut reordered_count = 0;
148 let mut orphan_count = 0;
149
150 for c in &all_changes {
151 if c.entity_type == "orphan" {
152 orphan_count += 1;
153 continue;
154 }
155 match c.change_type {
156 ChangeType::Added => added_count += 1,
157 ChangeType::Modified => modified_count += 1,
158 ChangeType::Deleted => deleted_count += 1,
159 ChangeType::Moved => moved_count += 1,
160 ChangeType::Renamed => renamed_count += 1,
161 ChangeType::Reordered => reordered_count += 1,
162 }
163 }
164
165 DiffResult {
166 changes: all_changes,
167 file_count: files_with_changes.len(),
168 added_count,
169 modified_count,
170 deleted_count,
171 moved_count,
172 renamed_count,
173 reordered_count,
174 orphan_count,
175 total_entities_before,
176 total_entities_after,
177 }
178}
179
180fn suppress_redundant_parents(
181 changes: &mut Vec<SemanticChange>,
182 before: &[SemanticEntity],
183 after: &[SemanticEntity],
184) {
185 if changes.len() < 2 {
186 return;
187 }
188
189 const CONTAINER_TYPES: &[&str] = &[
190 "impl",
191 "trait",
192 "module",
193 "class",
194 "interface",
195 "mixin",
196 "extension",
197 "namespace",
198 "export",
199 "package",
200 "svelte_instance_script",
201 "svelte_module_script",
202 "object",
203 ];
204
205 let before_by_id: HashMap<&str, &SemanticEntity> =
206 before.iter().map(|e| (e.id.as_str(), e)).collect();
207 let after_by_id: HashMap<&str, &SemanticEntity> =
208 after.iter().map(|e| (e.id.as_str(), e)).collect();
209
210 let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
211 for e in before {
212 if let Some(ref pid) = e.parent_id {
213 before_children.entry(pid.as_str()).or_default().push(e);
214 }
215 }
216 let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
217 for e in after {
218 if let Some(ref pid) = e.parent_id {
219 after_children.entry(pid.as_str()).or_default().push(e);
220 }
221 }
222
223 let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
224
225 let mut suppress: HashSet<String> = HashSet::new();
226 for change in changes.iter() {
227 if !matches!(
228 change.change_type,
229 ChangeType::Modified | ChangeType::Added | ChangeType::Deleted
230 ) {
231 continue;
232 }
233 if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
234 continue;
235 }
236 let eid = change.entity_id.as_str();
237 let b_children = before_children
238 .get(eid)
239 .map(|v| v.as_slice())
240 .unwrap_or(&[]);
241 let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
242
243 let has_changed_child = b_children
244 .iter()
245 .any(|c| changed_ids.contains(c.id.as_str()))
246 || a_children
247 .iter()
248 .any(|c| changed_ids.contains(c.id.as_str()));
249 if !has_changed_child {
250 continue;
251 }
252
253 let should_suppress = if change.change_type == ChangeType::Modified {
257 match (before_by_id.get(eid), after_by_id.get(eid)) {
258 (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
259 let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
260 let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
261 before_own == after_own
262 }
263 _ => false,
264 }
265 } else {
266 true
267 };
268
269 if should_suppress {
270 suppress.insert(change.entity_id.clone());
271 }
272 }
273
274 for change in changes.iter() {
278 if change.change_type == ChangeType::Moved {
279 if let Some(ref old_pid) = change.old_parent_id {
280 if changed_ids.contains(old_pid.as_str()) {
281 suppress.insert(old_pid.clone());
282 }
283 }
284 }
285 }
286
287 if !suppress.is_empty() {
288 changes.retain(|c| !suppress.contains(&c.entity_id));
289 }
290
291 let renamed_before_ids: HashSet<&str> = changes
294 .iter()
295 .filter(|c| c.change_type == ChangeType::Renamed)
296 .filter_map(|c| {
297 let old_name = c.old_entity_name.as_deref()?;
298 let after_entity = after_by_id.get(c.entity_id.as_str())?;
299 before
300 .iter()
301 .find(|e| {
302 e.name == old_name
303 && e.entity_type == after_entity.entity_type
304 && e.parent_id == after_entity.parent_id
305 })
306 .map(|e| e.id.as_str())
307 })
308 .collect();
309
310 if !renamed_before_ids.is_empty() {
311 changes.retain(|c| {
312 !(c.change_type == ChangeType::Moved
313 && c.old_entity_name.is_none()
314 && c.old_parent_id
315 .as_deref()
316 .map_or(false, |pid| renamed_before_ids.contains(pid)))
317 });
318 }
319}
320
321fn strip_children_content(
322 content: &str,
323 parent_start_line: usize,
324 children: &[&SemanticEntity],
325) -> String {
326 let lines: Vec<&str> = content.lines().collect();
327 let mut excluded: HashSet<usize> = HashSet::new();
328 for child in children {
329 let start_idx = child.start_line.saturating_sub(parent_start_line);
330 let end_idx = child.end_line.saturating_sub(parent_start_line);
331 for i in start_idx..=end_idx.max(start_idx) {
332 if i < lines.len() {
333 excluded.insert(i);
334 }
335 }
336 }
337 lines
338 .iter()
339 .enumerate()
340 .filter(|(i, _)| !excluded.contains(i))
341 .map(|(_, l)| l.trim())
342 .filter(|l| !l.is_empty())
343 .collect::<Vec<_>>()
344 .join(" ")
345}
346
347fn detect_orphan_changes(
351 file: &FileChange,
352 before_entities: &[SemanticEntity],
353 after_entities: &[SemanticEntity],
354 commit_sha: Option<&str>,
355 author: Option<&str>,
356) -> Vec<SemanticChange> {
357 let before_text = file.before_content.as_deref().unwrap_or("");
358 let after_text = file.after_content.as_deref().unwrap_or("");
359
360 let before_covered: HashSet<usize> = before_entities
362 .iter()
363 .flat_map(|e| e.start_line..=e.end_line)
364 .collect();
365 let after_covered: HashSet<usize> = after_entities
366 .iter()
367 .flat_map(|e| e.start_line..=e.end_line)
368 .collect();
369
370 let before_orphan: String = before_text
372 .lines()
373 .enumerate()
374 .filter(|(i, _)| !before_covered.contains(&(i + 1)))
375 .map(|(_, l)| l)
376 .collect::<Vec<_>>()
377 .join("\n");
378 let after_orphan: String = after_text
379 .lines()
380 .enumerate()
381 .filter(|(i, _)| !after_covered.contains(&(i + 1)))
382 .map(|(_, l)| l)
383 .collect::<Vec<_>>()
384 .join("\n");
385
386 if before_orphan == after_orphan {
388 return Vec::new();
389 }
390
391 let change_type = if before_orphan.trim().is_empty() {
392 ChangeType::Added
393 } else if after_orphan.trim().is_empty() {
394 ChangeType::Deleted
395 } else {
396 ChangeType::Modified
397 };
398
399 vec![SemanticChange {
400 id: format!("{}::orphan", file.file_path),
401 entity_id: format!("{}::orphan", file.file_path),
402 change_type,
403 entity_type: "orphan".to_string(),
404 entity_name: "module-level".to_string(),
405 entity_line: 0,
406 parent_name: None,
407 file_path: file.file_path.clone(),
408 old_entity_name: None,
409 old_file_path: None,
410 old_parent_id: None,
411 before_content: if before_orphan.is_empty() {
412 None
413 } else {
414 Some(before_orphan)
415 },
416 after_content: if after_orphan.is_empty() {
417 None
418 } else {
419 Some(after_orphan)
420 },
421 commit_sha: commit_sha.map(String::from),
422 author: author.map(String::from),
423 timestamp: None,
424 structural_change: Some(true),
425 }]
426}
427
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use crate::parser::plugins::create_default_registry;

    /// Builds a `Modified` file change with both sides present.
    fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: path.to_string(),
            status: FileStatus::Modified,
            old_file_path: None,
            before_content: Some(before.to_string()),
            after_content: Some(after.to_string()),
        }
    }

    /// Builds a `Renamed` file change from `old_path` to `new_path`.
    fn renamed_file(old_path: &str, new_path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: new_path.to_string(),
            status: FileStatus::Renamed,
            old_file_path: Some(old_path.to_string()),
            before_content: Some(before.to_string()),
            after_content: Some(after.to_string()),
        }
    }

    /// Editing only a method body must report the method, not its class.
    #[test]
    fn test_parent_suppressed_when_only_child_modified() {
        // The method must be indented inside the class (and its body inside
        // the method) for the snippet to be valid Python.
        let before =
            "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("svc.py", before, after)],
            &registry,
            None,
            None,
        );

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            !result
                .changes
                .iter()
                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should be suppressed when only the method body changed, got: {names:?}"
        );
    }

    /// A change to the class header itself must keep the class in the result.
    #[test]
    fn test_parent_not_suppressed_when_own_declaration_changes() {
        let before =
            "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService(BaseService):\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[modified_file("svc.py", before, after)],
            &registry,
            None,
            None,
        );

        let names: Vec<&str> = result
            .changes
            .iter()
            .map(|c| c.entity_name.as_str())
            .collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            result
                .changes
                .iter()
                .any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should remain Modified when its own declaration changed, got: {names:?}"
        );
    }

    /// An entity edited inside a renamed file is one move, not add + delete.
    #[test]
    fn renamed_file_with_edited_entity_reports_move_not_add_delete() {
        let before = "def foo():\n return alpha + beta + gamma\n";
        let after = "def foo():\n return one + two + three\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(
            &[renamed_file("old.py", "new.py", before, after)],
            &registry,
            None,
            None,
        );

        assert_eq!(result.added_count, 0);
        assert_eq!(result.deleted_count, 0);
        assert_eq!(result.moved_count, 1);
        assert_eq!(result.changes.len(), 1);
        assert_eq!(result.changes[0].entity_name, "foo");
        assert_eq!(result.changes[0].old_file_path.as_deref(), Some("old.py"));
    }
}