1use rayon::prelude::*;
2use serde::Serialize;
3
4use crate::git::types::FileChange;
5use crate::model::change::{ChangeType, SemanticChange};
6use crate::model::entity::SemanticEntity;
7use crate::model::identity::match_entities;
8use crate::parser::registry::ParserRegistry;
9use std::collections::{HashMap, HashSet};
10
/// Aggregated outcome of a semantic diff over a set of file changes.
///
/// Serialized with camelCase field names (`fileCount`, `addedCount`, ...).
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DiffResult {
    /// All detected changes, concatenated file by file; within one file the
    /// changes are sorted by `entity_line`.
    pub changes: Vec<SemanticChange>,
    /// Number of distinct file paths that produced at least one change.
    pub file_count: usize,
    /// Number of non-orphan changes with `ChangeType::Added`.
    pub added_count: usize,
    /// Number of non-orphan changes with `ChangeType::Modified`.
    pub modified_count: usize,
    /// Number of non-orphan changes with `ChangeType::Deleted`.
    pub deleted_count: usize,
    /// Number of non-orphan changes with `ChangeType::Moved`.
    pub moved_count: usize,
    /// Number of non-orphan changes with `ChangeType::Renamed`.
    pub renamed_count: usize,
    /// Number of non-orphan changes with `ChangeType::Reordered`.
    pub reordered_count: usize,
    /// Number of changes whose `entity_type` is `"orphan"`; these are counted
    /// here only and never in the per-change-type counters above.
    pub orphan_count: usize,
}
24
25pub fn compute_semantic_diff(
26 file_changes: &[FileChange],
27 registry: &ParserRegistry,
28 commit_sha: Option<&str>,
29 author: Option<&str>,
30) -> DiffResult {
31 let per_file_changes: Vec<(String, Vec<SemanticChange>)> = file_changes
33 .par_iter()
34 .filter_map(|file| {
35 let content_hint = file.after_content.as_deref()
36 .or(file.before_content.as_deref())
37 .unwrap_or("");
38 let resolved = registry.resolve_file_path(&file.file_path);
39 let detection_path = resolved.as_deref().unwrap_or(&file.file_path);
40 let plugin = registry.get_plugin_with_content(detection_path, content_hint)?;
41
42 let before_entities = if let Some(ref content) = file.before_content {
43 let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
44 let before_resolved = registry.resolve_file_path(before_path);
45 let before_detection = before_resolved.as_deref().unwrap_or(before_path);
46 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
47 plugin.extract_entities(content, before_detection)
48 })) {
49 Ok(entities) => entities,
50 Err(_) => Vec::new(),
51 }
52 } else {
53 Vec::new()
54 };
55
56 let after_entities = if let Some(ref content) = file.after_content {
57 match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
58 plugin.extract_entities(content, detection_path)
59 })) {
60 Ok(entities) => entities,
61 Err(_) => Vec::new(),
62 }
63 } else {
64 Vec::new()
65 };
66
67 let sim_fn = |a: &crate::model::entity::SemanticEntity,
68 b: &crate::model::entity::SemanticEntity|
69 -> f64 { plugin.compute_similarity(a, b) };
70
71 let mut result = match_entities(
72 &before_entities,
73 &after_entities,
74 &file.file_path,
75 Some(&sim_fn),
76 commit_sha,
77 author,
78 );
79
80 suppress_redundant_parents(&mut result.changes, &before_entities, &after_entities);
83
84 let orphans = detect_orphan_changes(
86 file,
87 &before_entities,
88 &after_entities,
89 commit_sha,
90 author,
91 );
92 result.changes.extend(orphans);
93
94 result.changes.sort_by_key(|change| change.entity_line);
95
96 if result.changes.is_empty() {
97 None
98 } else {
99 Some((file.file_path.clone(), result.changes))
100 }
101 })
102 .collect();
103
104 let mut all_changes: Vec<SemanticChange> = Vec::new();
105 let mut files_with_changes: HashSet<String> = HashSet::new();
106 for (file_path, changes) in per_file_changes {
107 files_with_changes.insert(file_path);
108 all_changes.extend(changes);
109 }
110
111 let mut added_count = 0;
113 let mut modified_count = 0;
114 let mut deleted_count = 0;
115 let mut moved_count = 0;
116 let mut renamed_count = 0;
117 let mut reordered_count = 0;
118 let mut orphan_count = 0;
119
120 for c in &all_changes {
121 if c.entity_type == "orphan" {
122 orphan_count += 1;
123 continue;
124 }
125 match c.change_type {
126 ChangeType::Added => added_count += 1,
127 ChangeType::Modified => modified_count += 1,
128 ChangeType::Deleted => deleted_count += 1,
129 ChangeType::Moved => moved_count += 1,
130 ChangeType::Renamed => renamed_count += 1,
131 ChangeType::Reordered => reordered_count += 1,
132 }
133 }
134
135 DiffResult {
136 changes: all_changes,
137 file_count: files_with_changes.len(),
138 added_count,
139 modified_count,
140 deleted_count,
141 moved_count,
142 renamed_count,
143 reordered_count,
144 orphan_count,
145 }
146}
147
/// Removes changes that are redundant given other changes in the same file.
///
/// Two clean-ups are applied in place to `changes`:
/// 1. A change on a container entity is dropped when one of its direct
///    children also has a change and the container contributes nothing of its
///    own (see the loop below), or when a `Moved` change names it as the old
///    parent.
/// 2. A `Moved` change without an old name is dropped when its old parent is
///    the before-side entity of a `Renamed` change.
///
/// `before` and `after` are the entities extracted from the two sides of the
/// file. Nothing is done when there are fewer than two changes.
fn suppress_redundant_parents(
    changes: &mut Vec<SemanticChange>,
    before: &[SemanticEntity],
    after: &[SemanticEntity],
) {
    // A single change cannot be redundant with respect to another one.
    if changes.len() < 2 {
        return;
    }

    // Entity types treated as containers of other entities.
    const CONTAINER_TYPES: &[&str] = &[
        "impl", "trait", "module", "class", "interface", "mixin",
        "extension", "namespace", "export", "package",
        "svelte_instance_script", "svelte_module_script",
        "object",
    ];

    // Entity lookup by id for each side.
    let before_by_id: HashMap<&str, &SemanticEntity> =
        before.iter().map(|e| (e.id.as_str(), e)).collect();
    let after_by_id: HashMap<&str, &SemanticEntity> =
        after.iter().map(|e| (e.id.as_str(), e)).collect();

    // Direct children grouped by parent id for each side.
    let mut before_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
    for e in before {
        if let Some(ref pid) = e.parent_id {
            before_children.entry(pid.as_str()).or_default().push(e);
        }
    }
    let mut after_children: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new();
    for e in after {
        if let Some(ref pid) = e.parent_id {
            after_children.entry(pid.as_str()).or_default().push(e);
        }
    }

    // Ids of every entity that currently has a change.
    let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();

    // Owned ids, so the set does not keep `changes` borrowed across `retain`.
    let mut suppress: HashSet<String> = HashSet::new();
    for change in changes.iter() {
        // Only Modified/Added/Deleted changes on container entities are candidates.
        if !matches!(change.change_type, ChangeType::Modified | ChangeType::Added | ChangeType::Deleted) {
            continue;
        }
        if !CONTAINER_TYPES.contains(&change.entity_type.as_str()) {
            continue;
        }
        let eid = change.entity_id.as_str();
        let b_children = before_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);
        let a_children = after_children.get(eid).map(|v| v.as_slice()).unwrap_or(&[]);

        // Keep the container's change unless a direct child (on either side) changed too.
        let has_changed_child = b_children.iter().any(|c| changed_ids.contains(c.id.as_str()))
            || a_children.iter().any(|c| changed_ids.contains(c.id.as_str()));
        if !has_changed_child {
            continue;
        }

        let should_suppress = if change.change_type == ChangeType::Modified {
            // A modified container is redundant only if it exists on both sides
            // with the same type and its own text, with the children's lines
            // stripped out, is unchanged.
            match (before_by_id.get(eid), after_by_id.get(eid)) {
                (Some(bp), Some(ap)) if bp.entity_type == ap.entity_type => {
                    let before_own = strip_children_content(&bp.content, bp.start_line, b_children);
                    let after_own = strip_children_content(&ap.content, ap.start_line, a_children);
                    before_own == after_own
                }
                _ => false,
            }
        } else {
            // Added/Deleted containers with a changed child are always suppressed.
            true
        };

        if should_suppress {
            suppress.insert(change.entity_id.clone());
        }
    }

    // If an entity moved out of a parent that itself has a change, suppress
    // the old parent's change as well.
    for change in changes.iter() {
        if change.change_type == ChangeType::Moved {
            if let Some(ref old_pid) = change.old_parent_id {
                if changed_ids.contains(old_pid.as_str()) {
                    suppress.insert(old_pid.clone());
                }
            }
        }
    }

    if !suppress.is_empty() {
        changes.retain(|c| !suppress.contains(&c.entity_id));
    }

    // For every remaining Renamed change, find the before-side entity it was
    // renamed from: same old name, same entity type and same parent id as the
    // after-side entity. Collect those before-side ids.
    let renamed_before_ids: HashSet<&str> = changes
        .iter()
        .filter(|c| c.change_type == ChangeType::Renamed)
        .filter_map(|c| {
            let old_name = c.old_entity_name.as_deref()?;
            let after_entity = after_by_id.get(c.entity_id.as_str())?;
            before.iter()
                .find(|e| {
                    e.name == old_name
                        && e.entity_type == after_entity.entity_type
                        && e.parent_id == after_entity.parent_id
                })
                .map(|e| e.id.as_str())
        })
        .collect();

    // Drop Moved changes (with no old name recorded) whose old parent is one of
    // those renamed entities.
    // NOTE(review): presumably such a "move" is only a side effect of the
    // parent's rename changing the child's parent id — confirm against
    // `match_entities`.
    if !renamed_before_ids.is_empty() {
        changes.retain(|c| {
            !(c.change_type == ChangeType::Moved
                && c.old_entity_name.is_none()
                && c.old_parent_id.as_deref()
                    .map_or(false, |pid| renamed_before_ids.contains(pid)))
        });
    }
}
267
268fn strip_children_content(content: &str, parent_start_line: usize, children: &[&SemanticEntity]) -> String {
269 let lines: Vec<&str> = content.lines().collect();
270 let mut excluded: HashSet<usize> = HashSet::new();
271 for child in children {
272 let start_idx = child.start_line.saturating_sub(parent_start_line);
273 let end_idx = child.end_line.saturating_sub(parent_start_line);
274 for i in start_idx..=end_idx.max(start_idx) {
275 if i < lines.len() {
276 excluded.insert(i);
277 }
278 }
279 }
280 lines.iter().enumerate()
281 .filter(|(i, _)| !excluded.contains(i))
282 .map(|(_, l)| l.trim())
283 .filter(|l| !l.is_empty())
284 .collect::<Vec<_>>()
285 .join(" ")
286}
287
288fn detect_orphan_changes(
292 file: &FileChange,
293 before_entities: &[SemanticEntity],
294 after_entities: &[SemanticEntity],
295 commit_sha: Option<&str>,
296 author: Option<&str>,
297) -> Vec<SemanticChange> {
298 let before_text = file.before_content.as_deref().unwrap_or("");
299 let after_text = file.after_content.as_deref().unwrap_or("");
300
301 let before_covered: HashSet<usize> = before_entities
303 .iter()
304 .flat_map(|e| e.start_line..=e.end_line)
305 .collect();
306 let after_covered: HashSet<usize> = after_entities
307 .iter()
308 .flat_map(|e| e.start_line..=e.end_line)
309 .collect();
310
311 let before_orphan: String = before_text
313 .lines()
314 .enumerate()
315 .filter(|(i, _)| !before_covered.contains(&(i + 1)))
316 .map(|(_, l)| l)
317 .collect::<Vec<_>>()
318 .join("\n");
319 let after_orphan: String = after_text
320 .lines()
321 .enumerate()
322 .filter(|(i, _)| !after_covered.contains(&(i + 1)))
323 .map(|(_, l)| l)
324 .collect::<Vec<_>>()
325 .join("\n");
326
327 if before_orphan == after_orphan {
329 return Vec::new();
330 }
331
332 let change_type = if before_orphan.trim().is_empty() {
333 ChangeType::Added
334 } else if after_orphan.trim().is_empty() {
335 ChangeType::Deleted
336 } else {
337 ChangeType::Modified
338 };
339
340 vec![SemanticChange {
341 id: format!("{}::orphan", file.file_path),
342 entity_id: format!("{}::orphan", file.file_path),
343 change_type,
344 entity_type: "orphan".to_string(),
345 entity_name: "module-level".to_string(),
346 entity_line: 0,
347 parent_name: None,
348 file_path: file.file_path.clone(),
349 old_entity_name: None,
350 old_file_path: None,
351 old_parent_id: None,
352 before_content: if before_orphan.is_empty() {
353 None
354 } else {
355 Some(before_orphan)
356 },
357 after_content: if after_orphan.is_empty() {
358 None
359 } else {
360 Some(after_orphan)
361 },
362 commit_sha: commit_sha.map(String::from),
363 author: author.map(String::from),
364 timestamp: None,
365 structural_change: Some(true),
366 }]
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::types::{FileChange, FileStatus};
    use crate::parser::plugins::create_default_registry;

    /// Builds a `FileChange` with `Modified` status, no rename, and both the
    /// before and after contents populated.
    fn modified_file(path: &str, before: &str, after: &str) -> FileChange {
        FileChange {
            file_path: path.to_string(),
            status: FileStatus::Modified,
            old_file_path: None,
            before_content: Some(before.to_string()),
            after_content: Some(after.to_string()),
        }
    }

    /// When only a method body changes, the method is reported and the
    /// enclosing class is not reported as Modified.
    #[test]
    fn test_parent_suppressed_when_only_child_modified() {
        // Fixtures use standard 4/8-space Python indentation so that
        // `get_user` parses as a method nested inside `UserService`.
        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(&[modified_file("svc.py", before, after)], &registry, None, None);

        let names: Vec<&str> = result.changes.iter().map(|c| c.entity_name.as_str()).collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            !result.changes.iter().any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should be suppressed when only the method body changed, got: {names:?}"
        );
    }

    /// When the class declaration itself changes alongside a method, both the
    /// method and the class (as Modified) are reported.
    #[test]
    fn test_parent_not_suppressed_when_own_declaration_changes() {
        let before = "class UserService:\n    def get_user(self, user_id):\n        return db.find(user_id)\n";
        let after = "class UserService(BaseService):\n    def get_user(self, user_id):\n        return db.find(user_id, include_deleted=False)\n";

        let registry = create_default_registry();
        let result = compute_semantic_diff(&[modified_file("svc.py", before, after)], &registry, None, None);

        let names: Vec<&str> = result.changes.iter().map(|c| c.entity_name.as_str()).collect();
        assert!(
            result.changes.iter().any(|c| c.entity_name == "get_user"),
            "expected method get_user in changes, got: {names:?}"
        );
        assert!(
            result.changes.iter().any(|c| c.entity_name == "UserService" && c.change_type == ChangeType::Modified),
            "class should remain Modified when its own declaration changed, got: {names:?}"
        );
    }
}