Skip to main content

sem_core/parser/
differ.rs

1use rayon::prelude::*;
2use serde::Serialize;
3
4use crate::git::types::FileChange;
5use crate::model::change::{ChangeType, SemanticChange};
6use crate::model::entity::SemanticEntity;
7use crate::model::identity::match_entities;
8use crate::parser::registry::ParserRegistry;
9use std::collections::HashSet;
10
11#[derive(Debug, Clone, Serialize)]
12#[serde(rename_all = "camelCase")]
13pub struct DiffResult {
14    pub changes: Vec<SemanticChange>,
15    pub file_count: usize,
16    pub added_count: usize,
17    pub modified_count: usize,
18    pub deleted_count: usize,
19    pub moved_count: usize,
20    pub renamed_count: usize,
21    pub reordered_count: usize,
22    pub orphan_count: usize,
23}
24
25pub fn compute_semantic_diff(
26    file_changes: &[FileChange],
27    registry: &ParserRegistry,
28    commit_sha: Option<&str>,
29    author: Option<&str>,
30) -> DiffResult {
31    // Process files in parallel: each file's entity extraction and matching is independent
32    let per_file_changes: Vec<(String, Vec<SemanticChange>)> = file_changes
33        .par_iter()
34        .filter_map(|file| {
35            let content_hint = file.after_content.as_deref()
36                .or(file.before_content.as_deref())
37                .unwrap_or("");
38            let plugin = registry.get_plugin_with_content(&file.file_path, content_hint)?;
39
40            let before_entities = if let Some(ref content) = file.before_content {
41                let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
42                match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
43                    plugin.extract_entities(content, before_path)
44                })) {
45                    Ok(entities) => entities,
46                    Err(_) => Vec::new(),
47                }
48            } else {
49                Vec::new()
50            };
51
52            let after_entities = if let Some(ref content) = file.after_content {
53                match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
54                    plugin.extract_entities(content, &file.file_path)
55                })) {
56                    Ok(entities) => entities,
57                    Err(_) => Vec::new(),
58                }
59            } else {
60                Vec::new()
61            };
62
63            let sim_fn = |a: &crate::model::entity::SemanticEntity,
64                          b: &crate::model::entity::SemanticEntity|
65             -> f64 { plugin.compute_similarity(a, b) };
66
67            let mut result = match_entities(
68                &before_entities,
69                &after_entities,
70                &file.file_path,
71                Some(&sim_fn),
72                commit_sha,
73                author,
74            );
75
76            // Suppress parent entities whose modification is already explained
77            // by child entity changes (e.g. impl blocks when methods changed).
78            let all_entities: Vec<&SemanticEntity> =
79                before_entities.iter().chain(after_entities.iter()).collect();
80            suppress_redundant_parents(&mut result.changes, &all_entities);
81
82            // Detect orphan changes (lines that changed outside any entity span).
83            let orphans = detect_orphan_changes(
84                file,
85                &before_entities,
86                &after_entities,
87                commit_sha,
88                author,
89            );
90            result.changes.extend(orphans);
91
92            result.changes.sort_by_key(|change| change.entity_line);
93
94            if result.changes.is_empty() {
95                None
96            } else {
97                Some((file.file_path.clone(), result.changes))
98            }
99        })
100        .collect();
101
102    let mut all_changes: Vec<SemanticChange> = Vec::new();
103    let mut files_with_changes: HashSet<String> = HashSet::new();
104    for (file_path, changes) in per_file_changes {
105        files_with_changes.insert(file_path);
106        all_changes.extend(changes);
107    }
108
109    // Single-pass counting (exclude orphan changes from entity counts)
110    let mut added_count = 0;
111    let mut modified_count = 0;
112    let mut deleted_count = 0;
113    let mut moved_count = 0;
114    let mut renamed_count = 0;
115    let mut reordered_count = 0;
116    let mut orphan_count = 0;
117
118    for c in &all_changes {
119        if c.entity_type == "orphan" {
120            orphan_count += 1;
121            continue;
122        }
123        match c.change_type {
124            ChangeType::Added => added_count += 1,
125            ChangeType::Modified => modified_count += 1,
126            ChangeType::Deleted => deleted_count += 1,
127            ChangeType::Moved => moved_count += 1,
128            ChangeType::Renamed => renamed_count += 1,
129            ChangeType::Reordered => reordered_count += 1,
130        }
131    }
132
133    DiffResult {
134        changes: all_changes,
135        file_count: files_with_changes.len(),
136        added_count,
137        modified_count,
138        deleted_count,
139        moved_count,
140        renamed_count,
141        reordered_count,
142        orphan_count,
143    }
144}
145
146/// Remove "Modified" parent entities from the change list when at least one
147/// child entity also appears as a change.  This avoids showing e.g. an impl
148/// block as modified when the real change is in a method inside it.
149/// Only suppresses container entity types (impl, trait, module) where the
150/// parent is just a wrapper. Functions, structs, etc. are never suppressed
151/// because they have independent meaningful content.
152fn suppress_redundant_parents(
153    changes: &mut Vec<SemanticChange>,
154    entities: &[&SemanticEntity],
155) {
156    if changes.len() < 2 {
157        return;
158    }
159
160    // Container types whose only purpose is grouping child entities.
161    // Functions, structs, enums etc. are NOT containers because they have
162    // independent meaningful content (body logic, fields, variants).
163    const CONTAINER_TYPES: &[&str] = &[
164        "impl", "trait", "module", "class", "interface", "mixin",
165        "extension", "namespace", "export", "package",
166        "svelte_instance_script", "svelte_module_script",
167    ];
168
169    // Build set of entity IDs that have changes
170    let changed_ids: HashSet<&str> = changes.iter().map(|c| c.entity_id.as_str()).collect();
171
172    // Find parent entity IDs that should be suppressed: a parent is redundant
173    // when at least one of its children also has a change and the parent is a
174    // container type (impl, trait, module).
175    let mut suppress: HashSet<String> = HashSet::new();
176    for entity in entities {
177        if let Some(ref pid) = entity.parent_id {
178            if changed_ids.contains(entity.id.as_str()) && changed_ids.contains(pid.as_str()) {
179                suppress.insert(pid.clone());
180            }
181        }
182    }
183
184    if !suppress.is_empty() {
185        changes.retain(|c| {
186            !(matches!(c.change_type, ChangeType::Modified | ChangeType::Added | ChangeType::Deleted)
187                && suppress.contains(&c.entity_id)
188                && CONTAINER_TYPES.contains(&c.entity_type.as_str()))
189        });
190    }
191}
192
193/// Detect changes in lines that fall outside any entity span.
194/// These are things like use statements, crate-level attributes, standalone
195/// comments, and macro invocations that aren't tracked as entities.
196fn detect_orphan_changes(
197    file: &FileChange,
198    before_entities: &[SemanticEntity],
199    after_entities: &[SemanticEntity],
200    commit_sha: Option<&str>,
201    author: Option<&str>,
202) -> Vec<SemanticChange> {
203    let before_text = file.before_content.as_deref().unwrap_or("");
204    let after_text = file.after_content.as_deref().unwrap_or("");
205
206    // Build covered line sets from entity spans
207    let before_covered: HashSet<usize> = before_entities
208        .iter()
209        .flat_map(|e| e.start_line..=e.end_line)
210        .collect();
211    let after_covered: HashSet<usize> = after_entities
212        .iter()
213        .flat_map(|e| e.start_line..=e.end_line)
214        .collect();
215
216    // Extract uncovered lines, preserving line numbers for context
217    let before_orphan: String = before_text
218        .lines()
219        .enumerate()
220        .filter(|(i, _)| !before_covered.contains(&(i + 1)))
221        .map(|(_, l)| l)
222        .collect::<Vec<_>>()
223        .join("\n");
224    let after_orphan: String = after_text
225        .lines()
226        .enumerate()
227        .filter(|(i, _)| !after_covered.contains(&(i + 1)))
228        .map(|(_, l)| l)
229        .collect::<Vec<_>>()
230        .join("\n");
231
232    // Skip if orphan content is unchanged
233    if before_orphan == after_orphan {
234        return Vec::new();
235    }
236
237    let change_type = if before_orphan.trim().is_empty() {
238        ChangeType::Added
239    } else if after_orphan.trim().is_empty() {
240        ChangeType::Deleted
241    } else {
242        ChangeType::Modified
243    };
244
245    vec![SemanticChange {
246        id: format!("{}::orphan", file.file_path),
247        entity_id: format!("{}::orphan", file.file_path),
248        change_type,
249        entity_type: "orphan".to_string(),
250        entity_name: "module-level".to_string(),
251        entity_line: 0,
252        file_path: file.file_path.clone(),
253        old_entity_name: None,
254        old_file_path: None,
255        before_content: if before_orphan.is_empty() {
256            None
257        } else {
258            Some(before_orphan)
259        },
260        after_content: if after_orphan.is_empty() {
261            None
262        } else {
263            Some(after_orphan)
264        },
265        commit_sha: commit_sha.map(String::from),
266        author: author.map(String::from),
267        timestamp: None,
268        structural_change: Some(true),
269    }]
270}