Skip to main content

sem_core/parser/
differ.rs

1use rayon::prelude::*;
2use serde::Serialize;
3
4use crate::git::types::FileChange;
5use crate::model::change::{ChangeType, SemanticChange};
6use crate::model::identity::match_entities;
7use crate::parser::registry::ParserRegistry;
8use std::collections::HashSet;
9
10#[derive(Debug, Clone, Serialize)]
11#[serde(rename_all = "camelCase")]
12pub struct DiffResult {
13    pub changes: Vec<SemanticChange>,
14    pub file_count: usize,
15    pub added_count: usize,
16    pub modified_count: usize,
17    pub deleted_count: usize,
18    pub moved_count: usize,
19    pub renamed_count: usize,
20}
21
22pub fn compute_semantic_diff(
23    file_changes: &[FileChange],
24    registry: &ParserRegistry,
25    commit_sha: Option<&str>,
26    author: Option<&str>,
27) -> DiffResult {
28    // Process files in parallel: each file's entity extraction and matching is independent
29    let per_file_changes: Vec<(String, Vec<SemanticChange>)> = file_changes
30        .par_iter()
31        .filter_map(|file| {
32            let content_hint = file.after_content.as_deref()
33                .or(file.before_content.as_deref())
34                .unwrap_or("");
35            let plugin = registry.get_plugin_with_content(&file.file_path, content_hint)?;
36
37            let before_entities = if let Some(ref content) = file.before_content {
38                let before_path = file.old_file_path.as_deref().unwrap_or(&file.file_path);
39                match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
40                    plugin.extract_entities(content, before_path)
41                })) {
42                    Ok(entities) => entities,
43                    Err(_) => Vec::new(),
44                }
45            } else {
46                Vec::new()
47            };
48
49            let after_entities = if let Some(ref content) = file.after_content {
50                match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
51                    plugin.extract_entities(content, &file.file_path)
52                })) {
53                    Ok(entities) => entities,
54                    Err(_) => Vec::new(),
55                }
56            } else {
57                Vec::new()
58            };
59
60            let sim_fn = |a: &crate::model::entity::SemanticEntity,
61                          b: &crate::model::entity::SemanticEntity|
62             -> f64 { plugin.compute_similarity(a, b) };
63
64            let mut result = match_entities(
65                &before_entities,
66                &after_entities,
67                &file.file_path,
68                Some(&sim_fn),
69                commit_sha,
70                author,
71            );
72            result.changes.sort_by_key(|change| change.entity_line);
73
74            if result.changes.is_empty() {
75                None
76            } else {
77                Some((file.file_path.clone(), result.changes))
78            }
79        })
80        .collect();
81
82    let mut all_changes: Vec<SemanticChange> = Vec::new();
83    let mut files_with_changes: HashSet<String> = HashSet::new();
84    for (file_path, changes) in per_file_changes {
85        files_with_changes.insert(file_path);
86        all_changes.extend(changes);
87    }
88
89    // Single-pass counting
90    let mut added_count = 0;
91    let mut modified_count = 0;
92    let mut deleted_count = 0;
93    let mut moved_count = 0;
94    let mut renamed_count = 0;
95
96    for c in &all_changes {
97        match c.change_type {
98            ChangeType::Added => added_count += 1,
99            ChangeType::Modified => modified_count += 1,
100            ChangeType::Deleted => deleted_count += 1,
101            ChangeType::Moved => moved_count += 1,
102            ChangeType::Renamed => renamed_count += 1,
103        }
104    }
105
106    DiffResult {
107        changes: all_changes,
108        file_count: files_with_changes.len(),
109        added_count,
110        modified_count,
111        deleted_count,
112        moved_count,
113        renamed_count,
114    }
115}