Skip to main content

codemem_engine/hooks/
diff.rs

1//! Diff computation and semantic summarization for code edits.
2//!
3//! Uses the `similar` crate to compute line-level diffs, then applies
4//! heuristic rules to generate human-readable semantic summaries.
5
6use similar::{ChangeTag, TextDiff};
7
/// Summary of a diff between old and new content.
///
/// Produced by [`compute_diff`]; all counters and hunks describe line-level
/// changes only.
#[derive(Debug, Clone)]
pub struct DiffSummary {
    /// Path of the file the diff refers to, copied verbatim from the caller.
    pub file_path: String,
    /// Whether the content was added, modified, or deleted as a whole.
    pub change_type: ChangeType,
    /// Total number of inserted lines across all hunks.
    pub lines_added: usize,
    /// Total number of deleted lines across all hunks.
    pub lines_removed: usize,
    /// Change regions that contain at least one inserted or deleted line.
    pub hunks: Vec<DiffHunk>,
    /// Human-readable description built by [`generate_semantic_summary`].
    pub semantic_summary: String,
}
18
/// A contiguous region of changes.
///
/// `PartialEq`/`Eq` allow hunks to be compared directly (for example in
/// assertions), and `Default` yields an empty hunk with no added or removed
/// lines.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DiffHunk {
    /// Lines that exist only in the new content, in diff order.
    pub added: Vec<String>,
    /// Lines that exist only in the old content, in diff order.
    pub removed: Vec<String>,
}
25
/// The kind of change detected.
///
/// The enum carries no data, so it is `Copy` (no `.clone()` needed when
/// passing it around) and `Hash` (usable as a map or set key).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ChangeType {
    /// The old content was empty and the new content is not.
    Added,
    /// Any change that is neither a pure addition nor a pure deletion.
    Modified,
    /// The old content was non-empty and the new content is empty.
    Deleted,
}
33
34impl std::fmt::Display for ChangeType {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        match self {
37            ChangeType::Added => write!(f, "added"),
38            ChangeType::Modified => write!(f, "modified"),
39            ChangeType::Deleted => write!(f, "deleted"),
40        }
41    }
42}
43
44/// Compute a line-level diff between old and new content.
45pub fn compute_diff(file_path: &str, old_content: &str, new_content: &str) -> DiffSummary {
46    let change_type = if old_content.is_empty() && !new_content.is_empty() {
47        ChangeType::Added
48    } else if !old_content.is_empty() && new_content.is_empty() {
49        ChangeType::Deleted
50    } else {
51        ChangeType::Modified
52    };
53
54    let text_diff = TextDiff::from_lines(old_content, new_content);
55
56    let mut lines_added: usize = 0;
57    let mut lines_removed: usize = 0;
58    let mut hunks: Vec<DiffHunk> = Vec::new();
59
60    for group in text_diff.grouped_ops(3) {
61        let mut hunk = DiffHunk {
62            added: Vec::new(),
63            removed: Vec::new(),
64        };
65
66        for op in &group {
67            for change in text_diff.iter_changes(op) {
68                match change.tag() {
69                    ChangeTag::Insert => {
70                        lines_added += 1;
71                        hunk.added.push(change.value().to_string());
72                    }
73                    ChangeTag::Delete => {
74                        lines_removed += 1;
75                        hunk.removed.push(change.value().to_string());
76                    }
77                    ChangeTag::Equal => {}
78                }
79            }
80        }
81
82        if !hunk.added.is_empty() || !hunk.removed.is_empty() {
83            hunks.push(hunk);
84        }
85    }
86
87    let mut summary = DiffSummary {
88        file_path: file_path.to_string(),
89        change_type,
90        lines_added,
91        lines_removed,
92        hunks,
93        semantic_summary: String::new(),
94    };
95
96    summary.semantic_summary = generate_semantic_summary(&summary);
97    summary
98}
99
100/// Generate a heuristic-based human-readable summary of the diff.
101pub fn generate_semantic_summary(diff: &DiffSummary) -> String {
102    let mut parts: Vec<String> = Vec::new();
103
104    let all_added: Vec<&str> = diff
105        .hunks
106        .iter()
107        .flat_map(|h| h.added.iter().map(|s| s.trim()))
108        .collect();
109    let all_removed: Vec<&str> = diff
110        .hunks
111        .iter()
112        .flat_map(|h| h.removed.iter().map(|s| s.trim()))
113        .collect();
114
115    // Detect function additions/removals
116    let fn_patterns = ["fn ", "def ", "function ", "func ", "async fn "];
117    let added_fns: Vec<&str> = all_added
118        .iter()
119        .filter(|line| fn_patterns.iter().any(|p| line.contains(p)))
120        .copied()
121        .collect();
122    let removed_fns: Vec<&str> = all_removed
123        .iter()
124        .filter(|line| fn_patterns.iter().any(|p| line.contains(p)))
125        .copied()
126        .collect();
127
128    for line in &added_fns {
129        if let Some(name) = extract_fn_name(line) {
130            parts.push(format!("Added function {name}"));
131        }
132    }
133    for line in &removed_fns {
134        if let Some(name) = extract_fn_name(line) {
135            let was_readded = added_fns
136                .iter()
137                .any(|a| extract_fn_name(a) == Some(name.clone()));
138            if !was_readded {
139                parts.push(format!("Removed function {name}"));
140            }
141        }
142    }
143
144    // Detect import changes
145    let import_patterns = ["use ", "import ", "from ", "require("];
146    let added_imports = all_added
147        .iter()
148        .any(|line| import_patterns.iter().any(|p| line.contains(p)));
149    let removed_imports = all_removed
150        .iter()
151        .any(|line| import_patterns.iter().any(|p| line.contains(p)));
152    if added_imports || removed_imports {
153        parts.push("Updated imports".to_string());
154    }
155
156    // Detect error handling
157    let error_patterns = ["Result", "Error", "unwrap", "expect", "try {", "catch"];
158    let added_error = all_added
159        .iter()
160        .any(|line| error_patterns.iter().any(|p| line.contains(p)));
161    let removed_error = all_removed
162        .iter()
163        .any(|line| error_patterns.iter().any(|p| line.contains(p)));
164    if added_error && !removed_error {
165        parts.push("Added error handling".to_string());
166    }
167
168    // Detect type definitions
169    let type_patterns = ["struct ", "class ", "enum ", "trait ", "interface "];
170    for line in &all_added {
171        if type_patterns.iter().any(|p| line.contains(p)) {
172            if let Some(name) = extract_type_name(line) {
173                parts.push(format!("Added type {name}"));
174            }
175        }
176    }
177
178    if parts.is_empty() {
179        let total = diff.lines_added + diff.lines_removed;
180        format!("Modified {} lines in {}", total, diff.file_path)
181    } else {
182        parts.join("; ")
183    }
184}
185
/// Extract the function name from a line that declares a function.
///
/// The line is trimmed, then any leading modifiers are skipped: `pub`,
/// restricted visibility such as `pub(crate)` / `pub(super)`, `export`,
/// `default`, `async`, `const`, `unsafe`, `static`, and access keywords such
/// as `public`. The next word must be one of the declaration keywords `fn`,
/// `function`, `def`, or `func`. The name is the run of alphanumeric or `_`
/// characters that immediately follows the keyword and a single space.
///
/// Returns `None` when the line is not a function declaration or when no
/// name follows the keyword (for example an anonymous `function (`).
fn extract_fn_name(line: &str) -> Option<String> {
    const MODIFIERS: [&str; 12] = [
        "pub",
        "export",
        "default",
        "async",
        "const",
        "unsafe",
        "static",
        "public",
        "private",
        "protected",
        "final",
        "abstract",
    ];
    const KEYWORDS: [&str; 4] = ["fn", "function", "def", "func"];

    let mut rest = line.trim();
    loop {
        // Every token of interest is followed by a space; if there is none
        // left, the line cannot be a declaration.
        let (word, tail) = rest.split_once(' ')?;

        if KEYWORDS.contains(&word) {
            let name: String = tail
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            return if name.is_empty() { None } else { Some(name) };
        }

        // `pub(crate)`, `pub(super)`, ... count as visibility modifiers too.
        let is_restricted_pub = word.starts_with("pub(") && word.ends_with(')');
        if !(is_restricted_pub || MODIFIERS.contains(&word)) {
            return None;
        }
        rest = tail.trim_start();
    }
}
213
/// Extract the type name from a line that declares a type.
///
/// The line is trimmed, then any leading modifiers are skipped: `pub`,
/// restricted visibility such as `pub(crate)` / `pub(super)`, `export`,
/// `default`, `declare`, `abstract`, `final`, `sealed`, `unsafe`, `const`,
/// `static`, and access keywords such as `public`. The next word must be one
/// of the declaration keywords `struct`, `class`, `enum`, `trait`, or
/// `interface`. The name is the run of alphanumeric or `_` characters that
/// immediately follows the keyword and a single space.
///
/// Returns `None` when the line is not a type declaration or when no name
/// follows the keyword (for example an anonymous `struct {`).
fn extract_type_name(line: &str) -> Option<String> {
    const MODIFIERS: [&str; 14] = [
        "pub",
        "export",
        "default",
        "declare",
        "abstract",
        "final",
        "sealed",
        "public",
        "private",
        "protected",
        "internal",
        "static",
        "unsafe",
        "const",
    ];
    const KEYWORDS: [&str; 5] = ["struct", "class", "enum", "trait", "interface"];

    let mut rest = line.trim();
    loop {
        // Every token of interest is followed by a space; if there is none
        // left, the line cannot be a declaration.
        let (word, tail) = rest.split_once(' ')?;

        if KEYWORDS.contains(&word) {
            let name: String = tail
                .chars()
                .take_while(|c| c.is_alphanumeric() || *c == '_')
                .collect();
            return if name.is_empty() { None } else { Some(name) };
        }

        // `pub(crate)`, `pub(super)`, ... count as visibility modifiers too.
        let is_restricted_pub = word.starts_with("pub(") && word.ends_with(')');
        if !(is_restricted_pub || MODIFIERS.contains(&word)) {
            return None;
        }
        rest = tail.trim_start();
    }
}
243
244#[cfg(test)]
245#[path = "tests/diff_tests.rs"]
246mod tests;