Skip to main content

codemem_hooks/
diff.rs

1//! Diff computation and semantic summarization for code edits.
2//!
3//! Uses the `similar` crate to compute line-level diffs, then applies
4//! heuristic rules to generate human-readable semantic summaries.
5
6use similar::{ChangeTag, TextDiff};
7
/// Summary of a diff between old and new content.
///
/// Built by `compute_diff`, which fills every field; `semantic_summary` is
/// derived from the other fields via `generate_semantic_summary`.
#[derive(Debug, Clone)]
pub struct DiffSummary {
    /// Path of the diffed file, copied verbatim from the caller's argument.
    pub file_path: String,
    /// Whether the content was added, deleted, or modified.
    pub change_type: ChangeType,
    /// Number of inserted lines, summed over all hunks.
    pub lines_added: usize,
    /// Number of deleted lines, summed over all hunks.
    pub lines_removed: usize,
    /// Changed regions; groups without any insertion or deletion are omitted.
    pub hunks: Vec<DiffHunk>,
    /// Heuristic, human-readable description of the change.
    pub semantic_summary: String,
}
18
/// A contiguous region of changes.
///
/// Only the changed lines are kept; unchanged context lines are not stored.
#[derive(Debug, Clone)]
pub struct DiffHunk {
    /// Text of each inserted line, stored as yielded by the diff (not trimmed).
    pub added: Vec<String>,
    /// Text of each deleted line, stored as yielded by the diff (not trimmed).
    pub removed: Vec<String>,
}
25
/// The kind of change detected.
///
/// `compute_diff` classifies purely by whether the old and new content are
/// empty strings; it does not inspect the diff itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChangeType {
    /// Old content was empty and new content is not.
    Added,
    /// Every other case, including identical or both-empty content.
    Modified,
    /// Old content was non-empty and new content is empty.
    Deleted,
}
33
34impl std::fmt::Display for ChangeType {
35    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36        match self {
37            ChangeType::Added => write!(f, "added"),
38            ChangeType::Modified => write!(f, "modified"),
39            ChangeType::Deleted => write!(f, "deleted"),
40        }
41    }
42}
43
44/// Compute a line-level diff between old and new content.
45pub fn compute_diff(file_path: &str, old_content: &str, new_content: &str) -> DiffSummary {
46    let change_type = if old_content.is_empty() && !new_content.is_empty() {
47        ChangeType::Added
48    } else if !old_content.is_empty() && new_content.is_empty() {
49        ChangeType::Deleted
50    } else {
51        ChangeType::Modified
52    };
53
54    let text_diff = TextDiff::from_lines(old_content, new_content);
55
56    let mut lines_added: usize = 0;
57    let mut lines_removed: usize = 0;
58    let mut hunks: Vec<DiffHunk> = Vec::new();
59
60    for group in text_diff.grouped_ops(3) {
61        let mut hunk = DiffHunk {
62            added: Vec::new(),
63            removed: Vec::new(),
64        };
65
66        for op in &group {
67            for change in text_diff.iter_changes(op) {
68                match change.tag() {
69                    ChangeTag::Insert => {
70                        lines_added += 1;
71                        hunk.added.push(change.value().to_string());
72                    }
73                    ChangeTag::Delete => {
74                        lines_removed += 1;
75                        hunk.removed.push(change.value().to_string());
76                    }
77                    ChangeTag::Equal => {}
78                }
79            }
80        }
81
82        if !hunk.added.is_empty() || !hunk.removed.is_empty() {
83            hunks.push(hunk);
84        }
85    }
86
87    let mut summary = DiffSummary {
88        file_path: file_path.to_string(),
89        change_type,
90        lines_added,
91        lines_removed,
92        hunks,
93        semantic_summary: String::new(),
94    };
95
96    summary.semantic_summary = generate_semantic_summary(&summary);
97    summary
98}
99
100/// Generate a heuristic-based human-readable summary of the diff.
101pub fn generate_semantic_summary(diff: &DiffSummary) -> String {
102    let mut parts: Vec<String> = Vec::new();
103
104    let all_added: Vec<&str> = diff
105        .hunks
106        .iter()
107        .flat_map(|h| h.added.iter().map(|s| s.trim()))
108        .collect();
109    let all_removed: Vec<&str> = diff
110        .hunks
111        .iter()
112        .flat_map(|h| h.removed.iter().map(|s| s.trim()))
113        .collect();
114
115    // Detect function additions/removals
116    let fn_patterns = ["fn ", "def ", "function ", "func ", "async fn "];
117    let added_fns: Vec<&str> = all_added
118        .iter()
119        .filter(|line| fn_patterns.iter().any(|p| line.contains(p)))
120        .copied()
121        .collect();
122    let removed_fns: Vec<&str> = all_removed
123        .iter()
124        .filter(|line| fn_patterns.iter().any(|p| line.contains(p)))
125        .copied()
126        .collect();
127
128    for line in &added_fns {
129        if let Some(name) = extract_fn_name(line) {
130            parts.push(format!("Added function {name}"));
131        }
132    }
133    for line in &removed_fns {
134        if let Some(name) = extract_fn_name(line) {
135            let was_readded = added_fns
136                .iter()
137                .any(|a| extract_fn_name(a) == Some(name.clone()));
138            if !was_readded {
139                parts.push(format!("Removed function {name}"));
140            }
141        }
142    }
143
144    // Detect import changes
145    let import_patterns = ["use ", "import ", "from ", "require("];
146    let added_imports = all_added
147        .iter()
148        .any(|line| import_patterns.iter().any(|p| line.contains(p)));
149    let removed_imports = all_removed
150        .iter()
151        .any(|line| import_patterns.iter().any(|p| line.contains(p)));
152    if added_imports || removed_imports {
153        parts.push("Updated imports".to_string());
154    }
155
156    // Detect error handling
157    let error_patterns = ["Result", "Error", "unwrap", "expect", "try {", "catch"];
158    let added_error = all_added
159        .iter()
160        .any(|line| error_patterns.iter().any(|p| line.contains(p)));
161    let removed_error = all_removed
162        .iter()
163        .any(|line| error_patterns.iter().any(|p| line.contains(p)));
164    if added_error && !removed_error {
165        parts.push("Added error handling".to_string());
166    }
167
168    // Detect type definitions
169    let type_patterns = ["struct ", "class ", "enum ", "trait ", "interface "];
170    for line in &all_added {
171        if type_patterns.iter().any(|p| line.contains(p)) {
172            if let Some(name) = extract_type_name(line) {
173                parts.push(format!("Added type {name}"));
174            }
175        }
176    }
177
178    if parts.is_empty() {
179        let total = diff.lines_added + diff.lines_removed;
180        format!("Modified {} lines in {}", total, diff.file_path)
181    } else {
182        parts.join("; ")
183    }
184}
185
/// Extract the function name from a line that begins with a function declaration.
///
/// Leading whitespace is ignored. Any run of leading modifiers (`export`,
/// `default`, `pub`, `pub(...)`, `async`, `const`, `unsafe`) is skipped, after
/// which the line must continue with one of the declaration keywords
/// `function`, `fn`, `def` or `func` followed by a space. The name is the
/// longest run of alphanumeric or `_` characters after the keyword.
///
/// Returns `None` when the line does not start with a declaration (for
/// example a comment that merely mentions `fn`) or when no name follows the
/// keyword.
fn extract_fn_name(line: &str) -> Option<String> {
    const MODIFIERS: [&str; 6] = ["export ", "default ", "pub ", "async ", "const ", "unsafe "];
    const KEYWORDS: [&str; 4] = ["function ", "fn ", "def ", "func "];

    let mut rest = line.trim();

    // Peel modifiers off the front until none is left. Each pass removes at
    // least one character, so the loop always terminates.
    'modifiers: loop {
        for modifier in MODIFIERS.iter() {
            if let Some(after) = rest.strip_prefix(*modifier) {
                rest = after.trim_start();
                continue 'modifiers;
            }
        }

        // Restricted visibility: `pub(crate)`, `pub(super)`, `pub(in path)`.
        let after_scope = rest
            .strip_prefix("pub(")
            .and_then(|scoped| scoped.split_once(')'))
            .map(|(_, after)| after);
        if let Some(after) = after_scope {
            rest = after.trim_start();
            continue;
        }

        break;
    }

    let after_keyword = KEYWORDS
        .iter()
        .find_map(|keyword| rest.strip_prefix(*keyword))?;

    let name: String = after_keyword
        .chars()
        .take_while(|c| c.is_alphanumeric() || *c == '_')
        .collect();

    if name.is_empty() {
        None
    } else {
        Some(name)
    }
}
213
/// Extract the declared type name from a line containing a type keyword.
///
/// Looks for `struct `, `class `, `enum `, `trait ` or `interface ` (tried in
/// that order) and returns the identifier that follows: the longest run of
/// alphanumeric or `_` characters. A keyword only counts when it starts a
/// word, so `subclass foo` is not treated as a `class` declaration. Every
/// occurrence of a keyword is tried before moving on to the next keyword.
///
/// Returns `None` when no keyword occurrence is followed by an identifier.
fn extract_type_name(line: &str) -> Option<String> {
    const KEYWORDS: [&str; 5] = ["struct ", "class ", "enum ", "trait ", "interface "];

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    for keyword in KEYWORDS.iter() {
        for (idx, _) in line.match_indices(*keyword) {
            // Skip keywords embedded in a longer identifier (e.g. "subclass ").
            let inside_word = line[..idx]
                .chars()
                .next_back()
                .map_or(false, is_ident_char);
            if inside_word {
                continue;
            }

            let name: String = line[idx + keyword.len()..]
                .chars()
                .take_while(|c| is_ident_char(*c))
                .collect();
            if !name.is_empty() {
                return Some(name);
            }
        }
    }
    None
}
228
#[cfg(test)]
mod tests {
    use super::*;

    /// Diffs `before` against `after` and returns only the semantic summary text.
    fn summarize(path: &str, before: &str, after: &str) -> String {
        compute_diff(path, before, after).semantic_summary
    }

    /// A one-line replacement counts as one added and one removed line.
    #[test]
    fn diff_simple_edit() {
        let result = compute_diff(
            "src/main.rs",
            "fn main() {\n    println!(\"hello\");\n}\n",
            "fn main() {\n    println!(\"world\");\n}\n",
        );
        assert_eq!(result.change_type, ChangeType::Modified);
        assert_eq!(result.lines_added, 1);
        assert_eq!(result.lines_removed, 1);
    }

    /// A newly introduced function is named in the summary.
    #[test]
    fn semantic_summary_function_addition() {
        let text = summarize(
            "src/lib.rs",
            "// module\n",
            "// module\nfn new_helper() {\n    todo!()\n}\n",
        );
        assert!(text.contains("Added function new_helper"));
    }

    /// A function that disappears is reported as removed.
    #[test]
    fn semantic_summary_function_removal() {
        let text = summarize(
            "src/lib.rs",
            "fn helper() {\n    todo!()\n}\nfn main() {}\n",
            "fn main() {}\n",
        );
        assert!(text.contains("Removed function helper"));
    }

    /// Adding a `use` line is reported as an import update.
    #[test]
    fn semantic_summary_import_changes() {
        let text = summarize(
            "src/main.rs",
            "use std::io;\nfn main() {}\n",
            "use std::io;\nuse std::fs;\nfn main() {}\n",
        );
        assert!(text.contains("Updated imports"));
    }

    /// A newly introduced struct is named in the summary.
    #[test]
    fn semantic_summary_type_addition() {
        let text = summarize(
            "src/types.rs",
            "// types\n",
            "// types\nstruct Config {\n    name: String,\n}\n",
        );
        assert!(text.contains("Added type Config"));
    }

    /// Identical inputs produce no added or removed lines.
    #[test]
    fn empty_diff() {
        let unchanged = "fn main() {}\n";
        let result = compute_diff("src/main.rs", unchanged, unchanged);
        assert_eq!(result.lines_added, 0);
        assert_eq!(result.lines_removed, 0);
    }

    /// Empty old content with non-empty new content is an addition.
    #[test]
    fn change_type_added() {
        let result = compute_diff("new.rs", "", "fn new() {}\n");
        assert_eq!(result.change_type, ChangeType::Added);
    }

    /// Non-empty old content with empty new content is a deletion.
    #[test]
    fn change_type_deleted() {
        let result = compute_diff("old.rs", "fn old() {}\n", "");
        assert_eq!(result.change_type, ChangeType::Deleted);
    }

    /// Function names are extracted for Rust and Python declarations.
    #[test]
    fn extract_fn_name_works() {
        let cases = [
            ("fn hello(", Some("hello")),
            ("async fn fetch_data()", Some("fetch_data")),
            ("def process(x):", Some("process")),
            ("no function here", None),
        ];
        for (line, expected) in cases.iter() {
            assert_eq!(extract_fn_name(line).as_deref(), *expected);
        }
    }

    /// Type names are extracted for struct, enum and trait declarations.
    #[test]
    fn extract_type_name_works() {
        let cases = [
            ("struct MyStruct {", Some("MyStruct")),
            ("enum Color {", Some("Color")),
            ("trait Display {", Some("Display")),
            ("no type here", None),
        ];
        for (line, expected) in cases.iter() {
            assert_eq!(extract_type_name(line).as_deref(), *expected);
        }
    }
}