1use crate::change::{Change, ChangeKind, ChangeSpan};
4use similar::{ChangeTag, TextDiff};
5use std::path::Path;
6use thiserror::Error;
7
8#[derive(Error, Debug)]
9pub enum DiffError {
10 #[error("Failed to read file: {0}")]
11 FileRead(#[from] std::io::Error),
12 #[error("Diff computation failed: {0}")]
13 ComputeFailed(String),
14}
15
16#[derive(Debug, Clone)]
18pub struct Hunk {
19 pub id: usize,
21 pub change_ids: Vec<usize>,
23 pub old_start: Option<usize>,
25 pub new_start: Option<usize>,
27 pub insertions: usize,
29 pub deletions: usize,
31}
32
33impl Hunk {
34 pub fn len(&self) -> usize {
36 self.change_ids.len()
37 }
38
39 pub fn is_empty(&self) -> bool {
41 self.change_ids.is_empty()
42 }
43}
44
45#[derive(Debug, Clone)]
47pub struct DiffResult {
48 pub changes: Vec<Change>,
50 pub significant_changes: Vec<usize>,
52 pub hunks: Vec<Hunk>,
54 pub insertions: usize,
56 pub deletions: usize,
58}
59
60impl DiffResult {
61 pub fn get_significant_changes(&self) -> Vec<&Change> {
63 self.significant_changes
64 .iter()
65 .filter_map(|&id| self.changes.iter().find(|c| c.id == id))
66 .collect()
67 }
68
69 pub fn get_hunk(&self, hunk_id: usize) -> Option<&Hunk> {
71 self.hunks.iter().find(|h| h.id == hunk_id)
72 }
73
74 pub fn hunk_for_change(&self, change_id: usize) -> Option<&Hunk> {
76 self.hunks
77 .iter()
78 .find(|h| h.change_ids.contains(&change_id))
79 }
80}
81
82#[derive(Debug, Clone)]
84pub struct FileDiff {
85 pub old_path: Option<String>,
86 pub new_path: Option<String>,
87 pub result: DiffResult,
88}
89
90pub struct DiffEngine {
92 context_lines: usize,
94 word_level: bool,
96}
97
98impl Default for DiffEngine {
99 fn default() -> Self {
100 Self {
101 context_lines: 3,
102 word_level: true,
103 }
104 }
105}
106
107impl DiffEngine {
108 pub fn new() -> Self {
109 Self::default()
110 }
111
112 pub fn with_context(mut self, lines: usize) -> Self {
113 self.context_lines = lines;
114 self
115 }
116
117 pub fn with_word_level(mut self, enabled: bool) -> Self {
118 self.word_level = enabled;
119 self
120 }
121
122 pub fn diff_strings(&self, old: &str, new: &str) -> DiffResult {
124 let text_diff = TextDiff::from_lines(old, new);
125 let mut changes = Vec::new();
126 let mut significant_changes = Vec::new();
127 let mut insertions = 0;
128 let mut deletions = 0;
129 let mut change_id = 0;
130
131 let mut old_line_num = 1usize;
132 let mut new_line_num = 1usize;
133
134 let mut pending_deletes: Vec<(String, usize)> = Vec::new();
136 let mut pending_inserts: Vec<(String, usize)> = Vec::new();
137
138 let ops: Vec<_> = text_diff.iter_all_changes().collect();
139
140 for change in ops.iter() {
141 match change.tag() {
142 ChangeTag::Equal => {
143 self.flush_pending_changes(
145 &mut pending_deletes,
146 &mut pending_inserts,
147 &mut changes,
148 &mut significant_changes,
149 &mut change_id,
150 &mut insertions,
151 &mut deletions,
152 );
153
154 let span = ChangeSpan::equal(change.value().trim_end_matches('\n'))
155 .with_lines(Some(old_line_num), Some(new_line_num));
156 changes.push(Change::single(change_id, span));
157 change_id += 1;
158 old_line_num += 1;
159 new_line_num += 1;
160 }
161 ChangeTag::Delete => {
162 pending_deletes.push((
163 change.value().trim_end_matches('\n').to_string(),
164 old_line_num,
165 ));
166 old_line_num += 1;
167 }
168 ChangeTag::Insert => {
169 pending_inserts.push((
170 change.value().trim_end_matches('\n').to_string(),
171 new_line_num,
172 ));
173 new_line_num += 1;
174 }
175 }
176 }
177
178 self.flush_pending_changes(
180 &mut pending_deletes,
181 &mut pending_inserts,
182 &mut changes,
183 &mut significant_changes,
184 &mut change_id,
185 &mut insertions,
186 &mut deletions,
187 );
188
189 let hunks = Self::compute_hunks(&significant_changes, &changes);
191
192 DiffResult {
193 changes,
194 significant_changes,
195 hunks,
196 insertions,
197 deletions,
198 }
199 }
200
201 fn compute_hunks(significant_changes: &[usize], changes: &[Change]) -> Vec<Hunk> {
204 const PROXIMITY_THRESHOLD: usize = 3;
205
206 let mut hunks = Vec::new();
207 if significant_changes.is_empty() {
208 return hunks;
209 }
210
211 let mut current_hunk_changes: Vec<usize> = Vec::new();
212 let mut current_hunk_old_start: Option<usize> = None;
213 let mut current_hunk_new_start: Option<usize> = None;
214 let mut last_old_line: Option<usize> = None;
215 let mut last_new_line: Option<usize> = None;
216 let mut current_insertions = 0;
217 let mut current_deletions = 0;
218 let mut hunk_id = 0;
219
220 for &change_id in significant_changes {
221 let change = match changes.iter().find(|c| c.id == change_id) {
222 Some(c) => c,
223 None => continue,
224 };
225
226 let (old_line, new_line) = change
228 .spans
229 .first()
230 .map(|s| (s.old_line, s.new_line))
231 .unwrap_or((None, None));
232
233 let is_close = match (last_old_line, last_new_line, old_line, new_line) {
235 (Some(lo), _, Some(co), _) => co.saturating_sub(lo) <= PROXIMITY_THRESHOLD,
236 (_, Some(ln), _, Some(cn)) => cn.saturating_sub(ln) <= PROXIMITY_THRESHOLD,
237 _ => current_hunk_changes.is_empty(), };
239
240 if is_close {
241 current_hunk_changes.push(change_id);
243 if current_hunk_old_start.is_none() {
244 current_hunk_old_start = old_line;
245 }
246 if current_hunk_new_start.is_none() {
247 current_hunk_new_start = new_line;
248 }
249 } else {
250 if !current_hunk_changes.is_empty() {
252 hunks.push(Hunk {
253 id: hunk_id,
254 change_ids: current_hunk_changes.clone(),
255 old_start: current_hunk_old_start,
256 new_start: current_hunk_new_start,
257 insertions: current_insertions,
258 deletions: current_deletions,
259 });
260 hunk_id += 1;
261 }
262
263 current_hunk_changes = vec![change_id];
265 current_hunk_old_start = old_line;
266 current_hunk_new_start = new_line;
267 current_insertions = 0;
268 current_deletions = 0;
269 }
270
271 if old_line.is_some() {
273 last_old_line = old_line;
274 }
275 if new_line.is_some() {
276 last_new_line = new_line;
277 }
278
279 for span in &change.spans {
281 match span.kind {
282 ChangeKind::Insert => current_insertions += 1,
283 ChangeKind::Delete => current_deletions += 1,
284 ChangeKind::Replace => {
285 current_insertions += 1;
286 current_deletions += 1;
287 }
288 ChangeKind::Equal => {}
289 }
290 }
291 }
292
293 if !current_hunk_changes.is_empty() {
295 hunks.push(Hunk {
296 id: hunk_id,
297 change_ids: current_hunk_changes,
298 old_start: current_hunk_old_start,
299 new_start: current_hunk_new_start,
300 insertions: current_insertions,
301 deletions: current_deletions,
302 });
303 }
304
305 hunks
306 }
307
308 #[allow(clippy::too_many_arguments)]
309 fn flush_pending_changes(
310 &self,
311 pending_deletes: &mut Vec<(String, usize)>,
312 pending_inserts: &mut Vec<(String, usize)>,
313 changes: &mut Vec<Change>,
314 significant_changes: &mut Vec<usize>,
315 change_id: &mut usize,
316 insertions: &mut usize,
317 deletions: &mut usize,
318 ) {
319 if pending_deletes.is_empty() && pending_inserts.is_empty() {
320 return;
321 }
322
323 if self.word_level && pending_deletes.len() == pending_inserts.len() {
325 for ((old_text, old_line), (new_text, new_line)) in
326 pending_deletes.iter().zip(pending_inserts.iter())
327 {
328 let spans = self.compute_word_diff(old_text, new_text, *old_line, *new_line);
329 let change = Change::new(*change_id, spans);
330 significant_changes.push(*change_id);
331 changes.push(change);
332 *change_id += 1;
333 *insertions += 1;
334 *deletions += 1;
335 }
336 } else {
337 for (text, line) in pending_deletes.iter() {
339 let span = ChangeSpan::delete(text.clone()).with_lines(Some(*line), None);
340 significant_changes.push(*change_id);
341 changes.push(Change::single(*change_id, span));
342 *change_id += 1;
343 *deletions += 1;
344 }
345 for (text, line) in pending_inserts.iter() {
346 let span = ChangeSpan::insert(text.clone()).with_lines(None, Some(*line));
347 significant_changes.push(*change_id);
348 changes.push(Change::single(*change_id, span));
349 *change_id += 1;
350 *insertions += 1;
351 }
352 }
353
354 pending_deletes.clear();
355 pending_inserts.clear();
356 }
357}
358
359fn tokenize_code(line: &str) -> Vec<String> {
362 let mut tokens = Vec::new();
363 let mut buf = String::new();
364 let mut in_word = false;
365
366 for ch in line.chars() {
367 let is_word = ch.is_alphanumeric() || ch == '_';
368 if is_word {
369 if !in_word {
370 if !buf.is_empty() {
371 tokens.push(std::mem::take(&mut buf));
372 }
373 in_word = true;
374 }
375 buf.push(ch);
376 } else {
377 if in_word {
378 if !buf.is_empty() {
379 tokens.push(std::mem::take(&mut buf));
380 }
381 in_word = false;
382 }
383 if ch.is_whitespace() {
384 if !buf.is_empty() && !buf.chars().all(char::is_whitespace) {
386 tokens.push(std::mem::take(&mut buf));
387 }
388 buf.push(ch);
389 } else {
390 if !buf.is_empty() {
392 tokens.push(std::mem::take(&mut buf));
393 }
394 tokens.push(ch.to_string());
395 }
396 }
397 }
398 if !buf.is_empty() {
399 tokens.push(buf);
400 }
401 tokens
402}
403
404impl DiffEngine {
405 fn compute_word_diff(
407 &self,
408 old: &str,
409 new: &str,
410 old_line: usize,
411 new_line: usize,
412 ) -> Vec<ChangeSpan> {
413 let old_tokens = tokenize_code(old);
414 let new_tokens = tokenize_code(new);
415 let old_refs: Vec<&str> = old_tokens.iter().map(|s| s.as_str()).collect();
416 let new_refs: Vec<&str> = new_tokens.iter().map(|s| s.as_str()).collect();
417 let word_diff = TextDiff::from_slices(&old_refs, &new_refs);
418 let mut spans = Vec::new();
419
420 for change in word_diff.iter_all_changes() {
421 let text = change.value().to_string();
422 let span = match change.tag() {
423 ChangeTag::Equal => ChangeSpan::equal(text),
424 ChangeTag::Delete => ChangeSpan::delete(text),
425 ChangeTag::Insert => ChangeSpan::insert(text),
426 }
427 .with_lines(Some(old_line), Some(new_line));
428 spans.push(span);
429 }
430
431 spans
432 }
433
434 pub fn diff_files(&self, old_path: &Path, new_path: &Path) -> Result<FileDiff, DiffError> {
436 let old_content = std::fs::read_to_string(old_path)?;
437 let new_content = std::fs::read_to_string(new_path)?;
438
439 let result = self.diff_strings(&old_content, &new_content);
440
441 Ok(FileDiff {
442 old_path: Some(old_path.to_string_lossy().to_string()),
443 new_path: Some(new_path.to_string_lossy().to_string()),
444 result,
445 })
446 }
447}
448
449#[cfg(test)]
450mod tests {
451 use super::*;
452
453 #[test]
454 fn test_simple_diff() {
455 let engine = DiffEngine::new();
456 let old = "foo\nbar\nbaz";
457 let new = "foo\nqux\nbaz";
458
459 let result = engine.diff_strings(old, new);
460
461 assert_eq!(result.insertions, 1);
462 assert_eq!(result.deletions, 1);
463 assert!(!result.significant_changes.is_empty());
464 }
465
466 #[test]
467 fn test_no_changes() {
468 let engine = DiffEngine::new();
469 let text = "foo\nbar\nbaz";
470
471 let result = engine.diff_strings(text, text);
472
473 assert_eq!(result.insertions, 0);
474 assert_eq!(result.deletions, 0);
475 assert!(result.significant_changes.is_empty());
476 }
477
478 #[test]
479 fn test_word_level_diff() {
480 let engine = DiffEngine::new().with_word_level(true);
481 let old = "const foo = 4";
482 let new = "const bar = 4";
483
484 let result = engine.diff_strings(old, new);
485
486 assert_eq!(result.significant_changes.len(), 1);
488 }
489
490 #[test]
491 fn test_tokenize_code_basic() {
492 let tokens = tokenize_code("KeyModifiers, MouseEventKind}");
493 assert_eq!(
494 tokens,
495 vec!["KeyModifiers", ",", " ", "MouseEventKind", "}"]
496 );
497 }
498
499 #[test]
500 fn test_tokenize_code_identifiers() {
501 let tokens = tokenize_code("foo_bar baz123");
502 assert_eq!(tokens, vec!["foo_bar", " ", "baz123"]);
503 }
504
505 #[test]
506 fn test_tokenize_code_punctuation() {
507 let tokens = tokenize_code("use foo::{A, B};");
508 assert_eq!(
509 tokens,
510 vec!["use", " ", "foo", ":", ":", "{", "A", ",", " ", "B", "}", ";"]
511 );
512 }
513
514 #[test]
515 fn test_word_diff_punctuation_separation() {
516 use crate::change::ChangeKind;
517
518 let engine = DiffEngine::new().with_word_level(true);
520 let old = "use foo::{KeyModifiers};";
521 let new = "use foo::{KeyModifiers, MouseEventKind};";
522
523 let result = engine.diff_strings(old, new);
524
525 assert_eq!(result.significant_changes.len(), 1);
527
528 let change = &result.changes[result.significant_changes[0]];
529
530 let equal_content: String = change
532 .spans
533 .iter()
534 .filter(|s| s.kind == ChangeKind::Equal)
535 .map(|s| s.text.as_str())
536 .collect();
537 let insert_content: String = change
538 .spans
539 .iter()
540 .filter(|s| s.kind == ChangeKind::Insert)
541 .map(|s| s.text.as_str())
542 .collect();
543
544 assert!(
546 equal_content.contains("KeyModifiers"),
547 "KeyModifiers should be equal, got equal: '{}', insert: '{}'",
548 equal_content,
549 insert_content
550 );
551
552 assert!(
554 insert_content.contains("MouseEventKind"),
555 "MouseEventKind should be inserted, got equal: '{}', insert: '{}'",
556 equal_content,
557 insert_content
558 );
559
560 assert!(
562 !insert_content.contains("KeyModifiers"),
563 "KeyModifiers should not be inserted, got insert: '{}'",
564 insert_content
565 );
566 }
567}