1use tracing::warn;
2
3use crate::differ::{Change, DiffAlgorithm};
4use crate::{Differ, Patch};
5use std::cmp::{max, min};
6
7use super::{create_patch, handle_empty_files, process_changes_to_chunks};
8
9const XDL_MAX_COST_MIN: usize = 256;
11const XDL_HEUR_MIN_COST: usize = 256;
12const XDL_SNAKE_CNT: usize = 20;
13const XDL_K_HEUR: usize = 4;
14const NEG_ONE: isize = -1;
16const LINE_MAX: isize = isize::MAX / 2; #[derive(Clone, Copy)]
21struct AlgoEnv {
22 mxcost: usize,
23 snake_cnt: usize,
24 heur_min: usize,
25 need_min: bool,
26}
27
28#[derive(Clone, Copy, Debug)]
30struct SplitPoint {
31 old_idx: usize, new_idx: usize, min_lo: bool, min_hi: bool, }
36
37pub struct XDiffDiffer<'a> {
39 differ: &'a Differ,
40}
41
42impl<'a> XDiffDiffer<'a> {
43 pub fn new(differ: &'a Differ) -> Self {
45 Self { differ }
46 }
47
48 fn xdiff(&self, old_lines: &[&str], new_lines: &[&str]) -> Vec<Change> {
50 let old_len = old_lines.len();
51 let new_len = new_lines.len();
52
53 let old_hash: Vec<u64> = old_lines.iter().map(|&line| self.hash_line(line)).collect();
55 let new_hash: Vec<u64> = new_lines.iter().map(|&line| self.hash_line(line)).collect();
56
57 let mut old_changes = vec![false; old_len];
61 let mut new_changes = vec![false; new_len];
62
63 let ndiags = old_len + new_len + 3;
65 let k_vec_size = 2 * ndiags + 2; let mut kvd = vec![0isize; k_vec_size]; let k_offset = new_len + 1; let approx_sqrt = (ndiags as f64).sqrt() as usize;
77 let mxcost = max(approx_sqrt, XDL_MAX_COST_MIN);
78 let env = AlgoEnv {
79 mxcost,
80 snake_cnt: XDL_SNAKE_CNT,
81 heur_min: XDL_HEUR_MIN_COST,
82 need_min: false,
86 };
87
88 let result = self.compare_recursive(
90 &old_hash,
91 &mut old_changes,
92 0,
93 old_len,
94 &new_hash,
95 &mut new_changes,
96 0,
97 new_len,
98 &mut kvd,
99 k_offset,
100 ndiags,
101 env,
102 );
103
104 if result.is_err() {
105 warn!("XDiff algorithm failed.");
107 return vec![];
108 }
109
110 self.build_script(&old_changes, &new_changes, old_len, new_len)
112 }
113
114 #[allow(clippy::too_many_arguments)]
117 fn compare_recursive(
118 &self,
119 old_hash: &[u64],
120 old_changes: &mut [bool],
121 mut old_start: usize,
122 mut old_end: usize,
123 new_hash: &[u64],
124 new_changes: &mut [bool],
125 mut new_start: usize,
126 mut new_end: usize,
127 kvd: &mut [isize], k_offset: usize, ndiags: usize, env: AlgoEnv,
131 ) -> Result<(), ()> {
132 while old_start < old_end
134 && new_start < new_end
135 && old_hash[old_start] == new_hash[new_start]
136 {
137 old_start += 1;
138 new_start += 1;
139 }
140 while old_start < old_end
142 && new_start < new_end
143 && old_hash[old_end - 1] == new_hash[new_end - 1]
144 {
145 old_end -= 1;
146 new_end -= 1;
147 }
148
149 if old_start == old_end {
151 if new_start < new_end {
152 new_changes[new_start..new_end]
154 .iter_mut()
155 .for_each(|c| *c = true);
156 }
157 return Ok(());
158 } else if new_start == new_end {
159 if old_start < old_end {
160 old_changes[old_start..old_end]
162 .iter_mut()
163 .for_each(|c| *c = true);
164 }
165 return Ok(());
166 }
167
168 let (kvdf_slice, kvdb_slice) = kvd.split_at_mut(ndiags);
170 let split_result = self.find_split_point(
171 old_hash, old_start, old_end, new_hash, new_start, new_end, kvdf_slice, kvdb_slice,
172 k_offset, env,
173 );
174
175 match split_result {
176 Ok(split) => {
177 let env_lo = AlgoEnv {
180 need_min: split.min_lo,
181 ..env
182 };
183 let env_hi = AlgoEnv {
184 need_min: split.min_hi,
185 ..env
186 };
187
188 self.compare_recursive(
189 old_hash,
190 old_changes,
191 old_start,
192 split.old_idx,
193 new_hash,
194 new_changes,
195 new_start,
196 split.new_idx,
197 kvd, k_offset,
199 ndiags,
200 env_lo,
201 )?;
202
203 self.compare_recursive(
204 old_hash,
205 old_changes,
206 split.old_idx,
207 old_end,
208 new_hash,
209 new_changes,
210 split.new_idx,
211 new_end,
212 kvd, k_offset,
214 ndiags,
215 env_hi,
216 )?;
217
218 Ok(())
219 }
220 Err(_) => {
221 Err(())
224 }
225 }
226 }
227
228 #[allow(clippy::too_many_arguments)]
231 fn find_split_point(
232 &self,
233 old_hash: &[u64],
234 old_start: usize,
235 old_end: usize,
236 new_hash: &[u64],
237 new_start: usize,
238 new_end: usize,
239 kvdf: &mut [isize], kvdb: &mut [isize], k_offset: usize, env: AlgoEnv,
243 ) -> Result<SplitPoint, ()> {
244 let old_start_i = old_start as isize;
246 let old_end_i = old_end as isize;
247 let new_start_i = new_start as isize;
248 let new_end_i = new_end as isize;
249
250 let dmin: isize = old_start_i - new_end_i;
252 let dmax: isize = old_end_i - new_start_i;
253 let fmid: isize = old_start_i - new_start_i;
254 let bmid: isize = old_end_i - new_end_i;
255 let odd: bool = (fmid - bmid) % 2 != 0;
256
257 let mut fmin: isize = fmid;
259 let mut fmax: isize = fmid;
260 let mut bmin: isize = bmid;
261 let mut bmax: isize = bmid;
262
263 kvdf[(fmid + k_offset as isize) as usize] = old_start_i;
266 kvdb[(bmid + k_offset as isize) as usize] = old_end_i;
267
268 kvdf[(fmid - 1 + k_offset as isize) as usize] = NEG_ONE;
270 kvdf[(fmid + 1 + k_offset as isize) as usize] = NEG_ONE;
271 kvdb[(bmid - 1 + k_offset as isize) as usize] = LINE_MAX;
272 kvdb[(bmid + 1 + k_offset as isize) as usize] = LINE_MAX;
273
274 for ec in 1.. {
275 let mut got_snake = false;
277
278 if fmin > dmin {
281 fmin -= 1;
282 kvdf[(fmin - 1 + k_offset as isize) as usize] = NEG_ONE; } else {
284 fmin += 1;
285 }
286 if fmax < dmax {
287 fmax += 1;
288 kvdf[(fmax + 1 + k_offset as isize) as usize] = NEG_ONE; } else {
290 fmax -= 1;
291 }
292
293 for d in (fmin..=fmax).rev().step_by(2) {
295 let k_idx = (d + k_offset as isize) as usize;
296 let km1_idx = (d - 1 + k_offset as isize) as usize;
297 let kp1_idx = (d + 1 + k_offset as isize) as usize;
298
299 let mut i1: isize = if kvdf[km1_idx] >= kvdf[kp1_idx] { kvdf[km1_idx] + 1 } else { kvdf[kp1_idx] };
301 let prev_i1 = i1;
302 let mut i2: isize = i1 - d; while i1 < old_end_i
306 && i2 < new_end_i
307 && old_hash[i1 as usize] == new_hash[i2 as usize]
308 {
309 i1 += 1;
310 i2 += 1;
311 }
312
313 if (i1 - prev_i1) as usize > env.snake_cnt {
314 got_snake = true;
315 }
316 kvdf[k_idx] = i1;
317
318 if odd && d >= bmin && d <= bmax {
320 let bk_idx = (d + k_offset as isize) as usize;
321 if kvdb[bk_idx] <= i1 {
322 return Ok(SplitPoint {
323 old_idx: i1 as usize,
324 new_idx: i2 as usize,
325 min_lo: true,
326 min_hi: true,
327 });
328 }
329 }
330 }
331
332 if bmin > dmin {
335 bmin -= 1;
336 kvdb[(bmin - 1 + k_offset as isize) as usize] = LINE_MAX;
337 } else {
338 bmin += 1;
339 }
340 if bmax < dmax {
341 bmax += 1;
342 kvdb[(bmax + 1 + k_offset as isize) as usize] = LINE_MAX;
343 } else {
344 bmax -= 1;
345 }
346
347 for d in (bmin..=bmax).rev().step_by(2) {
349 let k_idx = (d + k_offset as isize) as usize;
350 let km1_idx = (d - 1 + k_offset as isize) as usize;
351 let kp1_idx = (d + 1 + k_offset as isize) as usize;
352
353 let mut i1: isize = if kvdb[km1_idx] < kvdb[kp1_idx] { kvdb[km1_idx] } else { kvdb[kp1_idx] - 1 };
355 let prev_i1 = i1;
356 let mut i2: isize = i1 - d; while i1 > old_start_i
360 && i2 > new_start_i
361 && old_hash[(i1 - 1) as usize] == new_hash[(i2 - 1) as usize]
362 {
363 i1 -= 1;
364 i2 -= 1;
365 }
366
367 if (prev_i1 - i1) as usize > env.snake_cnt {
368 got_snake = true;
369 }
370 kvdb[k_idx] = i1;
371
372 if !odd && d >= fmin && d <= fmax {
374 let fk_idx = (d + k_offset as isize) as usize;
375 if i1 <= kvdf[fk_idx] {
376 return Ok(SplitPoint {
377 old_idx: i1 as usize,
378 new_idx: i2 as usize,
379 min_lo: true,
380 min_hi: true,
381 });
382 }
383 }
384 }
385
386 if !env.need_min {
388 if got_snake && ec > env.heur_min {
390 let mut best_v: isize = 0;
391 let mut best_split: Option<SplitPoint> = None;
392
393 for d in (fmin..=fmax).rev().step_by(2) {
395 let dd = (d - fmid).abs(); let i1 = kvdf[(d + k_offset as isize) as usize];
397 let i2 = i1 - d;
398 let v = (i1 - old_start_i) + (i2 - new_start_i) - dd; if v > (XDL_K_HEUR * ec) as isize
401 && v > best_v
402 && old_start_i + env.snake_cnt as isize <= i1
403 && i1 < old_end_i
404 && new_start_i + env.snake_cnt as isize <= i2
405 && i2 < new_end_i
406 {
407 let mut is_snake = true;
409 for k in 1..=env.snake_cnt {
410 if i1 < k as isize
411 || i2 < k as isize
412 || old_hash[(i1 - k as isize) as usize]
413 != new_hash[(i2 - k as isize) as usize]
414 {
415 is_snake = false;
416 break;
417 }
418 }
419 if is_snake {
420 best_v = v;
421 best_split = Some(SplitPoint {
422 old_idx: i1 as usize,
423 new_idx: i2 as usize,
424 min_lo: true,
425 min_hi: false,
426 });
427 }
428 }
429 }
430 if let Some(split) = best_split {
431 return Ok(split);
432 }
433
434 best_v = 0; best_split = None;
437 for d in (bmin..=bmax).rev().step_by(2) {
438 let dd = (d - bmid).abs();
439 let i1 = kvdb[(d + k_offset as isize) as usize];
440 let i2 = i1 - d;
441 let v = (old_end_i - i1) + (new_end_i - i2) - dd;
442
443 if v > (XDL_K_HEUR * ec) as isize
444 && v > best_v
445 && old_start_i < i1
446 && i1 <= old_end_i - env.snake_cnt as isize
447 && new_start_i < i2
448 && i2 <= new_end_i - env.snake_cnt as isize
449 {
450 let mut is_snake = true;
452 for k in 0..env.snake_cnt {
453 if i1 + k as isize >= old_end_i
454 || i2 + k as isize >= new_end_i
455 || old_hash[(i1 + k as isize) as usize]
456 != new_hash[(i2 + k as isize) as usize]
457 {
458 is_snake = false;
459 break;
460 }
461 }
462 if is_snake {
463 best_v = v;
464 best_split = Some(SplitPoint {
465 old_idx: i1 as usize,
466 new_idx: i2 as usize,
467 min_lo: false,
468 min_hi: true,
469 });
470 }
471 }
472 }
473 if let Some(split) = best_split {
474 return Ok(split);
475 }
476 }
477
478 if ec >= env.mxcost {
480 let mut fbest_val = -1;
481 let mut fbest_i1 = NEG_ONE;
482
483 for d in (fmin..=fmax).rev().step_by(2) {
484 let mut i1 = min(kvdf[(d + k_offset as isize) as usize], old_end_i);
485 let mut i2 = i1 - d;
486 if i2 > new_end_i {
487 i1 = new_end_i + d;
489 i2 = new_end_i;
490 }
491 if fbest_val < i1 + i2 {
492 fbest_val = i1 + i2;
493 fbest_i1 = i1;
494 }
495 }
496
497 let mut bbest_val = LINE_MAX;
498 let mut bbest_i1 = LINE_MAX;
499
500 for d in (bmin..=bmax).rev().step_by(2) {
501 let mut i1 = max(old_start_i, kvdb[(d + k_offset as isize) as usize]);
502 let mut i2 = i1 - d;
503 if i2 < new_start_i {
504 i1 = new_start_i + d;
506 i2 = new_start_i;
507 }
508 if i1 + i2 < bbest_val {
509 bbest_val = i1 + i2;
510 bbest_i1 = i1;
511 }
512 }
513
514 if (old_end_i + new_end_i - bbest_val)
516 < (fbest_val - (old_start_i + new_start_i))
517 {
518 return Ok(SplitPoint {
520 old_idx: fbest_i1 as usize,
521 new_idx: (fbest_val - fbest_i1) as usize,
522 min_lo: true,
523 min_hi: false,
524 });
525 } else {
526 return Ok(SplitPoint {
528 old_idx: bbest_i1 as usize,
529 new_idx: (bbest_val - bbest_i1) as usize,
530 min_lo: false,
531 min_hi: true,
532 });
533 }
534 }
535 }
536 else if env.need_min && ec >= env.mxcost {
538 warn!("XDiff: Max cost reached in minimal mode without finding overlap.");
541 return Err(()); }
543 } warn!("XDiff: find_split_point loop exited unexpectedly.");
548 Err(())
549 }
550
551 fn hash_line(&self, line: &str) -> u64 {
553 let mut hash: u64 = 0xcbf29ce484222325;
554 for byte in line.bytes() {
555 hash ^= byte as u64;
556 hash = hash.wrapping_mul(0x100000001b3);
557 }
558 hash
559 }
560
561 fn build_script(
564 &self,
565 old_changes: &[bool],
566 new_changes: &[bool],
567 old_len: usize,
568 new_len: usize,
569 ) -> Vec<Change> {
570 let mut changes = Vec::new();
571 let mut i1 = 0; let mut i2 = 0; while i1 < old_len || i2 < new_len {
575 if i1 < old_len && i2 < new_len && !old_changes[i1] && !new_changes[i2] {
576 let start_i1 = i1;
578 let start_i2 = i2;
579 while i1 < old_len && i2 < new_len && !old_changes[i1] && !new_changes[i2] {
580 i1 += 1;
584 i2 += 1;
585 }
586 for k in 0..(i1 - start_i1) {
588 changes.push(Change::Equal(start_i1 + k, start_i2 + k));
589 }
590 } else {
591 let start_del = i1;
593 while i1 < old_len && old_changes[i1] {
594 i1 += 1;
595 }
596 if i1 > start_del {
597 changes.push(Change::Delete(start_del, i1 - start_del));
598 }
599
600 let start_ins = i2;
602 while i2 < new_len && new_changes[i2] {
603 i2 += 1;
604 }
605 if i2 > start_ins {
606 changes.push(Change::Insert(start_ins, i2 - start_ins));
607 }
608
609 if i1 == start_del && i2 == start_ins {
613 if i1 >= old_len && i2 >= new_len {
617 break;
618 }
619 let mut advanced = false;
624 if i1 < old_len && !old_changes[i1] {
625 i1 += 1;
626 advanced = true;
627 }
628 if i2 < new_len && !new_changes[i2] {
629 i2 += 1;
630 advanced = true;
631 }
632 if !advanced {
634 warn!("XDiff build_script stuck on unmarked changes.");
635 break;
636 }
637 }
638 }
639 }
640 changes
645 }
646
647 }
651
652impl DiffAlgorithm for XDiffDiffer<'_> {
653 fn generate(&self) -> Patch {
655 let old_lines: Vec<&str> = self.differ.old.lines().collect();
656 let new_lines: Vec<&str> = self.differ.new.lines().collect();
657
658 if let Some(patch) = handle_empty_files(&old_lines, &new_lines) {
660 return patch;
661 }
662
663 let changes = self.xdiff(&old_lines, &new_lines);
665
666 let chunks =
668 process_changes_to_chunks(&changes, &old_lines, &new_lines, self.differ.context_lines);
669
670 create_patch(chunks)
672 }
673}
674
675#[cfg(test)]
676mod tests {
677 use super::*;
678 use crate::{PatchAlgorithm, Patcher, differ::DiffAlgorithmType, test_utils::load_fixture};
679
680 #[test]
684 fn test_simple_xdiff() {
685 let old = "line1\\nline2\\nline3";
686 let new = "line1\\nline2\\nline3";
687
688 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
689 let xdiff = XDiffDiffer::new(&differ);
690 let patch = xdiff.generate();
691
692 assert!(
695 patch.chunks.is_empty(),
696 "Patch should be empty for identical files"
697 );
698 let result = Patcher::new(patch).apply(old, false).unwrap();
699 assert_eq!(result, old); }
701
702 #[test]
703 fn test_xdiff_add_line() {
704 let old = "line1\\nline2\\nline3";
705 let new = "line1\\nline2\\nline3\\nline4";
706
707 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
708 let xdiff = XDiffDiffer::new(&differ);
709 let patch = xdiff.generate();
710
711 assert_eq!(patch.chunks.len(), 1);
712 let result = Patcher::new(patch).apply(old, false).unwrap();
713 assert_eq!(result, new);
714 }
715
716 #[test]
717 fn test_xdiff_remove_line() {
718 let old = "line1\\nline2\\nline3";
719 let new = "line1\\nline3";
720
721 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
722 let xdiff = XDiffDiffer::new(&differ);
723 let patch = xdiff.generate();
724
725 assert_eq!(patch.chunks.len(), 1);
726 let result = Patcher::new(patch).apply(old, false).unwrap();
727 assert_eq!(result, new);
728 }
729
730 #[test]
731 fn test_xdiff_complex_changes() {
732 let old = "line1\\nline2\\nline3\\nline4\\nline5";
733 let new = "line1\\nmodified\\nline3\\nadded\\nline5";
734
735 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
736 let xdiff = XDiffDiffer::new(&differ);
737 let patch = xdiff.generate();
738
739 assert!(!patch.chunks.is_empty());
740 let result = Patcher::new(patch).apply(old, false).unwrap();
741 assert_eq!(result, new);
742 }
743
744 #[test]
745 fn test_xdiff_trailing_newline_change() {
746 let old = "a\\nb\\nc";
747 let new = "a\\nb\\nc\\n";
748 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
749 let patch = differ.generate();
750 let result = Patcher::new(patch).apply(old, false).unwrap();
751 assert_eq!(result, new);
752
753 let old = "a\\nb\\nc\\n";
754 let new = "a\\nb\\nc";
755 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
756 let patch = differ.generate();
757 let result = Patcher::new(patch).apply(old, false).unwrap();
758 assert_eq!(result, new);
759 }
760
761 #[test]
762 fn test_xdiff_leading_change() {
763 let old = "a\\nb\\nc";
764 let new = "x\\na\\nb\\nc";
765 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
766 let patch = differ.generate();
767 let result = Patcher::new(patch).apply(old, false).unwrap();
768 assert_eq!(result, new);
769 }
770
771 #[test]
772 fn test_xdiff_middle_change() {
773 let old = "a\\nb\\nc\\nd";
774 let new = "a\\nx\\ny\\nd";
775 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
776 let patch = differ.generate();
777 let result = Patcher::new(patch).apply(old, false).unwrap();
778 assert_eq!(result, new);
779 }
780
781 #[test]
782 fn test_empty_to_non_empty() {
783 let old = "";
784 let new = "line1\\nline2";
785 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
786 let patch = differ.generate();
787 assert_eq!(patch.chunks.len(), 1);
788 let result = Patcher::new(patch).apply(old, false).unwrap();
789 assert_eq!(result, new);
790 }
791
792 #[test]
793 fn test_non_empty_to_empty() {
794 let old = "line1\\nline2";
795 let new = "";
796 let differ = Differ::new_with_algorithm(old, new, DiffAlgorithmType::XDiff);
797 let patch = differ.generate();
798 assert_eq!(patch.chunks.len(), 1);
799 let result = Patcher::new(patch).apply(old, false).unwrap();
800 assert_eq!(result, new);
801 }
802
803 #[test]
805 fn test_xdiff_fixture_simple() {
806 let old = load_fixture("simple_before.rs");
807 let new = load_fixture("simple_after.rs");
808 let differ = Differ::new_with_algorithm(&old, &new, DiffAlgorithmType::XDiff);
809 let xdiff = XDiffDiffer::new(&differ);
810 let patch = xdiff.generate();
811 let result = Patcher::new(patch).apply(&old, false).unwrap();
812 assert_eq!(result, new);
813 }
814
815 #[test]
816 fn test_xdiff_fixture_python() {
817 let old = load_fixture("old.py");
818 let new = load_fixture("new.py");
819 let differ = Differ::new_with_algorithm(&old, &new, DiffAlgorithmType::XDiff);
820 let xdiff = XDiffDiffer::new(&differ);
821 let patch = xdiff.generate();
822 let result = Patcher::new(patch).apply(&old, false).unwrap();
823 assert_eq!(result, new);
824 }
825
826 #[test]
827 fn test_xdiff_fixture_complex() {
828 let old = load_fixture("complex_before.rs");
829 let new = load_fixture("complex_after.rs");
830 let differ = Differ::new_with_algorithm(&old, &new, DiffAlgorithmType::XDiff);
831 let xdiff = XDiffDiffer::new(&differ);
832 let patch = xdiff.generate();
833 let result = Patcher::new(patch).apply(&old, false).unwrap();
834 assert_eq!(result, new);
835 }
836}