1use similar::{Algorithm, DiffOp, DiffTag, TextDiff};
6
7const MAX_INDENT: i32 = 200;
8const MAX_BLANKS: i32 = 20;
9const START_OF_FILE_PENALTY: i32 = 1;
10const END_OF_FILE_PENALTY: i32 = 21;
11const TOTAL_BLANK_WEIGHT: i32 = -30;
12const POST_BLANK_WEIGHT: i32 = 6;
13const RELATIVE_INDENT_PENALTY: i32 = -4;
14const RELATIVE_INDENT_WITH_BLANK_PENALTY: i32 = 10;
15const RELATIVE_OUTDENT_PENALTY: i32 = 24;
16const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i32 = 17;
17const RELATIVE_DEDENT_PENALTY: i32 = 23;
18const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i32 = 17;
19const INDENT_WEIGHT: i32 = 60;
20const INDENT_HEURISTIC_MAX_SLIDING: isize = 100;
21
22#[derive(Clone)]
23struct XdFile<'a> {
24 lines: &'a [&'a str],
25 changed: Vec<bool>,
27}
28
29impl<'a> XdFile<'a> {
30 fn from_mask(lines: &'a [&'a str], mut changed: Vec<bool>) -> Self {
31 let n = lines.len();
32 changed.resize(n + 1, false);
33 if !changed.is_empty() {
34 changed[n] = false;
35 }
36 Self { lines, changed }
37 }
38
39 fn nrec(&self) -> usize {
40 self.lines.len()
41 }
42
43 fn changed_at(&self, i: isize) -> bool {
44 if i < 0 {
45 return false;
46 }
47 let u = i as usize;
48 if u >= self.changed.len() {
49 return false;
50 }
51 self.changed[u]
52 }
53
54 fn set_changed(&mut self, i: usize, v: bool) {
55 if i < self.changed.len() {
56 self.changed[i] = v;
57 }
58 }
59
60 fn into_changed(self) -> Vec<bool> {
61 let n = self.lines.len();
62 let mut c = self.changed;
63 c.truncate(n);
64 c
65 }
66}
67
68fn get_indent(line: &str) -> i32 {
69 let mut ret = 0_i32;
70 for c in line.chars() {
71 if c == ' ' {
72 ret += 1;
73 } else if c == '\t' {
74 ret += 8 - ret % 8;
75 } else if c.is_whitespace() {
76 } else {
78 return ret;
79 }
80 if ret >= MAX_INDENT {
81 return MAX_INDENT;
82 }
83 }
84 -1
85}
86
87#[derive(Default)]
88struct SplitMeasurement {
89 end_of_file: i32,
90 indent: i32,
91 pre_blank: i32,
92 pre_indent: i32,
93 post_blank: i32,
94 post_indent: i32,
95}
96
97fn measure_split(xdf: &XdFile<'_>, split: isize, m: &mut SplitMeasurement) {
98 let n = xdf.nrec() as isize;
99 if split >= n {
100 m.end_of_file = 1;
101 m.indent = -1;
102 } else {
103 m.end_of_file = 0;
104 m.indent = get_indent(xdf.lines[split as usize]);
105 }
106
107 m.pre_blank = 0;
108 m.pre_indent = -1;
109 let mut i = split - 1;
110 loop {
111 if i < 0 {
112 break;
113 }
114 m.pre_indent = get_indent(xdf.lines[i as usize]);
115 if m.pre_indent != -1 {
116 break;
117 }
118 m.pre_blank += 1;
119 if m.pre_blank == MAX_BLANKS {
120 m.pre_indent = 0;
121 break;
122 }
123 i -= 1;
124 }
125
126 m.post_blank = 0;
127 m.post_indent = -1;
128 i = split + 1;
129 loop {
130 if i >= n {
131 break;
132 }
133 m.post_indent = get_indent(xdf.lines[i as usize]);
134 if m.post_indent != -1 {
135 break;
136 }
137 m.post_blank += 1;
138 if m.post_blank == MAX_BLANKS {
139 m.post_indent = 0;
140 break;
141 }
142 i += 1;
143 }
144}
145
146#[derive(Default, Clone)]
147struct SplitScore {
148 effective_indent: i32,
149 penalty: i32,
150}
151
152fn score_add_split(m: &SplitMeasurement, s: &mut SplitScore) {
153 if m.pre_indent == -1 && m.pre_blank == 0 {
154 s.penalty += START_OF_FILE_PENALTY;
155 }
156
157 if m.end_of_file != 0 {
158 s.penalty += END_OF_FILE_PENALTY;
159 }
160
161 let post_blank = if m.indent == -1 { 1 + m.post_blank } else { 0 };
162 let total_blank = m.pre_blank + post_blank;
163
164 s.penalty += TOTAL_BLANK_WEIGHT * total_blank;
165 s.penalty += POST_BLANK_WEIGHT * post_blank;
166
167 let indent = if m.indent != -1 {
168 m.indent
169 } else {
170 m.post_indent
171 };
172
173 let any_blanks = total_blank != 0;
174
175 s.effective_indent += indent;
176
177 if indent == -1 {
178 } else if m.pre_indent == -1 {
180 } else if indent > m.pre_indent {
182 s.penalty += if any_blanks {
183 RELATIVE_INDENT_WITH_BLANK_PENALTY
184 } else {
185 RELATIVE_INDENT_PENALTY
186 };
187 } else if indent == m.pre_indent {
188 } else if m.post_indent != -1 && m.post_indent > indent {
190 s.penalty += if any_blanks {
191 RELATIVE_OUTDENT_WITH_BLANK_PENALTY
192 } else {
193 RELATIVE_OUTDENT_PENALTY
194 };
195 } else {
196 s.penalty += if any_blanks {
197 RELATIVE_DEDENT_WITH_BLANK_PENALTY
198 } else {
199 RELATIVE_DEDENT_PENALTY
200 };
201 }
202}
203
204fn score_cmp(s1: &SplitScore, s2: &SplitScore) -> i32 {
205 let cmp_indents = (s1.effective_indent > s2.effective_indent) as i32
206 - (s1.effective_indent < s2.effective_indent) as i32;
207 INDENT_WEIGHT * cmp_indents + (s1.penalty - s2.penalty)
208}
209
210#[derive(Clone, Copy)]
211struct XdlGroup {
212 start: isize,
213 end: isize,
214}
215
216fn group_init(xdf: &XdFile<'_>, g: &mut XdlGroup) {
217 g.start = 0;
218 g.end = 0;
219 let n = xdf.nrec() as isize;
220 while g.end < n && xdf.changed_at(g.end) {
221 g.end += 1;
222 }
223}
224
225fn group_next(xdf: &XdFile<'_>, g: &mut XdlGroup) -> bool {
226 let n = xdf.nrec() as isize;
227 if g.end == n {
228 return false;
229 }
230 g.start = g.end + 1;
231 g.end = g.start;
232 while g.end < n && xdf.changed_at(g.end) {
233 g.end += 1;
234 }
235 true
236}
237
238fn group_previous(xdf: &XdFile<'_>, g: &mut XdlGroup) -> bool {
239 if g.start == 0 {
240 return false;
241 }
242 g.end = g.start - 1;
243 g.start = g.end;
244 while g.start > 0 && xdf.changed_at(g.start - 1) {
245 g.start -= 1;
246 }
247 true
248}
249
250fn recs_match(xdf: &XdFile<'_>, i: isize, j: isize) -> bool {
251 if i < 0 || j < 0 {
252 return false;
253 }
254 let ui = i as usize;
255 let uj = j as usize;
256 if ui >= xdf.nrec() || uj >= xdf.nrec() {
257 return false;
258 }
259 xdf.lines[ui] == xdf.lines[uj]
260}
261
262fn group_slide_down(xdf: &mut XdFile<'_>, g: &mut XdlGroup) -> bool {
263 let n = xdf.nrec() as isize;
264 if g.end < n && recs_match(xdf, g.start, g.end) {
265 xdf.set_changed(g.start as usize, false);
266 xdf.set_changed(g.end as usize, true);
267 g.start += 1;
268 g.end += 1;
269 while g.end < n && xdf.changed_at(g.end) {
270 g.end += 1;
271 }
272 return true;
273 }
274 false
275}
276
277fn group_slide_up(xdf: &mut XdFile<'_>, g: &mut XdlGroup) -> bool {
278 if g.start > 0 && recs_match(xdf, g.start - 1, g.end - 1) {
279 g.start -= 1;
280 g.end -= 1;
281 xdf.set_changed(g.start as usize, true);
282 xdf.set_changed(g.end as usize, false);
283 while g.start > 0 && xdf.changed_at(g.start - 1) {
284 g.start -= 1;
285 }
286 return true;
287 }
288 false
289}
290
291fn change_compact_one(xdf: &mut XdFile<'_>, xdfo: &mut XdFile<'_>, indent_heuristic: bool) {
292 let mut g = XdlGroup { start: 0, end: 0 };
293 let mut go = XdlGroup { start: 0, end: 0 };
294 group_init(xdf, &mut g);
295 group_init(xdfo, &mut go);
296
297 loop {
298 if g.end != g.start {
299 loop {
300 let groupsize = g.end - g.start;
301 let mut end_matching_other = -1_isize;
302
303 while group_slide_up(xdf, &mut g) {
305 let slid = group_previous(xdfo, &mut go);
306 debug_assert!(slid, "group sync broken sliding up");
307 }
308
309 let earliest_end = g.end;
310 if go.end > go.start {
311 end_matching_other = g.end;
312 }
313
314 loop {
315 if !group_slide_down(xdf, &mut g) {
316 break;
317 }
318 let slid = group_next(xdfo, &mut go);
319 debug_assert!(slid, "group sync broken sliding down");
320 if go.end > go.start {
321 end_matching_other = g.end;
322 }
323 }
324
325 if groupsize != g.end - g.start {
326 continue;
327 }
328
329 if g.end == earliest_end {
330 } else if end_matching_other != -1 {
332 while go.end == go.start {
333 let slid = group_slide_up(xdf, &mut g);
334 debug_assert!(slid, "match disappeared");
335 let p = group_previous(xdfo, &mut go);
336 debug_assert!(p, "group sync broken sliding to match");
337 }
338 } else if indent_heuristic {
339 apply_indent_heuristic_shift(
340 xdf,
341 xdfo,
342 &mut g,
343 &mut go,
344 groupsize,
345 earliest_end,
346 );
347 }
348 break;
349 }
350 }
351
352 if !group_next(xdf, &mut g) {
353 break;
354 }
355 let advanced = group_next(xdfo, &mut go);
356 debug_assert!(advanced, "group sync broken at end of file");
357 }
358}
359
360fn apply_indent_heuristic_shift(
361 xdf: &mut XdFile<'_>,
362 xdfo: &mut XdFile<'_>,
363 g: &mut XdlGroup,
364 go: &mut XdlGroup,
365 groupsize: isize,
366 earliest_end: isize,
367) {
368 let mut shift = earliest_end;
369 if g.end - groupsize - 1 > shift {
370 shift = g.end - groupsize - 1;
371 }
372 if g.end - INDENT_HEURISTIC_MAX_SLIDING > shift {
373 shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
374 }
375
376 let mut best_shift: Option<isize> = None;
377 let mut best_score = SplitScore::default();
378
379 let mut s = shift;
380 while s <= g.end {
381 let mut m = SplitMeasurement::default();
382 let mut score = SplitScore::default();
383 measure_split(xdf, s, &mut m);
384 score_add_split(&m, &mut score);
385 measure_split(xdf, s - groupsize, &mut m);
386 score_add_split(&m, &mut score);
387
388 if best_shift.is_none() || score_cmp(&score, &best_score) <= 0 {
389 best_score.effective_indent = score.effective_indent;
390 best_score.penalty = score.penalty;
391 best_shift = Some(s);
392 }
393 s += 1;
394 }
395
396 let Some(best_shift) = best_shift else {
397 return;
398 };
399 while g.end > best_shift {
400 let _ = group_slide_up(xdf, g);
401 let _ = group_previous(xdfo, go);
402 }
403}
404
405fn change_compact_both_passes(
406 old_lines: &[&str],
407 new_lines: &[&str],
408 changed_old: &mut Vec<bool>,
409 changed_new: &mut Vec<bool>,
410 indent_heuristic: bool,
411) {
412 let c_old = std::mem::take(changed_old);
413 let c_new = std::mem::take(changed_new);
414 let mut xdf = XdFile::from_mask(old_lines, c_old);
415 let mut xdfo = XdFile::from_mask(new_lines, c_new);
416
417 change_compact_one(&mut xdf, &mut xdfo, indent_heuristic);
418 *changed_old = xdf.into_changed();
419 *changed_new = xdfo.into_changed();
420
421 let mut xdf2 = XdFile::from_mask(new_lines, std::mem::take(changed_new));
422 let mut xdfo2 = XdFile::from_mask(old_lines, std::mem::take(changed_old));
423 change_compact_one(&mut xdf2, &mut xdfo2, indent_heuristic);
424 *changed_new = xdf2.into_changed();
425 *changed_old = xdfo2.into_changed();
426}
427
428fn masks_from_ops(ops: &[DiffOp], old_len: usize, new_len: usize) -> (Vec<bool>, Vec<bool>) {
430 let mut c1 = vec![false; old_len];
431 let mut c2 = vec![false; new_len];
432 for op in ops {
433 match op.tag() {
434 DiffTag::Equal => {}
435 DiffTag::Delete => {
436 for i in op.old_range() {
437 if i < old_len {
438 c1[i] = true;
439 }
440 }
441 }
442 DiffTag::Insert => {
443 for j in op.new_range() {
444 if j < new_len {
445 c2[j] = true;
446 }
447 }
448 }
449 DiffTag::Replace => {
450 for i in op.old_range() {
451 if i < old_len {
452 c1[i] = true;
453 }
454 }
455 for j in op.new_range() {
456 if j < new_len {
457 c2[j] = true;
458 }
459 }
460 }
461 }
462 }
463 (c1, c2)
464}
465
466fn ops_from_masks(old_lines: &[&str], new_lines: &[&str], c1: &[bool], c2: &[bool]) -> Vec<DiffOp> {
468 let n1 = old_lines.len() as isize;
469 let n2 = new_lines.len() as isize;
470 let mut out_rev: Vec<DiffOp> = Vec::new();
471
472 let changed1 = |i: isize| -> bool {
473 if i < 0 || i >= n1 {
474 return false;
475 }
476 c1[i as usize]
477 };
478 let changed2 = |i: isize| -> bool {
479 if i < 0 || i >= n2 {
480 return false;
481 }
482 c2[i as usize]
483 };
484
485 let mut i1 = n1;
486 let mut i2 = n2;
487 while i1 > 0 || i2 > 0 {
488 if changed1(i1 - 1) || changed2(i2 - 1) {
489 let l1 = i1;
490 let l2 = i2;
491 while i1 > 0 && changed1(i1 - 1) {
492 i1 -= 1;
493 }
494 while i2 > 0 && changed2(i2 - 1) {
495 i2 -= 1;
496 }
497 let chg1 = (l1 - i1) as usize;
498 let chg2 = (l2 - i2) as usize;
499 let i1u = i1 as usize;
500 let i2u = i2 as usize;
501
502 if chg1 > 0 && chg2 > 0 {
503 out_rev.push(DiffOp::Replace {
504 old_index: i1u,
505 old_len: chg1,
506 new_index: i2u,
507 new_len: chg2,
508 });
509 } else if chg1 > 0 {
510 out_rev.push(DiffOp::Delete {
511 old_index: i1u,
512 old_len: chg1,
513 new_index: i2u,
514 });
515 } else if chg2 > 0 {
516 out_rev.push(DiffOp::Insert {
517 old_index: i1u,
518 new_index: i2u,
519 new_len: chg2,
520 });
521 }
522 } else {
523 i1 -= 1;
525 i2 -= 1;
526 out_rev.push(DiffOp::Equal {
527 old_index: i1 as usize,
528 new_index: i2 as usize,
529 len: 1,
530 });
531 }
532 }
533
534 out_rev.reverse();
535 merge_adjacent_equal_ops(out_rev)
536}
537
538fn merge_adjacent_equal_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
539 if ops.is_empty() {
540 return ops;
541 }
542 let mut merged: Vec<DiffOp> = Vec::with_capacity(ops.len());
543 for op in ops {
544 if let (
545 Some(DiffOp::Equal {
546 old_index: o0,
547 new_index: n0,
548 len: l0,
549 }),
550 DiffOp::Equal {
551 old_index: o1,
552 new_index: n1,
553 len: l1,
554 },
555 ) = (merged.last(), op)
556 {
557 if o0 + l0 == o1 && n0 + l0 == n1 {
558 let last = merged.len() - 1;
559 if let DiffOp::Equal { len, .. } = &mut merged[last] {
560 *len += l1;
561 }
562 continue;
563 }
564 }
565 merged.push(op);
566 }
567 merged
568}
569
570pub fn apply_change_compact_to_ops(
572 ops: &[DiffOp],
573 old_lines: &[&str],
574 new_lines: &[&str],
575 indent_heuristic: bool,
576) -> Vec<DiffOp> {
577 let (mut c1, mut c2) = masks_from_ops(ops, old_lines.len(), new_lines.len());
578 change_compact_both_passes(old_lines, new_lines, &mut c1, &mut c2, indent_heuristic);
579 ops_from_masks(old_lines, new_lines, &c1, &c2)
580}
581
582pub fn diff_lines_ops_compacted(
584 old_content: &str,
585 new_content: &str,
586 algorithm: Algorithm,
587 indent_heuristic: bool,
588) -> Vec<DiffOp> {
589 let diff = TextDiff::configure()
590 .algorithm(algorithm)
591 .diff_lines(old_content, new_content);
592 let old_lines: Vec<&str> = old_content.lines().collect();
593 let new_lines: Vec<&str> = new_content.lines().collect();
594 let raw = diff.ops().to_vec();
595 apply_change_compact_to_ops(&raw, &old_lines, &new_lines, indent_heuristic)
596}
597
598pub fn diff_slice_ops_compacted(
600 old_lines: &[&str],
601 new_lines: &[&str],
602 algorithm: Algorithm,
603 indent_heuristic: bool,
604) -> Vec<DiffOp> {
605 let diff = TextDiff::configure()
606 .algorithm(algorithm)
607 .diff_slices(old_lines, new_lines);
608 let raw = diff.ops().to_vec();
609 apply_change_compact_to_ops(&raw, old_lines, new_lines, indent_heuristic)
610}
611
612pub(crate) fn map_new_to_old_from_ops(ops: &[DiffOp], new_line_count: usize) -> Vec<Option<usize>> {
615 let mut result = vec![None; new_line_count];
616 let mut old_idx: usize = 0;
617 let mut new_idx: usize = 0;
618
619 for op in ops {
620 match op.tag() {
621 DiffTag::Equal => {
622 let len = op.new_range().len();
623 for k in 0..len {
624 if new_idx + k < result.len() {
625 result[new_idx + k] = Some(old_idx + k);
626 }
627 }
628 old_idx += len;
629 new_idx += len;
630 }
631 DiffTag::Delete => {
632 old_idx += op.old_range().len();
633 }
634 DiffTag::Insert => {
635 new_idx += op.new_range().len();
636 }
637 DiffTag::Replace => {
638 old_idx += op.old_range().len();
639 new_idx += op.new_range().len();
640 }
641 }
642 }
643
644 result
645}