Skip to main content

grit_lib/
diff_indent_heuristic.rs

1//! Git-compatible diff hunk sliding (`xdl_change_compact`) including the indent heuristic.
2//!
3//! Ported from Git's `xdiff/xdiffi.c` (`xdl_change_compact`, `score_split`, etc.).
4
5use similar::{Algorithm, DiffOp, DiffTag, TextDiff};
6
7const MAX_INDENT: i32 = 200;
8const MAX_BLANKS: i32 = 20;
9const START_OF_FILE_PENALTY: i32 = 1;
10const END_OF_FILE_PENALTY: i32 = 21;
11const TOTAL_BLANK_WEIGHT: i32 = -30;
12const POST_BLANK_WEIGHT: i32 = 6;
13const RELATIVE_INDENT_PENALTY: i32 = -4;
14const RELATIVE_INDENT_WITH_BLANK_PENALTY: i32 = 10;
15const RELATIVE_OUTDENT_PENALTY: i32 = 24;
16const RELATIVE_OUTDENT_WITH_BLANK_PENALTY: i32 = 17;
17const RELATIVE_DEDENT_PENALTY: i32 = 23;
18const RELATIVE_DEDENT_WITH_BLANK_PENALTY: i32 = 17;
19const INDENT_WEIGHT: i32 = 60;
20const INDENT_HEURISTIC_MAX_SLIDING: isize = 100;
21
22#[derive(Clone)]
23struct XdFile<'a> {
24    lines: &'a [&'a str],
25    /// `changed[i]` matches Git's `xdf->changed`: length `nrec + 1`, last entry always false.
26    changed: Vec<bool>,
27}
28
29impl<'a> XdFile<'a> {
30    fn from_mask(lines: &'a [&'a str], mut changed: Vec<bool>) -> Self {
31        let n = lines.len();
32        changed.resize(n + 1, false);
33        if !changed.is_empty() {
34            changed[n] = false;
35        }
36        Self { lines, changed }
37    }
38
39    fn nrec(&self) -> usize {
40        self.lines.len()
41    }
42
43    fn changed_at(&self, i: isize) -> bool {
44        if i < 0 {
45            return false;
46        }
47        let u = i as usize;
48        if u >= self.changed.len() {
49            return false;
50        }
51        self.changed[u]
52    }
53
54    fn set_changed(&mut self, i: usize, v: bool) {
55        if i < self.changed.len() {
56            self.changed[i] = v;
57        }
58    }
59
60    fn into_changed(self) -> Vec<bool> {
61        let n = self.lines.len();
62        let mut c = self.changed;
63        c.truncate(n);
64        c
65    }
66}
67
68fn get_indent(line: &str) -> i32 {
69    let mut ret = 0_i32;
70    for c in line.chars() {
71        if c == ' ' {
72            ret += 1;
73        } else if c == '\t' {
74            ret += 8 - ret % 8;
75        } else if c.is_whitespace() {
76            // Git ignores other whitespace in indent (no advance)
77        } else {
78            return ret;
79        }
80        if ret >= MAX_INDENT {
81            return MAX_INDENT;
82        }
83    }
84    -1
85}
86
87#[derive(Default)]
88struct SplitMeasurement {
89    end_of_file: i32,
90    indent: i32,
91    pre_blank: i32,
92    pre_indent: i32,
93    post_blank: i32,
94    post_indent: i32,
95}
96
97fn measure_split(xdf: &XdFile<'_>, split: isize, m: &mut SplitMeasurement) {
98    let n = xdf.nrec() as isize;
99    if split >= n {
100        m.end_of_file = 1;
101        m.indent = -1;
102    } else {
103        m.end_of_file = 0;
104        m.indent = get_indent(xdf.lines[split as usize]);
105    }
106
107    m.pre_blank = 0;
108    m.pre_indent = -1;
109    let mut i = split - 1;
110    loop {
111        if i < 0 {
112            break;
113        }
114        m.pre_indent = get_indent(xdf.lines[i as usize]);
115        if m.pre_indent != -1 {
116            break;
117        }
118        m.pre_blank += 1;
119        if m.pre_blank == MAX_BLANKS {
120            m.pre_indent = 0;
121            break;
122        }
123        i -= 1;
124    }
125
126    m.post_blank = 0;
127    m.post_indent = -1;
128    i = split + 1;
129    loop {
130        if i >= n {
131            break;
132        }
133        m.post_indent = get_indent(xdf.lines[i as usize]);
134        if m.post_indent != -1 {
135            break;
136        }
137        m.post_blank += 1;
138        if m.post_blank == MAX_BLANKS {
139            m.post_indent = 0;
140            break;
141        }
142        i += 1;
143    }
144}
145
146#[derive(Default, Clone)]
147struct SplitScore {
148    effective_indent: i32,
149    penalty: i32,
150}
151
152fn score_add_split(m: &SplitMeasurement, s: &mut SplitScore) {
153    if m.pre_indent == -1 && m.pre_blank == 0 {
154        s.penalty += START_OF_FILE_PENALTY;
155    }
156
157    if m.end_of_file != 0 {
158        s.penalty += END_OF_FILE_PENALTY;
159    }
160
161    let post_blank = if m.indent == -1 { 1 + m.post_blank } else { 0 };
162    let total_blank = m.pre_blank + post_blank;
163
164    s.penalty += TOTAL_BLANK_WEIGHT * total_blank;
165    s.penalty += POST_BLANK_WEIGHT * post_blank;
166
167    let indent = if m.indent != -1 {
168        m.indent
169    } else {
170        m.post_indent
171    };
172
173    let any_blanks = total_blank != 0;
174
175    s.effective_indent += indent;
176
177    if indent == -1 {
178        // no-op
179    } else if m.pre_indent == -1 {
180        // no-op
181    } else if indent > m.pre_indent {
182        s.penalty += if any_blanks {
183            RELATIVE_INDENT_WITH_BLANK_PENALTY
184        } else {
185            RELATIVE_INDENT_PENALTY
186        };
187    } else if indent == m.pre_indent {
188        // no-op
189    } else if m.post_indent != -1 && m.post_indent > indent {
190        s.penalty += if any_blanks {
191            RELATIVE_OUTDENT_WITH_BLANK_PENALTY
192        } else {
193            RELATIVE_OUTDENT_PENALTY
194        };
195    } else {
196        s.penalty += if any_blanks {
197            RELATIVE_DEDENT_WITH_BLANK_PENALTY
198        } else {
199            RELATIVE_DEDENT_PENALTY
200        };
201    }
202}
203
204fn score_cmp(s1: &SplitScore, s2: &SplitScore) -> i32 {
205    let cmp_indents = (s1.effective_indent > s2.effective_indent) as i32
206        - (s1.effective_indent < s2.effective_indent) as i32;
207    INDENT_WEIGHT * cmp_indents + (s1.penalty - s2.penalty)
208}
209
210#[derive(Clone, Copy)]
211struct XdlGroup {
212    start: isize,
213    end: isize,
214}
215
216fn group_init(xdf: &XdFile<'_>, g: &mut XdlGroup) {
217    g.start = 0;
218    g.end = 0;
219    let n = xdf.nrec() as isize;
220    while g.end < n && xdf.changed_at(g.end) {
221        g.end += 1;
222    }
223}
224
225fn group_next(xdf: &XdFile<'_>, g: &mut XdlGroup) -> bool {
226    let n = xdf.nrec() as isize;
227    if g.end == n {
228        return false;
229    }
230    g.start = g.end + 1;
231    g.end = g.start;
232    while g.end < n && xdf.changed_at(g.end) {
233        g.end += 1;
234    }
235    true
236}
237
238fn group_previous(xdf: &XdFile<'_>, g: &mut XdlGroup) -> bool {
239    if g.start == 0 {
240        return false;
241    }
242    g.end = g.start - 1;
243    g.start = g.end;
244    while g.start > 0 && xdf.changed_at(g.start - 1) {
245        g.start -= 1;
246    }
247    true
248}
249
250fn recs_match(xdf: &XdFile<'_>, i: isize, j: isize) -> bool {
251    if i < 0 || j < 0 {
252        return false;
253    }
254    let ui = i as usize;
255    let uj = j as usize;
256    if ui >= xdf.nrec() || uj >= xdf.nrec() {
257        return false;
258    }
259    xdf.lines[ui] == xdf.lines[uj]
260}
261
262fn group_slide_down(xdf: &mut XdFile<'_>, g: &mut XdlGroup) -> bool {
263    let n = xdf.nrec() as isize;
264    if g.end < n && recs_match(xdf, g.start, g.end) {
265        xdf.set_changed(g.start as usize, false);
266        xdf.set_changed(g.end as usize, true);
267        g.start += 1;
268        g.end += 1;
269        while g.end < n && xdf.changed_at(g.end) {
270            g.end += 1;
271        }
272        return true;
273    }
274    false
275}
276
277fn group_slide_up(xdf: &mut XdFile<'_>, g: &mut XdlGroup) -> bool {
278    if g.start > 0 && recs_match(xdf, g.start - 1, g.end - 1) {
279        g.start -= 1;
280        g.end -= 1;
281        xdf.set_changed(g.start as usize, true);
282        xdf.set_changed(g.end as usize, false);
283        while g.start > 0 && xdf.changed_at(g.start - 1) {
284            g.start -= 1;
285        }
286        return true;
287    }
288    false
289}
290
291fn change_compact_one(xdf: &mut XdFile<'_>, xdfo: &mut XdFile<'_>, indent_heuristic: bool) {
292    let mut g = XdlGroup { start: 0, end: 0 };
293    let mut go = XdlGroup { start: 0, end: 0 };
294    group_init(xdf, &mut g);
295    group_init(xdfo, &mut go);
296
297    loop {
298        if g.end != g.start {
299            loop {
300                let groupsize = g.end - g.start;
301                let mut end_matching_other = -1_isize;
302
303                // Git's C uses `while (!group_slide_up)` with 0=success; here bool true=success.
304                while group_slide_up(xdf, &mut g) {
305                    let slid = group_previous(xdfo, &mut go);
306                    debug_assert!(slid, "group sync broken sliding up");
307                }
308
309                let earliest_end = g.end;
310                if go.end > go.start {
311                    end_matching_other = g.end;
312                }
313
314                loop {
315                    if !group_slide_down(xdf, &mut g) {
316                        break;
317                    }
318                    let slid = group_next(xdfo, &mut go);
319                    debug_assert!(slid, "group sync broken sliding down");
320                    if go.end > go.start {
321                        end_matching_other = g.end;
322                    }
323                }
324
325                if groupsize != g.end - g.start {
326                    continue;
327                }
328
329                if g.end == earliest_end {
330                    // no shifting was possible
331                } else if end_matching_other != -1 {
332                    while go.end == go.start {
333                        let slid = group_slide_up(xdf, &mut g);
334                        debug_assert!(slid, "match disappeared");
335                        let p = group_previous(xdfo, &mut go);
336                        debug_assert!(p, "group sync broken sliding to match");
337                    }
338                } else if indent_heuristic {
339                    apply_indent_heuristic_shift(
340                        xdf,
341                        xdfo,
342                        &mut g,
343                        &mut go,
344                        groupsize,
345                        earliest_end,
346                    );
347                }
348                break;
349            }
350        }
351
352        if !group_next(xdf, &mut g) {
353            break;
354        }
355        let advanced = group_next(xdfo, &mut go);
356        debug_assert!(advanced, "group sync broken at end of file");
357    }
358}
359
360fn apply_indent_heuristic_shift(
361    xdf: &mut XdFile<'_>,
362    xdfo: &mut XdFile<'_>,
363    g: &mut XdlGroup,
364    go: &mut XdlGroup,
365    groupsize: isize,
366    earliest_end: isize,
367) {
368    let mut shift = earliest_end;
369    if g.end - groupsize - 1 > shift {
370        shift = g.end - groupsize - 1;
371    }
372    if g.end - INDENT_HEURISTIC_MAX_SLIDING > shift {
373        shift = g.end - INDENT_HEURISTIC_MAX_SLIDING;
374    }
375
376    let mut best_shift: Option<isize> = None;
377    let mut best_score = SplitScore::default();
378
379    let mut s = shift;
380    while s <= g.end {
381        let mut m = SplitMeasurement::default();
382        let mut score = SplitScore::default();
383        measure_split(xdf, s, &mut m);
384        score_add_split(&m, &mut score);
385        measure_split(xdf, s - groupsize, &mut m);
386        score_add_split(&m, &mut score);
387
388        if best_shift.is_none() || score_cmp(&score, &best_score) <= 0 {
389            best_score.effective_indent = score.effective_indent;
390            best_score.penalty = score.penalty;
391            best_shift = Some(s);
392        }
393        s += 1;
394    }
395
396    let Some(best_shift) = best_shift else {
397        return;
398    };
399    while g.end > best_shift {
400        let _ = group_slide_up(xdf, g);
401        let _ = group_previous(xdfo, go);
402    }
403}
404
405fn change_compact_both_passes(
406    old_lines: &[&str],
407    new_lines: &[&str],
408    changed_old: &mut Vec<bool>,
409    changed_new: &mut Vec<bool>,
410    indent_heuristic: bool,
411) {
412    let c_old = std::mem::take(changed_old);
413    let c_new = std::mem::take(changed_new);
414    let mut xdf = XdFile::from_mask(old_lines, c_old);
415    let mut xdfo = XdFile::from_mask(new_lines, c_new);
416
417    change_compact_one(&mut xdf, &mut xdfo, indent_heuristic);
418    *changed_old = xdf.into_changed();
419    *changed_new = xdfo.into_changed();
420
421    let mut xdf2 = XdFile::from_mask(new_lines, std::mem::take(changed_new));
422    let mut xdfo2 = XdFile::from_mask(old_lines, std::mem::take(changed_old));
423    change_compact_one(&mut xdf2, &mut xdfo2, indent_heuristic);
424    *changed_new = xdf2.into_changed();
425    *changed_old = xdfo2.into_changed();
426}
427
428/// Initial `changed[]` masks from a line diff (Myers etc.), matching Git's xdf `changed` flags.
429fn masks_from_ops(ops: &[DiffOp], old_len: usize, new_len: usize) -> (Vec<bool>, Vec<bool>) {
430    let mut c1 = vec![false; old_len];
431    let mut c2 = vec![false; new_len];
432    for op in ops {
433        match op.tag() {
434            DiffTag::Equal => {}
435            DiffTag::Delete => {
436                for i in op.old_range() {
437                    if i < old_len {
438                        c1[i] = true;
439                    }
440                }
441            }
442            DiffTag::Insert => {
443                for j in op.new_range() {
444                    if j < new_len {
445                        c2[j] = true;
446                    }
447                }
448            }
449            DiffTag::Replace => {
450                for i in op.old_range() {
451                    if i < old_len {
452                        c1[i] = true;
453                    }
454                }
455                for j in op.new_range() {
456                    if j < new_len {
457                        c2[j] = true;
458                    }
459                }
460            }
461        }
462    }
463    (c1, c2)
464}
465
466/// Rebuild `DiffOp` list from compacted masks (Git `xdl_build_script`, reversed collect).
467fn ops_from_masks(old_lines: &[&str], new_lines: &[&str], c1: &[bool], c2: &[bool]) -> Vec<DiffOp> {
468    let n1 = old_lines.len() as isize;
469    let n2 = new_lines.len() as isize;
470    let mut out_rev: Vec<DiffOp> = Vec::new();
471
472    let changed1 = |i: isize| -> bool {
473        if i < 0 || i >= n1 {
474            return false;
475        }
476        c1[i as usize]
477    };
478    let changed2 = |i: isize| -> bool {
479        if i < 0 || i >= n2 {
480            return false;
481        }
482        c2[i as usize]
483    };
484
485    let mut i1 = n1;
486    let mut i2 = n2;
487    while i1 > 0 || i2 > 0 {
488        if changed1(i1 - 1) || changed2(i2 - 1) {
489            let l1 = i1;
490            let l2 = i2;
491            while i1 > 0 && changed1(i1 - 1) {
492                i1 -= 1;
493            }
494            while i2 > 0 && changed2(i2 - 1) {
495                i2 -= 1;
496            }
497            let chg1 = (l1 - i1) as usize;
498            let chg2 = (l2 - i2) as usize;
499            let i1u = i1 as usize;
500            let i2u = i2 as usize;
501
502            if chg1 > 0 && chg2 > 0 {
503                out_rev.push(DiffOp::Replace {
504                    old_index: i1u,
505                    old_len: chg1,
506                    new_index: i2u,
507                    new_len: chg2,
508                });
509            } else if chg1 > 0 {
510                out_rev.push(DiffOp::Delete {
511                    old_index: i1u,
512                    old_len: chg1,
513                    new_index: i2u,
514                });
515            } else if chg2 > 0 {
516                out_rev.push(DiffOp::Insert {
517                    old_index: i1u,
518                    new_index: i2u,
519                    new_len: chg2,
520                });
521            }
522        } else {
523            // equal line
524            i1 -= 1;
525            i2 -= 1;
526            out_rev.push(DiffOp::Equal {
527                old_index: i1 as usize,
528                new_index: i2 as usize,
529                len: 1,
530            });
531        }
532    }
533
534    out_rev.reverse();
535    merge_adjacent_equal_ops(out_rev)
536}
537
538fn merge_adjacent_equal_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
539    if ops.is_empty() {
540        return ops;
541    }
542    let mut merged: Vec<DiffOp> = Vec::with_capacity(ops.len());
543    for op in ops {
544        if let (
545            Some(DiffOp::Equal {
546                old_index: o0,
547                new_index: n0,
548                len: l0,
549            }),
550            DiffOp::Equal {
551                old_index: o1,
552                new_index: n1,
553                len: l1,
554            },
555        ) = (merged.last(), op)
556        {
557            if o0 + l0 == o1 && n0 + l0 == n1 {
558                let last = merged.len() - 1;
559                if let DiffOp::Equal { len, .. } = &mut merged[last] {
560                    *len += l1;
561                }
562                continue;
563            }
564        }
565        merged.push(op);
566    }
567    merged
568}
569
570/// Apply Git `xdl_change_compact` twice (old/new), optionally with `XDF_INDENT_HEURISTIC`.
571pub fn apply_change_compact_to_ops(
572    ops: &[DiffOp],
573    old_lines: &[&str],
574    new_lines: &[&str],
575    indent_heuristic: bool,
576) -> Vec<DiffOp> {
577    let (mut c1, mut c2) = masks_from_ops(ops, old_lines.len(), new_lines.len());
578    change_compact_both_passes(old_lines, new_lines, &mut c1, &mut c2, indent_heuristic);
579    ops_from_masks(old_lines, new_lines, &c1, &c2)
580}
581
582/// Line-diff then Git-style compaction; used by unified diff generation.
583pub fn diff_lines_ops_compacted(
584    old_content: &str,
585    new_content: &str,
586    algorithm: Algorithm,
587    indent_heuristic: bool,
588) -> Vec<DiffOp> {
589    let diff = TextDiff::configure()
590        .algorithm(algorithm)
591        .diff_lines(old_content, new_content);
592    let old_lines: Vec<&str> = old_content.lines().collect();
593    let new_lines: Vec<&str> = new_content.lines().collect();
594    let raw = diff.ops().to_vec();
595    apply_change_compact_to_ops(&raw, &old_lines, &new_lines, indent_heuristic)
596}
597
598/// Like [`diff_lines_ops_compacted`] but for pre-split line slices (e.g. `--no-index` compare keys).
599pub fn diff_slice_ops_compacted(
600    old_lines: &[&str],
601    new_lines: &[&str],
602    algorithm: Algorithm,
603    indent_heuristic: bool,
604) -> Vec<DiffOp> {
605    let diff = TextDiff::configure()
606        .algorithm(algorithm)
607        .diff_slices(old_lines, new_lines);
608    let raw = diff.ops().to_vec();
609    apply_change_compact_to_ops(&raw, old_lines, new_lines, indent_heuristic)
610}
611
612/// Map each line in `new` to its origin line in `old` (if any), matching [`similar::TextDiff::iter_all_changes`]
613/// semantics over `ops` (typically compacted ops).
614pub(crate) fn map_new_to_old_from_ops(ops: &[DiffOp], new_line_count: usize) -> Vec<Option<usize>> {
615    let mut result = vec![None; new_line_count];
616    let mut old_idx: usize = 0;
617    let mut new_idx: usize = 0;
618
619    for op in ops {
620        match op.tag() {
621            DiffTag::Equal => {
622                let len = op.new_range().len();
623                for k in 0..len {
624                    if new_idx + k < result.len() {
625                        result[new_idx + k] = Some(old_idx + k);
626                    }
627                }
628                old_idx += len;
629                new_idx += len;
630            }
631            DiffTag::Delete => {
632                old_idx += op.old_range().len();
633            }
634            DiffTag::Insert => {
635                new_idx += op.new_range().len();
636            }
637            DiffTag::Replace => {
638                old_idx += op.old_range().len();
639                new_idx += op.new_range().len();
640            }
641        }
642    }
643
644    result
645}