Skip to main content

grit_lib/
combined_diff_patch.rs

1//! Git-style combined merge diff hunks (`diff --cc` / `diff --combined`).
2//!
3//! This approximates Git's `combine-diff.c` behaviour: per-parent Myers diffs against
4//! the merge result, lost-line lists with LCS coalescing, dense (`--cc`) hunk
5//! suppression, and `@@@` headers.
6
7use similar::{capture_diff_slices, Algorithm, DiffOp};
8
9/// Whitespace handling for combined diffs (Git `xdl_opts` subset).
10#[derive(Debug, Clone, Copy, Default)]
11pub struct CombinedDiffWsOptions {
12    pub ignore_all_space: bool,
13    pub ignore_space_change: bool,
14    pub ignore_space_at_eol: bool,
15    pub ignore_cr_at_eol: bool,
16}
17
18impl CombinedDiffWsOptions {
19    #[must_use]
20    pub fn any(self) -> bool {
21        self.ignore_all_space
22            || self.ignore_space_change
23            || self.ignore_space_at_eol
24            || self.ignore_cr_at_eol
25    }
26}
27
28#[derive(Clone)]
29struct LostSeg {
30    text: String,
31    parent_map: u32,
32}
33
34fn strip_trailing_cr(s: &str, ignore: bool) -> &str {
35    if ignore && s.ends_with('\r') {
36        &s[..s.len().saturating_sub(1)]
37    } else {
38        s
39    }
40}
41
42fn line_key(line: &str, ws: CombinedDiffWsOptions) -> String {
43    let s = strip_trailing_cr(line, ws.ignore_cr_at_eol);
44    if ws.ignore_all_space {
45        s.chars().filter(|c| !c.is_whitespace()).collect()
46    } else if ws.ignore_space_change {
47        s.split_whitespace().collect::<Vec<_>>().join(" ")
48    } else if ws.ignore_space_at_eol {
49        s.trim_end_matches(|c: char| c.is_whitespace()).to_string()
50    } else {
51        s.to_string()
52    }
53}
54
55fn lines_match(a: &str, b: &str, ws: CombinedDiffWsOptions) -> bool {
56    line_key(a, ws) == line_key(b, ws)
57}
58
59/// Coalesce `incoming` lost segments into `base` using LCS (Git `coalesce_lines`).
60fn coalesce_lost(
61    base: Vec<LostSeg>,
62    incoming: Vec<LostSeg>,
63    parent_bit: u32,
64    ws: CombinedDiffWsOptions,
65) -> Vec<LostSeg> {
66    if incoming.is_empty() {
67        return base;
68    }
69    if base.is_empty() {
70        return incoming;
71    }
72    let ob = base.len();
73    let nw = incoming.len();
74    #[derive(Clone, Copy)]
75    enum Dir {
76        Match,
77        Base,
78        New,
79    }
80    let mut lcs = vec![vec![0usize; nw + 1]; ob + 1];
81    let mut dir = vec![vec![Dir::Base; nw + 1]; ob + 1];
82    for j in 1..=nw {
83        dir[0][j] = Dir::New;
84    }
85    for i in 1..=ob {
86        dir[i][0] = Dir::Base;
87    }
88    for i in 1..=ob {
89        for j in 1..=nw {
90            if lines_match(&base[i - 1].text, &incoming[j - 1].text, ws) {
91                lcs[i][j] = lcs[i - 1][j - 1] + 1;
92                dir[i][j] = Dir::Match;
93            } else if lcs[i][j - 1] >= lcs[i - 1][j] {
94                lcs[i][j] = lcs[i][j - 1];
95                dir[i][j] = Dir::New;
96            } else {
97                lcs[i][j] = lcs[i - 1][j];
98                dir[i][j] = Dir::Base;
99            }
100        }
101    }
102    let mut out: Vec<LostSeg> = Vec::new();
103    let mut i = ob;
104    let mut j = nw;
105    while i > 0 || j > 0 {
106        match dir[i][j] {
107            Dir::Match => {
108                let mut seg = base[i - 1].clone();
109                seg.parent_map |= parent_bit;
110                out.push(seg);
111                i -= 1;
112                j -= 1;
113            }
114            Dir::New => {
115                out.push(incoming[j - 1].clone());
116                j -= 1;
117            }
118            Dir::Base => {
119                out.push(base[i - 1].clone());
120                i -= 1;
121            }
122        }
123    }
124    out.reverse();
125    out
126}
127
128struct Sline {
129    /// Lost lines shown before this result line (`-` / `--` rows).
130    lost: Vec<LostSeg>,
131    /// Pending lost lines for this result line before coalescing.
132    plost: Vec<LostSeg>,
133    /// Bitmask: parent P had this result line unchanged.
134    flag: u32,
135    bol: String,
136    p_lno: Vec<u32>,
137}
138
139fn split_lines_with_incomplete(text: &str) -> (Vec<String>, usize) {
140    if text.is_empty() {
141        return (Vec::new(), 0);
142    }
143    let lines: Vec<String> = text.lines().map(str::to_owned).collect();
144    let cnt = lines.len();
145    (lines, cnt)
146}
147
148fn combine_one_parent(
149    slines: &mut [Sline],
150    cnt: usize,
151    parent_lines: &[String],
152    n: usize,
153    _num_parent: usize,
154    ws: CombinedDiffWsOptions,
155) {
156    let nmask = 1u32 << n;
157    let old_keys: Vec<String> = parent_lines.iter().map(|l| line_key(l, ws)).collect();
158    let new_keys: Vec<String> = slines[..cnt].iter().map(|s| line_key(&s.bol, ws)).collect();
159    let ops = capture_diff_slices(Algorithm::Myers, &old_keys, &new_keys);
160
161    // Group consecutive non-Equal ops into hunks, mirroring xdiff's hunk emission.
162    // Within each hunk we replicate combine-diff.c's consume_hunk()/consume_line():
163    // the per-hunk "lost bucket" is chosen from the result-side hunk start, and all
164    // '-' lines of the hunk are appended to that one bucket in order.
165    let mut idx = 0usize;
166    while idx < ops.len() {
167        if matches!(ops[idx], DiffOp::Equal { .. }) {
168            idx += 1;
169            continue;
170        }
171        // Collect a maximal run of change ops as one hunk.
172        let start = idx;
173        let mut del_lines: Vec<usize> = Vec::new();
174        let mut ins_lines: Vec<usize> = Vec::new();
175        // new-begin (nb) and new-count (nn): result-side coordinates of the hunk.
176        let mut nb: Option<usize> = None;
177        let mut nn = 0usize;
178        while idx < ops.len() && !matches!(ops[idx], DiffOp::Equal { .. }) {
179            match ops[idx] {
180                DiffOp::Delete {
181                    old_index,
182                    old_len,
183                    new_index,
184                    ..
185                } => {
186                    if nb.is_none() {
187                        nb = Some(new_index);
188                    }
189                    for k in 0..old_len {
190                        del_lines.push(old_index + k);
191                    }
192                }
193                DiffOp::Insert {
194                    old_index: _,
195                    new_index,
196                    new_len,
197                } => {
198                    if nb.is_none() {
199                        nb = Some(new_index);
200                    }
201                    for k in 0..new_len {
202                        ins_lines.push(new_index + k);
203                        nn += 1;
204                    }
205                }
206                DiffOp::Replace {
207                    old_index,
208                    old_len,
209                    new_index,
210                    new_len,
211                } => {
212                    if nb.is_none() {
213                        nb = Some(new_index);
214                    }
215                    for k in 0..old_len {
216                        del_lines.push(old_index + k);
217                    }
218                    for k in 0..new_len {
219                        ins_lines.push(new_index + k);
220                        nn += 1;
221                    }
222                }
223                DiffOp::Equal { .. } => unreachable!(),
224            }
225            idx += 1;
226        }
227        let _ = start;
228        // nb here is 0-based (result index where the hunk's first change sits).
229        // Git's consume_hunk uses 1-based nb: bucket = sline[nb-1] normally, or
230        // sline[nb] when the hunk adds nothing to the result (nn == 0). With our
231        // 0-based nb, that translates to: bucket = nb (nn == 0) or nb (since
232        // sline[(nb+1)-1] == sline[nb]). Concretely git's nb_1based = nb0 + 1.
233        let nb0 = nb.unwrap_or(0);
234        let bucket = if nn == 0 {
235            // Lost lines hang on the result line *after* the removed span.
236            (nb0).min(cnt)
237        } else {
238            // Lost lines hang on the first result line of the hunk.
239            nb0.min(cnt)
240        };
241        let bucket = bucket.min(cnt);
242        for &oi in &del_lines {
243            slines[bucket].plost.push(LostSeg {
244                text: parent_lines[oi].clone(),
245                parent_map: nmask,
246            });
247        }
248        for &ni in &ins_lines {
249            if ni < cnt {
250                slines[ni].flag |= nmask;
251            }
252        }
253    }
254
255    let mut p_lno = 1u32;
256    for lno in 0..=cnt {
257        slines[lno].p_lno[n] = p_lno;
258        if !slines[lno].plost.is_empty() {
259            let incoming = std::mem::take(&mut slines[lno].plost);
260            slines[lno].lost =
261                coalesce_lost(std::mem::take(&mut slines[lno].lost), incoming, nmask, ws);
262        }
263        for seg in &slines[lno].lost {
264            if seg.parent_map & nmask != 0 {
265                p_lno = p_lno.saturating_add(1);
266            }
267        }
268        if lno < cnt && slines[lno].flag & nmask == 0 {
269            p_lno = p_lno.saturating_add(1);
270        }
271    }
272    // Trailer: p_lno[cnt + 1] is the end line number, read when a hunk extends to
273    // the end of the file (matches Git's `sline[cnt + 1].p_lno[n] = p_lno`).
274    if let Some(trailer) = slines.get_mut(cnt + 1) {
275        trailer.p_lno[n] = p_lno;
276    }
277}
278
279fn interesting(s: &Sline, all_mask: u32) -> bool {
280    (s.flag & all_mask) != 0 || !s.lost.is_empty()
281}
282
283fn find_next(slines: &[Sline], mark: u32, mut i: usize, cnt: usize, want_unmarked: bool) -> usize {
284    while i <= cnt {
285        let marked = slines[i].flag & mark != 0;
286        if want_unmarked != marked {
287            return i;
288        }
289        i += 1;
290    }
291    cnt + 1
292}
293
294fn adjust_hunk_tail(slines: &[Sline], all_mask: u32, hunk_begin: usize, mut i: usize) -> usize {
295    if hunk_begin < i && slines[i - 1].flag & all_mask == 0 {
296        i -= 1;
297    }
298    i
299}
300
301fn give_context(slines: &mut [Sline], cnt: usize, num_parent: usize, context: usize) {
302    let all_mask = (1u32 << num_parent) - 1;
303    let mark = 1u32 << num_parent;
304    let no_pre_delete = 2u32 << num_parent;
305
306    let mut i = find_next(slines, mark, 0, cnt, false);
307    if cnt < i {
308        return;
309    }
310
311    while i <= cnt {
312        let mut j = i.saturating_sub(context);
313        while j < i {
314            if slines[j].flag & mark == 0 {
315                slines[j].flag |= no_pre_delete;
316            }
317            slines[j].flag |= mark;
318            j += 1;
319        }
320
321        loop {
322            j = find_next(slines, mark, i, cnt, true);
323            if cnt < j {
324                return;
325            }
326            let k = find_next(slines, mark, j, cnt, false);
327            let j_adj = adjust_hunk_tail(slines, all_mask, i, j);
328
329            if k < j_adj + context {
330                let mut t = j;
331                while t < k {
332                    slines[t].flag |= mark;
333                    t += 1;
334                }
335                i = k;
336                continue;
337            }
338
339            i = k;
340            let k_end = (j + context).min(cnt + 1);
341            let mut t = j;
342            while t < k_end {
343                slines[t].flag |= mark;
344                t += 1;
345            }
346            break;
347        }
348    }
349}
350
351fn make_hunks(slines: &mut [Sline], cnt: usize, num_parent: usize, dense: bool, context: usize) {
352    let all_mask = (1u32 << num_parent) - 1;
353    let mark = 1u32 << num_parent;
354
355    for i in 0..=cnt {
356        if interesting(&slines[i], all_mask) {
357            slines[i].flag |= mark;
358        } else {
359            slines[i].flag &= !mark;
360        }
361    }
362
363    if dense {
364        let mut i = 0usize;
365        while i <= cnt {
366            while i <= cnt && slines[i].flag & mark == 0 {
367                i += 1;
368            }
369            if cnt < i {
370                break;
371            }
372            let hunk_begin = i;
373            let mut j = i + 1;
374            while j <= cnt {
375                if slines[j].flag & mark == 0 {
376                    let j_adj = adjust_hunk_tail(slines, all_mask, hunk_begin, j);
377                    let la = (j_adj + context).min(cnt + 1);
378                    let mut contin = false;
379                    let mut la2 = la;
380                    while la2 > j {
381                        la2 -= 1;
382                        if slines[la2].flag & mark != 0 {
383                            contin = true;
384                            break;
385                        }
386                    }
387                    if !contin {
388                        break;
389                    }
390                    j = la2;
391                }
392                j += 1;
393            }
394            let hunk_end = j;
395
396            let mut same_diff = 0u32;
397            let mut has_interesting = false;
398            let mut jj = hunk_begin;
399            while jj < hunk_end && !has_interesting {
400                let mut this_diff = slines[jj].flag & all_mask;
401                if this_diff != 0 {
402                    if same_diff == 0 {
403                        same_diff = this_diff;
404                    } else if same_diff != this_diff {
405                        has_interesting = true;
406                        break;
407                    }
408                }
409                let ll_iter = slines[jj].lost.iter();
410                for seg in ll_iter {
411                    if has_interesting {
412                        break;
413                    }
414                    this_diff = seg.parent_map;
415                    if same_diff == 0 {
416                        same_diff = this_diff;
417                    } else if same_diff != this_diff {
418                        has_interesting = true;
419                    }
420                }
421                jj += 1;
422            }
423            if !has_interesting && same_diff != 0 && same_diff != all_mask {
424                for k in hunk_begin..hunk_end {
425                    slines[k].flag &= !mark;
426                }
427            }
428            i = hunk_end;
429        }
430    }
431
432    give_context(slines, cnt, num_parent, context);
433}
434
435fn show_parent_lno(slines: &[Sline], l0: usize, l1: usize, n: usize, null_ctx: u32) -> String {
436    let a = slines[l0].p_lno[n];
437    let b = slines[l1].p_lno[n];
438    format!(" -{a},{}", b.saturating_sub(a).saturating_sub(null_ctx))
439}
440
441fn dump_slines(slines: &[Sline], cnt: usize, num_parent: usize, context: usize) -> String {
442    let mark = 1u32 << num_parent;
443    let no_pre_delete = 2u32 << num_parent;
444    let mut out = String::new();
445    let mut lno = 0usize;
446
447    loop {
448        while lno <= cnt && slines[lno].flag & mark == 0 {
449            lno += 1;
450        }
451        if cnt < lno {
452            break;
453        }
454        let h_start = lno;
455        let mut h_end = lno + 1;
456        while h_end <= cnt && slines[h_end].flag & mark != 0 {
457            h_end += 1;
458        }
459        let mut rlines = h_end - h_start;
460        if cnt < h_end {
461            rlines = rlines.saturating_sub(1);
462        }
463        let mut null_ctx = 0u32;
464        if context == 0 {
465            for j in h_start..h_end {
466                if slines[j].flag & (mark - 1) == 0 {
467                    null_ctx = null_ctx.saturating_add(1);
468                }
469            }
470            rlines = rlines.saturating_sub(null_ctx as usize);
471        }
472
473        // Git emits `num_parent + 1` `@` characters on each side of a combined hunk
474        // header (e.g. `@@@` for a two-parent merge).
475        for _ in 0..=num_parent {
476            out.push('@');
477        }
478        for n in 0..num_parent {
479            out.push_str(&show_parent_lno(slines, h_start, h_end, n, null_ctx));
480        }
481        out.push_str(&format!(" +{},{} ", h_start + 1, rlines));
482        for _ in 0..=num_parent {
483            out.push('@');
484        }
485        out.push('\n');
486
487        while lno < h_end {
488            let sl = &slines[lno];
489            lno += 1;
490            let show_lost = if sl.flag & no_pre_delete == 0 {
491                sl.lost.as_slice()
492            } else {
493                &[][..]
494            };
495            // Each combined line is prefixed with exactly `num_parent` columns
496            // (one per parent), like Git's dump_sline(); there is no extra leading
497            // result-marker column.
498            for seg in show_lost {
499                for j in 0..num_parent {
500                    if seg.parent_map & (1u32 << j) != 0 {
501                        out.push('-');
502                    } else {
503                        out.push(' ');
504                    }
505                }
506                out.push_str(&seg.text);
507                out.push('\n');
508            }
509            if cnt < lno {
510                break;
511            }
512            let sl = &slines[lno - 1];
513            if sl.flag & (mark - 1) == 0 && context == 0 {
514                continue;
515            }
516            let mut p_mask = 1u32;
517            for _ in 0..num_parent {
518                if p_mask & sl.flag != 0 {
519                    out.push('+');
520                } else {
521                    out.push(' ');
522                }
523                p_mask <<= 1;
524            }
525            out.push_str(&sl.bol);
526            out.push('\n');
527        }
528    }
529    out
530}
531
532fn reuse_parent(slines: &mut [Sline], cnt: usize, i: usize, j: usize) {
533    let im = 1u32 << i;
534    let jm = 1u32 << j;
535    for lno in 0..=cnt {
536        for seg in &mut slines[lno].lost {
537            if seg.parent_map & jm != 0 {
538                seg.parent_map |= im;
539            }
540        }
541        if slines[lno].flag & jm != 0 {
542            slines[lno].flag |= im;
543        }
544        slines[lno].p_lno[i] = slines[lno].p_lno[j];
545    }
546    // Mirror the trailer (sline[cnt + 1]) so an EOF-spanning hunk reports the right
547    // end line number for the reused parent.
548    if let Some(trailer) = slines.get_mut(cnt + 1) {
549        trailer.p_lno[i] = trailer.p_lno[j];
550    }
551}
552
553/// Combined diff body: `@@@` hunks and parent/result lines (no `diff --cc` header).
554#[must_use]
555pub fn format_combined_diff_body(
556    parent_texts: &[String],
557    result_text: &str,
558    context: usize,
559    dense: bool,
560    ws: CombinedDiffWsOptions,
561) -> String {
562    let num_parent = parent_texts.len();
563    if num_parent == 0 {
564        return String::new();
565    }
566
567    let (res_lines, cnt) = split_lines_with_incomplete(result_text);
568    // An empty result with non-empty parents still produces a combined diff that
569    // shows every parent line as a deletion (e.g. a merge that empties a file).
570    // Only bail out when there is genuinely nothing on either side.
571    if cnt == 0 && result_text.is_empty() && parent_texts.iter().all(String::is_empty) {
572        return String::new();
573    }
574
575    let mut slines: Vec<Sline> = (0..=cnt + 1)
576        .map(|idx| Sline {
577            lost: Vec::new(),
578            plost: Vec::new(),
579            flag: 0,
580            bol: if idx < cnt {
581                res_lines[idx].clone()
582            } else {
583                String::new()
584            },
585            p_lno: vec![0; num_parent],
586        })
587        .collect();
588
589    let parents: Vec<Vec<String>> = parent_texts
590        .iter()
591        .map(|p| p.lines().map(str::to_owned).collect())
592        .collect();
593
594    for i in 0..num_parent {
595        let mut reused = false;
596        for j in 0..i {
597            if parents[i] == parents[j] {
598                reuse_parent(&mut slines, cnt, i, j);
599                reused = true;
600                break;
601            }
602        }
603        if !reused {
604            combine_one_parent(&mut slines, cnt, &parents[i], i, num_parent, ws);
605        }
606    }
607
608    make_hunks(&mut slines, cnt, num_parent, dense, context);
609    dump_slines(&slines, cnt, num_parent, context)
610}