Skip to main content

grit_lib/
combined_diff_patch.rs

1//! Git-style combined merge diff hunks (`diff --cc` / `diff --combined`).
2//!
3//! This approximates Git's `combine-diff.c` behaviour: per-parent Myers diffs against
4//! the merge result, lost-line lists with LCS coalescing, dense (`--cc`) hunk
5//! suppression, and `@@@` headers.
6
7use similar::{capture_diff_slices, Algorithm, DiffOp};
8
9/// Whitespace handling for combined diffs (Git `xdl_opts` subset).
10#[derive(Debug, Clone, Copy, Default)]
11pub struct CombinedDiffWsOptions {
12    pub ignore_all_space: bool,
13    pub ignore_space_change: bool,
14    pub ignore_space_at_eol: bool,
15    pub ignore_cr_at_eol: bool,
16}
17
18impl CombinedDiffWsOptions {
19    #[must_use]
20    pub fn any(self) -> bool {
21        self.ignore_all_space
22            || self.ignore_space_change
23            || self.ignore_space_at_eol
24            || self.ignore_cr_at_eol
25    }
26}
27
28#[derive(Clone)]
29struct LostSeg {
30    text: String,
31    parent_map: u32,
32}
33
34fn strip_trailing_cr(s: &str, ignore: bool) -> &str {
35    if ignore && s.ends_with('\r') {
36        &s[..s.len().saturating_sub(1)]
37    } else {
38        s
39    }
40}
41
42fn line_key(line: &str, ws: CombinedDiffWsOptions) -> String {
43    let s = strip_trailing_cr(line, ws.ignore_cr_at_eol);
44    if ws.ignore_all_space {
45        s.chars().filter(|c| !c.is_whitespace()).collect()
46    } else if ws.ignore_space_change {
47        s.split_whitespace().collect::<Vec<_>>().join(" ")
48    } else if ws.ignore_space_at_eol {
49        s.trim_end_matches(|c: char| c.is_whitespace()).to_string()
50    } else {
51        s.to_string()
52    }
53}
54
55fn lines_match(a: &str, b: &str, ws: CombinedDiffWsOptions) -> bool {
56    line_key(a, ws) == line_key(b, ws)
57}
58
59/// Coalesce `incoming` lost segments into `base` using LCS (Git `coalesce_lines`).
60fn coalesce_lost(
61    base: Vec<LostSeg>,
62    incoming: Vec<LostSeg>,
63    parent_bit: u32,
64    ws: CombinedDiffWsOptions,
65) -> Vec<LostSeg> {
66    if incoming.is_empty() {
67        return base;
68    }
69    if base.is_empty() {
70        return incoming;
71    }
72    let ob = base.len();
73    let nw = incoming.len();
74    #[derive(Clone, Copy)]
75    enum Dir {
76        Match,
77        Base,
78        New,
79    }
80    let mut lcs = vec![vec![0usize; nw + 1]; ob + 1];
81    let mut dir = vec![vec![Dir::Base; nw + 1]; ob + 1];
82    for j in 1..=nw {
83        dir[0][j] = Dir::New;
84    }
85    for i in 1..=ob {
86        dir[i][0] = Dir::Base;
87    }
88    for i in 1..=ob {
89        for j in 1..=nw {
90            if lines_match(&base[i - 1].text, &incoming[j - 1].text, ws) {
91                lcs[i][j] = lcs[i - 1][j - 1] + 1;
92                dir[i][j] = Dir::Match;
93            } else if lcs[i][j - 1] >= lcs[i - 1][j] {
94                lcs[i][j] = lcs[i][j - 1];
95                dir[i][j] = Dir::New;
96            } else {
97                lcs[i][j] = lcs[i - 1][j];
98                dir[i][j] = Dir::Base;
99            }
100        }
101    }
102    let mut out: Vec<LostSeg> = Vec::new();
103    let mut i = ob;
104    let mut j = nw;
105    while i > 0 || j > 0 {
106        match dir[i][j] {
107            Dir::Match => {
108                let mut seg = base[i - 1].clone();
109                seg.parent_map |= parent_bit;
110                out.push(seg);
111                i -= 1;
112                j -= 1;
113            }
114            Dir::New => {
115                out.push(incoming[j - 1].clone());
116                j -= 1;
117            }
118            Dir::Base => {
119                out.push(base[i - 1].clone());
120                i -= 1;
121            }
122        }
123    }
124    out.reverse();
125    out
126}
127
128struct Sline {
129    /// Lost lines shown before this result line (`-` / `--` rows).
130    lost: Vec<LostSeg>,
131    /// Pending lost lines for this result line before coalescing.
132    plost: Vec<LostSeg>,
133    /// Bitmask: parent P had this result line unchanged.
134    flag: u32,
135    bol: String,
136    p_lno: Vec<u32>,
137}
138
139fn split_lines_with_incomplete(text: &str) -> (Vec<String>, usize) {
140    if text.is_empty() {
141        return (Vec::new(), 0);
142    }
143    let lines: Vec<String> = text.lines().map(str::to_owned).collect();
144    let cnt = lines.len();
145    (lines, cnt)
146}
147
148fn combine_one_parent(
149    slines: &mut [Sline],
150    cnt: usize,
151    parent_lines: &[String],
152    n: usize,
153    _num_parent: usize,
154    ws: CombinedDiffWsOptions,
155) {
156    let nmask = 1u32 << n;
157    let old_keys: Vec<String> = parent_lines.iter().map(|l| line_key(l, ws)).collect();
158    let new_keys: Vec<String> = slines[..cnt].iter().map(|s| line_key(&s.bol, ws)).collect();
159    let ops = capture_diff_slices(Algorithm::Myers, &old_keys, &new_keys);
160
161    for op in ops {
162        match op {
163            DiffOp::Equal { .. } => {}
164            DiffOp::Delete {
165                old_index,
166                old_len,
167                new_index,
168                ..
169            } => {
170                let mut b = new_index.min(cnt);
171                if old_len > 0 && b == 0 && cnt > 0 {
172                    b = 1;
173                }
174                let b = b.min(cnt.saturating_sub(1));
175                for k in 0..old_len {
176                    slines[b].plost.push(LostSeg {
177                        text: parent_lines[old_index + k].clone(),
178                        parent_map: nmask,
179                    });
180                }
181            }
182            DiffOp::Insert {
183                new_index, new_len, ..
184            } => {
185                for k in 0..new_len {
186                    slines[new_index + k].flag |= nmask;
187                }
188            }
189            DiffOp::Replace {
190                old_index,
191                old_len,
192                new_index,
193                new_len,
194            } => {
195                let b = if new_len == 0 {
196                    let mut b = new_index.min(cnt);
197                    if old_len > 0 && b == 0 && cnt > 0 {
198                        b = 1;
199                    }
200                    b.min(cnt.saturating_sub(1))
201                } else {
202                    new_index.saturating_sub(1).min(cnt.saturating_sub(1))
203                };
204                for k in 0..old_len {
205                    slines[b].plost.push(LostSeg {
206                        text: parent_lines[old_index + k].clone(),
207                        parent_map: nmask,
208                    });
209                }
210                for k in 0..new_len {
211                    slines[new_index + k].flag |= nmask;
212                }
213            }
214        }
215    }
216
217    let mut p_lno = 1u32;
218    for lno in 0..=cnt {
219        slines[lno].p_lno[n] = p_lno;
220        if !slines[lno].plost.is_empty() {
221            let incoming = std::mem::take(&mut slines[lno].plost);
222            slines[lno].lost =
223                coalesce_lost(std::mem::take(&mut slines[lno].lost), incoming, nmask, ws);
224        }
225        for seg in &slines[lno].lost {
226            if seg.parent_map & nmask != 0 {
227                p_lno = p_lno.saturating_add(1);
228            }
229        }
230        if lno < cnt && slines[lno].flag & nmask == 0 {
231            p_lno = p_lno.saturating_add(1);
232        }
233    }
234}
235
236fn interesting(s: &Sline, all_mask: u32) -> bool {
237    (s.flag & all_mask) != 0 || !s.lost.is_empty()
238}
239
240fn find_next(slines: &[Sline], mark: u32, mut i: usize, cnt: usize, want_unmarked: bool) -> usize {
241    while i <= cnt {
242        let marked = slines[i].flag & mark != 0;
243        if want_unmarked != marked {
244            return i;
245        }
246        i += 1;
247    }
248    cnt + 1
249}
250
251fn adjust_hunk_tail(slines: &[Sline], all_mask: u32, hunk_begin: usize, mut i: usize) -> usize {
252    if hunk_begin < i && slines[i - 1].flag & all_mask == 0 {
253        i -= 1;
254    }
255    i
256}
257
258fn give_context(slines: &mut [Sline], cnt: usize, num_parent: usize, context: usize) {
259    let all_mask = (1u32 << num_parent) - 1;
260    let mark = 1u32 << num_parent;
261    let no_pre_delete = 2u32 << num_parent;
262
263    let mut i = find_next(slines, mark, 0, cnt, false);
264    if cnt < i {
265        return;
266    }
267
268    while i <= cnt {
269        let mut j = i.saturating_sub(context);
270        while j < i {
271            if slines[j].flag & mark == 0 {
272                slines[j].flag |= no_pre_delete;
273            }
274            slines[j].flag |= mark;
275            j += 1;
276        }
277
278        loop {
279            j = find_next(slines, mark, i, cnt, true);
280            if cnt < j {
281                return;
282            }
283            let k = find_next(slines, mark, j, cnt, false);
284            let j_adj = adjust_hunk_tail(slines, all_mask, i, j);
285
286            if k < j_adj + context {
287                let mut t = j;
288                while t < k {
289                    slines[t].flag |= mark;
290                    t += 1;
291                }
292                i = k;
293                continue;
294            }
295
296            i = k;
297            let k_end = (j + context).min(cnt + 1);
298            let mut t = j;
299            while t < k_end {
300                slines[t].flag |= mark;
301                t += 1;
302            }
303            break;
304        }
305    }
306}
307
308fn make_hunks(slines: &mut [Sline], cnt: usize, num_parent: usize, dense: bool, context: usize) {
309    let all_mask = (1u32 << num_parent) - 1;
310    let mark = 1u32 << num_parent;
311
312    for i in 0..=cnt {
313        if interesting(&slines[i], all_mask) {
314            slines[i].flag |= mark;
315        } else {
316            slines[i].flag &= !mark;
317        }
318    }
319
320    if dense {
321        let mut i = 0usize;
322        while i <= cnt {
323            while i <= cnt && slines[i].flag & mark == 0 {
324                i += 1;
325            }
326            if cnt < i {
327                break;
328            }
329            let hunk_begin = i;
330            let mut j = i + 1;
331            while j <= cnt {
332                if slines[j].flag & mark == 0 {
333                    let j_adj = adjust_hunk_tail(slines, all_mask, hunk_begin, j);
334                    let la = (j_adj + context).min(cnt + 1);
335                    let mut contin = false;
336                    let mut la2 = la;
337                    while la2 > j {
338                        la2 -= 1;
339                        if slines[la2].flag & mark != 0 {
340                            contin = true;
341                            break;
342                        }
343                    }
344                    if !contin {
345                        break;
346                    }
347                    j = la2;
348                }
349                j += 1;
350            }
351            let hunk_end = j;
352
353            let mut same_diff = 0u32;
354            let mut has_interesting = false;
355            let mut jj = hunk_begin;
356            while jj < hunk_end && !has_interesting {
357                let mut this_diff = slines[jj].flag & all_mask;
358                if this_diff != 0 {
359                    if same_diff == 0 {
360                        same_diff = this_diff;
361                    } else if same_diff != this_diff {
362                        has_interesting = true;
363                        break;
364                    }
365                }
366                let ll_iter = slines[jj].lost.iter();
367                for seg in ll_iter {
368                    if has_interesting {
369                        break;
370                    }
371                    this_diff = seg.parent_map;
372                    if same_diff == 0 {
373                        same_diff = this_diff;
374                    } else if same_diff != this_diff {
375                        has_interesting = true;
376                    }
377                }
378                jj += 1;
379            }
380            if !has_interesting && same_diff != 0 && same_diff != all_mask {
381                for k in hunk_begin..hunk_end {
382                    slines[k].flag &= !mark;
383                }
384            }
385            i = hunk_end;
386        }
387    }
388
389    give_context(slines, cnt, num_parent, context);
390}
391
392fn show_parent_lno(slines: &[Sline], l0: usize, l1: usize, n: usize, null_ctx: u32) -> String {
393    let a = slines[l0].p_lno[n];
394    let b = slines[l1].p_lno[n];
395    format!(" -{a},{}", b.saturating_sub(a).saturating_sub(null_ctx))
396}
397
398fn dump_slines(slines: &[Sline], cnt: usize, num_parent: usize, context: usize) -> String {
399    let mark = 1u32 << num_parent;
400    let no_pre_delete = 2u32 << num_parent;
401    let mut out = String::new();
402    let mut lno = 0usize;
403
404    loop {
405        while lno <= cnt && slines[lno].flag & mark == 0 {
406            lno += 1;
407        }
408        if cnt < lno {
409            break;
410        }
411        let h_start = lno;
412        let mut h_end = lno + 1;
413        while h_end <= cnt && slines[h_end].flag & mark != 0 {
414            h_end += 1;
415        }
416        let mut rlines = h_end - h_start;
417        if cnt < h_end {
418            rlines = rlines.saturating_sub(1);
419        }
420        let mut null_ctx = 0u32;
421        if context == 0 {
422            for j in h_start..h_end {
423                if slines[j].flag & (mark - 1) == 0 {
424                    null_ctx = null_ctx.saturating_add(1);
425                }
426            }
427            rlines = rlines.saturating_sub(null_ctx as usize);
428        }
429
430        out.push_str("@@@");
431        for _ in 0..=num_parent {
432            out.push('@');
433        }
434        for n in 0..num_parent {
435            out.push_str(&show_parent_lno(slines, h_start, h_end, n, null_ctx));
436        }
437        out.push_str(&format!(" +{},{} ", h_start + 1, rlines));
438        for _ in 0..=num_parent {
439            out.push('@');
440        }
441        out.push('\n');
442
443        while lno < h_end {
444            let sl = &slines[lno];
445            lno += 1;
446            let show_lost = if sl.flag & no_pre_delete == 0 {
447                sl.lost.as_slice()
448            } else {
449                &[][..]
450            };
451            for seg in show_lost {
452                out.push('-');
453                for j in 0..num_parent {
454                    if seg.parent_map & (1u32 << j) != 0 {
455                        out.push('-');
456                    } else {
457                        out.push(' ');
458                    }
459                }
460                out.push_str(&seg.text);
461                out.push('\n');
462            }
463            if cnt < lno {
464                break;
465            }
466            let sl = &slines[lno - 1];
467            if sl.flag & (mark - 1) == 0 {
468                if context == 0 {
469                    continue;
470                }
471                out.push(' ');
472            } else {
473                out.push('+');
474            }
475            let mut p_mask = 1u32;
476            for _ in 0..num_parent {
477                if p_mask & sl.flag != 0 {
478                    out.push('+');
479                } else {
480                    out.push(' ');
481                }
482                p_mask <<= 1;
483            }
484            out.push_str(&sl.bol);
485            out.push('\n');
486        }
487    }
488    out
489}
490
491fn reuse_parent(slines: &mut [Sline], cnt: usize, i: usize, j: usize) {
492    let im = 1u32 << i;
493    let jm = 1u32 << j;
494    for lno in 0..=cnt {
495        for seg in &mut slines[lno].lost {
496            if seg.parent_map & jm != 0 {
497                seg.parent_map |= im;
498            }
499        }
500        if slines[lno].flag & jm != 0 {
501            slines[lno].flag |= im;
502        }
503        slines[lno].p_lno[i] = slines[lno].p_lno[j];
504    }
505}
506
507/// Combined diff body: `@@@` hunks and parent/result lines (no `diff --cc` header).
508#[must_use]
509pub fn format_combined_diff_body(
510    parent_texts: &[String],
511    result_text: &str,
512    context: usize,
513    dense: bool,
514    ws: CombinedDiffWsOptions,
515) -> String {
516    let num_parent = parent_texts.len();
517    if num_parent == 0 {
518        return String::new();
519    }
520
521    let (res_lines, cnt) = split_lines_with_incomplete(result_text);
522    if cnt == 0 && result_text.is_empty() {
523        return String::new();
524    }
525
526    let mut slines: Vec<Sline> = (0..=cnt + 1)
527        .map(|idx| Sline {
528            lost: Vec::new(),
529            plost: Vec::new(),
530            flag: 0,
531            bol: if idx < cnt {
532                res_lines[idx].clone()
533            } else {
534                String::new()
535            },
536            p_lno: vec![0; num_parent],
537        })
538        .collect();
539
540    let parents: Vec<Vec<String>> = parent_texts
541        .iter()
542        .map(|p| p.lines().map(str::to_owned).collect())
543        .collect();
544
545    for i in 0..num_parent {
546        let mut reused = false;
547        for j in 0..i {
548            if parents[i] == parents[j] {
549                reuse_parent(&mut slines, cnt, i, j);
550                reused = true;
551                break;
552            }
553        }
554        if !reused {
555            combine_one_parent(&mut slines, cnt, &parents[i], i, num_parent, ws);
556        }
557    }
558
559    make_hunks(&mut slines, cnt, num_parent, dense, context);
560    dump_slines(&slines, cnt, num_parent, context)
561}