Skip to main content

grit_lib/
merge_file.rs

1//! Three-way file merge — the engine behind `grit merge-file`.
2//!
3//! Performs a line-level three-way merge of *base*, *ours*, and *theirs*
4//! using the Myers diff algorithm (via the `similar` crate).
5//!
6//! # Algorithm
7//!
8//! 1. Split each file into lines, preserving line endings.
9//! 2. Annotate each base line with its diff status in `ours` and `theirs`.
10//! 3. For each non-Unchanged position, expand the region to fully cover every
11//!    overlapping Changed op from either side, then classify as
12//!    OnlyOurs / OnlyTheirs / Conflict based on which sides are changed.
13//! 4. Emit each hunk: unchanged → base; single-side → that side's content;
14//!    two-side → conflict markers (or resolved via `favor`).
15//!
16//! Pure insertions (ops with empty old_range) are attached to the adjacent
17//! base position and emitted as OnlyOurs / OnlyTheirs hunks.
18
19use crate::error::Result;
20use similar::{Algorithm, DiffOp, DiffTag};
21
22/// How conflict regions should be resolved.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
24pub enum MergeFavor {
25    /// Leave conflict markers in the output (default).
26    #[default]
27    None,
28    /// For conflicts, keep our version.
29    Ours,
30    /// For conflicts, keep their version.
31    Theirs,
32    /// For conflicts, concatenate both versions.
33    Union,
34}
35
36/// Conflict-marker output style.
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
38pub enum ConflictStyle {
39    /// Standard two-section markers (`<<<<<<<`, `=======`, `>>>>>>>`).
40    #[default]
41    Merge,
42    /// Three-section markers including the base section.
43    Diff3,
44    /// Zealous diff3 (same as Diff3 for our purposes).
45    ZealousDiff3,
46}
47
48/// Input and options for a three-way merge.
49pub struct MergeInput<'a> {
50    /// Base (ancestor) content.
51    pub base: &'a [u8],
52    /// Our version of the file.
53    pub ours: &'a [u8],
54    /// Their version of the file.
55    pub theirs: &'a [u8],
56    /// Label for the ours conflict marker line.
57    pub label_ours: &'a str,
58    /// Label for the base conflict marker line (diff3 only).
59    pub label_base: &'a str,
60    /// Label for the theirs conflict marker line.
61    pub label_theirs: &'a str,
62    /// Conflict resolution strategy.
63    pub favor: MergeFavor,
64    /// Conflict marker style.
65    pub style: ConflictStyle,
66    /// Width of conflict markers in characters (0 → use default of 7).
67    pub marker_size: usize,
68    /// Diff algorithm.
69    pub diff_algorithm: Option<String>,
70    /// Ignore all whitespace when comparing lines (`-w`).
71    pub ignore_all_space: bool,
72    /// Ignore changes in amount of whitespace (`-b`).
73    pub ignore_space_change: bool,
74    /// Ignore whitespace at end of line.
75    pub ignore_space_at_eol: bool,
76    /// Ignore CR at end of line.
77    pub ignore_cr_at_eol: bool,
78}
79
80/// Result of a three-way merge.
81pub struct MergeOutput {
82    /// Merged file content.
83    pub content: Vec<u8>,
84    /// Number of conflict regions (0 = clean merge).
85    pub conflicts: usize,
86}
87
88/// Perform a three-way line-level merge.
89///
90/// # Errors
91///
92/// Currently infallible but returns `Result` for future extension.
93pub fn merge(input: &MergeInput<'_>) -> Result<MergeOutput> {
94    let base_lines = split_lines(input.base);
95    let ours_lines = split_lines(input.ours);
96    let theirs_lines = split_lines(input.theirs);
97    let ws_mode = WhitespaceMode {
98        ignore_all_space: input.ignore_all_space,
99        ignore_space_change: input.ignore_space_change,
100        ignore_space_at_eol: input.ignore_space_at_eol,
101        ignore_cr_at_eol: input.ignore_cr_at_eol,
102    };
103    // Ignore trailing `\n` / `\r\n` / `\r` when diffing (git `xdl_recmatch`), so clean merges
104    // match `git merge-file` when only the final newline differs (t6403).
105    let base_compare_lines = lines_for_merge_diff_compare(&base_lines, &ws_mode);
106    let ours_compare_lines = lines_for_merge_diff_compare(&ours_lines, &ws_mode);
107    let theirs_compare_lines = lines_for_merge_diff_compare(&theirs_lines, &ws_mode);
108
109    let algo = input
110        .diff_algorithm
111        .as_deref()
112        .map(|name| match name.to_lowercase().as_str() {
113            "histogram" | "patience" => similar::Algorithm::Patience,
114            _ => similar::Algorithm::Myers,
115        })
116        .unwrap_or(similar::Algorithm::Myers);
117    let ours_ops = similar::capture_diff_slices(algo, &base_compare_lines, &ours_compare_lines);
118    let theirs_ops = similar::capture_diff_slices(algo, &base_compare_lines, &theirs_compare_lines);
119
120    let mut hunks = compute_hunks(
121        &base_lines,
122        &ours_lines,
123        &theirs_lines,
124        &ours_ops,
125        &theirs_ops,
126        &ws_mode,
127    );
128    // Zealous diff3 (`adjust_zealous_hunks`) assumes the raw `compute_hunks` sequence.
129    if !matches!(input.style, ConflictStyle::ZealousDiff3) {
130        hunks = merge_adjacent_replace_and_trailing_insert_conflicts(hunks);
131        hunks = fold_adjacent_inserts_into_conflict(hunks);
132        hunks = merge_adjacent_one_sided_line_changes_to_conflict(hunks);
133    }
134    // Match `git merge-file` (`XDL_MERGE_ZEALOUS_ALNUM`): refine then simplify conflicts.
135    if !matches!(
136        input.style,
137        ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3
138    ) {
139        hunks = refine_conflicts_by_subdiff(hunks, algo, &ws_mode);
140        hunks = simplify_non_conflicts_between_conflicts(hunks, true);
141    }
142    // Git keeps adjacent conflict regions separate when identical lines appear
143    // between them (e.g. t4200-rerere); do not merge Conflict+gap+Conflict.
144    hunks = coalesce_nearby_conflicts(hunks, 3, false);
145    if matches!(input.style, ConflictStyle::ZealousDiff3) {
146        hunks = adjust_zealous_hunks(hunks);
147    }
148
149    let marker = if input.marker_size == 0 {
150        7
151    } else {
152        input.marker_size
153    };
154
155    let mut content: Vec<u8> = Vec::new();
156    let mut conflicts = 0usize;
157    let mut ours_line_pos = 0usize;
158    let mut theirs_line_pos = 0usize;
159
160    for (idx, hunk) in hunks.iter().enumerate() {
161        match hunk {
162            Hunk::Unchanged(lines) => {
163                append_lines(&mut content, lines);
164                ours_line_pos += lines.len();
165                theirs_line_pos += lines.len();
166            }
167            Hunk::OnlyOurs { ours, .. } => {
168                append_lines(&mut content, ours);
169                ours_line_pos += ours.len();
170            }
171            Hunk::OnlyTheirs { theirs, .. } => {
172                append_lines(&mut content, theirs);
173                theirs_line_pos += theirs.len();
174            }
175            Hunk::Conflict { base, ours, theirs } => {
176                let conflict_ours_start = ours_line_pos;
177                let conflict_theirs_start = theirs_line_pos;
178                match input.favor {
179                    MergeFavor::Ours => {
180                        append_lines(&mut content, ours);
181                        ours_line_pos += ours.len();
182                    }
183                    MergeFavor::Theirs => {
184                        append_lines(&mut content, theirs);
185                        theirs_line_pos += theirs.len();
186                    }
187                    MergeFavor::Union => {
188                        append_lines(&mut content, ours);
189                        // If the ours portion doesn't end with \n and theirs is
190                        // non-empty, insert a newline so both sections appear as
191                        // separate lines (matches git's missing-LF handling).
192                        if !theirs.is_empty()
193                            && !ours.is_empty()
194                            && !ours.last().map(|l| l.ends_with(b"\n")).unwrap_or(false)
195                        {
196                            content.push(b'\n');
197                        }
198                        append_lines(&mut content, theirs);
199                        ours_line_pos += ours.len();
200                        theirs_line_pos += theirs.len();
201                    }
202                    MergeFavor::None => {
203                        conflicts += 1;
204                        if matches!(input.style, ConflictStyle::ZealousDiff3) {
205                            let (mut prefix_len, mut suffix_len) =
206                                common_prefix_suffix(ours, theirs);
207
208                            if prefix_len > 0
209                                && idx > 0
210                                && hunk_output_lines(&hunks[idx - 1])
211                                    .map(|lines| lines_end_with(lines, &ours[..prefix_len]))
212                                    .unwrap_or(false)
213                            {
214                                prefix_len = 0;
215                            }
216
217                            if suffix_len > 0
218                                && idx + 1 < hunks.len()
219                                && hunk_output_lines(&hunks[idx + 1])
220                                    .map(|lines| {
221                                        lines_start_with(lines, &ours[ours.len() - suffix_len..])
222                                    })
223                                    .unwrap_or(false)
224                            {
225                                suffix_len = 0;
226                            }
227
228                            if prefix_len > 0 {
229                                append_lines(&mut content, &ours[..prefix_len]);
230                            }
231                            let i1 = conflict_ours_start + prefix_len;
232                            let i2 = conflict_theirs_start + prefix_len;
233                            let needs_cr = conflict_markers_need_cr_at(
234                                &ours_lines,
235                                &theirs_lines,
236                                &base_lines,
237                                i1,
238                                i2,
239                            );
240                            emit_conflict(
241                                &mut content,
242                                base,
243                                &ours[prefix_len..ours.len() - suffix_len],
244                                &theirs[prefix_len..theirs.len() - suffix_len],
245                                input.label_ours,
246                                input.label_base,
247                                input.label_theirs,
248                                input.style,
249                                marker,
250                                needs_cr,
251                            );
252                            if suffix_len > 0 {
253                                append_lines(&mut content, &ours[ours.len() - suffix_len..]);
254                            }
255                        } else if matches!(input.style, ConflictStyle::Merge) {
256                            let (prefix_len, suffix_len) =
257                                common_prefix_suffix_merge_lines(ours, theirs);
258                            let pre = &ours[..prefix_len];
259                            let suf_start = ours.len().saturating_sub(suffix_len);
260                            let o_mid = &ours[prefix_len..suf_start];
261                            let t_mid =
262                                &theirs[prefix_len..theirs.len().saturating_sub(suffix_len)];
263                            let suf = &ours[suf_start..];
264                            append_lines(&mut content, pre);
265                            let i1 = conflict_ours_start + prefix_len;
266                            let i2 = conflict_theirs_start + prefix_len;
267                            let needs_cr = conflict_markers_need_cr_at(
268                                &ours_lines,
269                                &theirs_lines,
270                                &base_lines,
271                                i1,
272                                i2,
273                            );
274                            emit_conflict(
275                                &mut content,
276                                base,
277                                o_mid,
278                                t_mid,
279                                input.label_ours,
280                                input.label_base,
281                                input.label_theirs,
282                                input.style,
283                                marker,
284                                needs_cr,
285                            );
286                            append_lines(&mut content, suf);
287                        } else {
288                            let needs_cr = conflict_markers_need_cr_at(
289                                &ours_lines,
290                                &theirs_lines,
291                                &base_lines,
292                                conflict_ours_start,
293                                conflict_theirs_start,
294                            );
295                            emit_conflict(
296                                &mut content,
297                                base,
298                                ours,
299                                theirs,
300                                input.label_ours,
301                                input.label_base,
302                                input.label_theirs,
303                                input.style,
304                                marker,
305                                needs_cr,
306                            );
307                        }
308                        ours_line_pos += ours.len();
309                        theirs_line_pos += theirs.len();
310                    }
311                }
312            }
313        }
314    }
315
316    Ok(MergeOutput { content, conflicts })
317}
318
319fn append_lines(out: &mut Vec<u8>, lines: &[Vec<u8>]) {
320    // If previous content ended without a newline and this hunk adds more
321    // content, insert a newline separator to avoid merging lines together.
322    if !out.is_empty() && !out.ends_with(b"\n") && !lines.is_empty() {
323        out.push(b'\n');
324    }
325    for line in lines {
326        out.extend_from_slice(line);
327    }
328}
329
330fn common_prefix_suffix(ours: &[Vec<u8>], theirs: &[Vec<u8>]) -> (usize, usize) {
331    let mut prefix = 0usize;
332    while prefix < ours.len() && prefix < theirs.len() && ours[prefix] == theirs[prefix] {
333        prefix += 1;
334    }
335
336    let mut suffix = 0usize;
337    while suffix < ours.len().saturating_sub(prefix)
338        && suffix < theirs.len().saturating_sub(prefix)
339        && ours[ours.len() - 1 - suffix] == theirs[theirs.len() - 1 - suffix]
340    {
341        suffix += 1;
342    }
343
344    (prefix, suffix)
345}
346
347fn hunk_output_lines(hunk: &Hunk) -> Option<&[Vec<u8>]> {
348    match hunk {
349        Hunk::Unchanged(lines) => Some(lines),
350        Hunk::OnlyOurs { ours, .. } => Some(ours),
351        Hunk::OnlyTheirs { theirs, .. } => Some(theirs),
352        Hunk::Conflict { .. } => None,
353    }
354}
355
356fn lines_end_with(lines: &[Vec<u8>], suffix: &[Vec<u8>]) -> bool {
357    if suffix.is_empty() || suffix.len() > lines.len() {
358        return false;
359    }
360    lines[lines.len() - suffix.len()..] == *suffix
361}
362
363fn lines_start_with(lines: &[Vec<u8>], prefix: &[Vec<u8>]) -> bool {
364    if prefix.is_empty() || prefix.len() > lines.len() {
365        return false;
366    }
367    lines[..prefix.len()] == *prefix
368}
369
370/// Emit conflict markers into `out`.
371#[allow(clippy::too_many_arguments)]
372fn emit_conflict(
373    out: &mut Vec<u8>,
374    base: &[Vec<u8>],
375    ours: &[Vec<u8>],
376    theirs: &[Vec<u8>],
377    label_ours: &str,
378    label_base: &str,
379    label_theirs: &str,
380    style: ConflictStyle,
381    marker: usize,
382    needs_cr: bool,
383) {
384    let open = "<".repeat(marker);
385    let eq = "=".repeat(marker);
386    let close = ">".repeat(marker);
387    let marker_eol: &[u8] = if needs_cr { b"\r\n" } else { b"\n" };
388
389    // Ensure ours section starts on a new line if the previous content didn't
390    // end with one.
391    if !out.is_empty() && !out.ends_with(b"\n") && !out.ends_with(b"\r\n") {
392        out.extend_from_slice(marker_eol);
393    }
394
395    write_conflict_marker_line(out, &open, Some(label_ours), marker_eol);
396    for line in ours {
397        out.extend_from_slice(line);
398    }
399    // Ensure separator starts on its own line.
400    if out.last().copied() != Some(b'\n') {
401        out.extend_from_slice(marker_eol);
402    }
403    match style {
404        ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => {
405            let pipe = "|".repeat(marker);
406            write_conflict_marker_line(out, &pipe, Some(label_base), marker_eol);
407            for line in base {
408                out.extend_from_slice(line);
409            }
410            if out.last().copied() != Some(b'\n') {
411                out.extend_from_slice(marker_eol);
412            }
413            write_conflict_marker_line(out, &eq, None, marker_eol);
414        }
415        ConflictStyle::Merge => {
416            write_conflict_marker_line(out, &eq, None, marker_eol);
417        }
418    }
419    for line in theirs {
420        out.extend_from_slice(line);
421    }
422    if out.last().copied() != Some(b'\n') {
423        out.extend_from_slice(marker_eol);
424    }
425    write_conflict_marker_line(out, &close, Some(label_theirs), marker_eol);
426}
427
428fn write_conflict_marker_line(out: &mut Vec<u8>, marker: &str, label: Option<&str>, eol: &[u8]) {
429    out.extend_from_slice(marker.as_bytes());
430    // `git rerere` recognizes `<<<<<<< ` / `>>>>>>> ` only when there is a space after the marker
431    // run, even if the branch label is empty (`ll_merge` with empty names — `try_replay_merge`).
432    let open_or_close = marker.starts_with('<') || marker.starts_with('>');
433    let has_label = label.is_some_and(|l| !l.is_empty());
434    // Diff3 `|||||||` lines use a space before the base label so they match Git's conflict format
435    // (and `print_sanitized_conflicted_diff` in t4108 can strip the label).
436    if open_or_close || has_label {
437        out.push(b' ');
438    }
439    if let Some(label) = label {
440        if !label.is_empty() {
441            out.extend_from_slice(label.as_bytes());
442        }
443    }
444    out.extend_from_slice(eol);
445}
446
447/// Strip trailing `\n`, `\r\n`, or lone `\r` from a logical line (content kept for output).
448fn strip_line_terminator(line: &[u8]) -> &[u8] {
449    if line.ends_with(b"\r\n") {
450        return &line[..line.len() - 2];
451    }
452    if line.ends_with(b"\n") {
453        return &line[..line.len() - 1];
454    }
455    if line.ends_with(b"\r") {
456        return &line[..line.len() - 1];
457    }
458    line
459}
460
461/// Compare two lines ignoring trailing newline / CR conventions (matches git `xdl_recmatch` for EOL).
462fn merge_line_equal(a: &[u8], b: &[u8]) -> bool {
463    strip_line_terminator(a) == strip_line_terminator(b)
464}
465
466/// Compare two line sequences line-by-line, ignoring trailing-newline conventions on each line.
467/// Mirrors git `xdl_merge_cmp_lines`, used to auto-resolve a both-sides-changed region when the
468/// two postimages are textually identical.
469fn merge_lines_equal(a: &[Vec<u8>], b: &[Vec<u8>]) -> bool {
470    a.len() == b.len() && a.iter().zip(b.iter()).all(|(x, y)| merge_line_equal(x, y))
471}
472
473fn common_prefix_suffix_merge_lines(ours: &[Vec<u8>], theirs: &[Vec<u8>]) -> (usize, usize) {
474    let mut prefix = 0usize;
475    while prefix < ours.len()
476        && prefix < theirs.len()
477        && merge_line_equal(&ours[prefix], &theirs[prefix])
478    {
479        prefix += 1;
480    }
481
482    let mut suffix = 0usize;
483    while suffix < ours.len().saturating_sub(prefix)
484        && suffix < theirs.len().saturating_sub(prefix)
485        && merge_line_equal(
486            &ours[ours.len() - 1 - suffix],
487            &theirs[theirs.len() - 1 - suffix],
488        )
489    {
490        suffix += 1;
491    }
492
493    (prefix, suffix)
494}
495
496/// Git `is_eol_crlf` for a single logical line in our representation.
497fn is_eol_crlf_line(line: &[u8]) -> i8 {
498    if line.ends_with(b"\r\n") {
499        return 1;
500    }
501    if line.ends_with(b"\n") {
502        return 0;
503    }
504    -1
505}
506
507/// Git `is_cr_needed` using indices into the full postimages / ancestor (see `xmerge.c`).
508fn conflict_markers_need_cr_at(
509    ours_lines: &[Vec<u8>],
510    theirs_lines: &[Vec<u8>],
511    base_lines: &[Vec<u8>],
512    i1: usize,
513    i2: usize,
514) -> bool {
515    let mut needs = if i1 > 0 {
516        is_eol_crlf_line(ours_lines[i1 - 1].as_slice())
517    } else {
518        -1
519    };
520    if needs > 0 {
521        let t0 = if i2 > 0 {
522            is_eol_crlf_line(theirs_lines[i2 - 1].as_slice())
523        } else {
524            -1
525        };
526        if t0 >= 0 {
527            needs = t0;
528        }
529    }
530    if needs > 0 && !base_lines.is_empty() {
531        needs = is_eol_crlf_line(base_lines[0].as_slice());
532    }
533    needs > 0
534}
535
536fn line_contains_alnum(line: &[u8]) -> bool {
537    strip_line_terminator(line)
538        .iter()
539        .any(|b| b.is_ascii_alphanumeric())
540}
541
542/// `true` when git `xdl_simplify_non_conflicts` would **not** merge `Conflict + gap + Conflict`.
543///
544/// Git merges the outer conflicts when the gap is "small enough" unless `simplify_if_no_alnum`
545/// (`XDL_MERGE_ZEALOUS < level`) requires keeping gaps that are long **and** alphanumeric.
546fn gap_blocks_merge_between_conflicts(gap: &[Vec<u8>], simplify_if_no_alnum: bool) -> bool {
547    if gap.len() <= 3 {
548        return false;
549    }
550    !simplify_if_no_alnum || gap.iter().any(|l| line_contains_alnum(l.as_slice()))
551}
552
553fn merge_two_adjacent_conflicts(left: &Hunk, gap: &[Vec<u8>], right: &Hunk) -> Option<Hunk> {
554    let Hunk::Conflict {
555        base: b1,
556        ours: o1,
557        theirs: t1,
558    } = left
559    else {
560        return None;
561    };
562    let Hunk::Conflict {
563        base: b2,
564        ours: o2,
565        theirs: t2,
566    } = right
567    else {
568        return None;
569    };
570    // Git's xdl_simplify_non_conflicts absorbs the common gap between the two conflicts back
571    // into *both* sides of the merged conflict (the gap lines are identical on ours/theirs).
572    let mut merged_base = b1.clone();
573    merged_base.extend(gap.iter().cloned());
574    merged_base.extend(b2.iter().cloned());
575    let mut merged_ours = o1.clone();
576    merged_ours.extend(gap.iter().cloned());
577    merged_ours.extend(o2.iter().cloned());
578    let mut merged_theirs = t1.clone();
579    merged_theirs.extend(gap.iter().cloned());
580    merged_theirs.extend(t2.iter().cloned());
581    Some(Hunk::Conflict {
582        base: merged_base,
583        ours: merged_ours,
584        theirs: merged_theirs,
585    })
586}
587
588/// Git `xdl_simplify_non_conflicts` for our hunk list.
589fn simplify_non_conflicts_between_conflicts(
590    hunks: Vec<Hunk>,
591    simplify_if_no_alnum: bool,
592) -> Vec<Hunk> {
593    let mut out: Vec<Hunk> = Vec::with_capacity(hunks.len());
594    let mut i = 0usize;
595    while i < hunks.len() {
596        let Hunk::Conflict { .. } = &hunks[i] else {
597            out.push(hunks[i].clone());
598            i += 1;
599            continue;
600        };
601        let mut merged = hunks[i].clone();
602        let mut j = i;
603        loop {
604            let Some(Hunk::Unchanged(gap)) = hunks.get(j + 1) else {
605                break;
606            };
607            let Some(Hunk::Conflict { .. }) = hunks.get(j + 2) else {
608                break;
609            };
610            if gap_blocks_merge_between_conflicts(gap, simplify_if_no_alnum) {
611                break;
612            }
613            let Some(m) = merge_two_adjacent_conflicts(&merged, gap, &hunks[j + 2]) else {
614                break;
615            };
616            merged = m;
617            j += 2;
618        }
619        out.push(merged);
620        i = j + 1;
621    }
622    out
623}
624
625/// Split a conflict into sub-hunks by diffing the two sides (git `xdl_refine_conflicts`).
626fn refine_conflicts_by_subdiff(
627    hunks: Vec<Hunk>,
628    algo: Algorithm,
629    ws_mode: &WhitespaceMode,
630) -> Vec<Hunk> {
631    let mut out = Vec::with_capacity(hunks.len());
632    for h in hunks {
633        let Hunk::Conflict { base, ours, theirs } = h else {
634            out.push(h);
635            continue;
636        };
637        if ours.is_empty() || theirs.is_empty() {
638            out.push(Hunk::Conflict { base, ours, theirs });
639            continue;
640        }
641        let o_cmp = normalize_lines_for_compare(&ours, ws_mode);
642        let t_cmp = normalize_lines_for_compare(&theirs, ws_mode);
643        let sub_ops = similar::capture_diff_slices(algo, &o_cmp, &t_cmp);
644        if sub_ops.len() == 1 && sub_ops[0].tag() == DiffTag::Equal {
645            out.push(Hunk::Unchanged(ours));
646            continue;
647        }
648        for op in &sub_ops {
649            let old = op.old_range();
650            let new_ = op.new_range();
651            match op.tag() {
652                DiffTag::Equal => {
653                    if !old.is_empty() {
654                        out.push(Hunk::Unchanged(ours[old.start..old.end].to_vec()));
655                    }
656                }
657                DiffTag::Delete => {
658                    // Lines present only on ours within an overlapping (both-changed) region
659                    // stay a conflict with an empty theirs side (git's xdl_refine_conflicts
660                    // re-emits these as conflict hunks, not clean resolutions).
661                    if !old.is_empty() {
662                        out.push(Hunk::Conflict {
663                            base: Vec::new(),
664                            ours: ours[old.start..old.end].to_vec(),
665                            theirs: Vec::new(),
666                        });
667                    }
668                }
669                DiffTag::Insert => {
670                    if !new_.is_empty() {
671                        out.push(Hunk::Conflict {
672                            base: Vec::new(),
673                            ours: Vec::new(),
674                            theirs: theirs[new_.start..new_.end].to_vec(),
675                        });
676                    }
677                }
678                DiffTag::Replace => {
679                    out.push(Hunk::Conflict {
680                        base: Vec::new(),
681                        ours: ours[old.start..old.end].to_vec(),
682                        theirs: theirs[new_.start..new_.end].to_vec(),
683                    });
684                }
685            }
686        }
687    }
688    out
689}
690
691/// A classified merge region (owns its lines).
692#[derive(Debug, Clone)]
693enum Hunk {
694    /// Lines unchanged by both sides (base content).
695    Unchanged(Vec<Vec<u8>>),
696    /// Lines changed only by ours.
697    OnlyOurs {
698        /// Base lines for the changed region (empty for pure insertions).
699        base: Vec<Vec<u8>>,
700        /// Output lines from ours.
701        ours: Vec<Vec<u8>>,
702    },
703    /// Lines changed only by theirs.
704    OnlyTheirs {
705        /// Base lines for the changed region (empty for pure insertions).
706        base: Vec<Vec<u8>>,
707        /// Output lines from theirs.
708        theirs: Vec<Vec<u8>>,
709    },
710    /// Lines changed by both sides — a conflict.
711    Conflict {
712        base: Vec<Vec<u8>>,
713        ours: Vec<Vec<u8>>,
714        theirs: Vec<Vec<u8>>,
715    },
716}
717
718/// Compute the merge hunks from two diff sequences against the same base.
719///
720/// Uses a position-by-position scan with op-based expansion to ensure that
721/// changed regions spanning multiple lines are not split mid-op.
722fn compute_hunks(
723    base: &[Vec<u8>],
724    ours: &[Vec<u8>],
725    theirs: &[Vec<u8>],
726    ours_ops: &[DiffOp],
727    theirs_ops: &[DiffOp],
728    ws_mode: &WhitespaceMode,
729) -> Vec<Hunk> {
730    let ours_changed = changed_mask(ours_ops, base.len());
731    let theirs_changed = changed_mask(theirs_ops, base.len());
732
733    let ours_inserts = collect_inserts(ours_ops, base.len());
734    let theirs_inserts = collect_inserts(theirs_ops, base.len());
735
736    let mut hunks: Vec<Hunk> = Vec::new();
737    let mut pos = 0usize;
738
739    while pos <= base.len() {
740        // Emit pure insertions before this base position.
741        emit_inserts_at(
742            pos,
743            &ours_inserts,
744            &theirs_inserts,
745            ours,
746            theirs,
747            &mut hunks,
748        );
749
750        if pos == base.len() {
751            break;
752        }
753
754        let o = ours_changed[pos];
755        let t = theirs_changed[pos];
756
757        if !o && !t {
758            // Unchanged run. Stop before a position that has pending insertions
759            // on either side so that insertions are emitted in-order at the
760            // correct base position.
761            let mut end = pos + 1;
762            while end < base.len()
763                && !ours_changed[end]
764                && !theirs_changed[end]
765                && ours_inserts[end].is_empty()
766                && theirs_inserts[end].is_empty()
767            {
768                end += 1;
769            }
770            let terminator_only_diff = base.len() == ours.len()
771                && (pos..end).all(|i| merge_line_equal(&base[i], &ours[i]))
772                && (pos..end).any(|i| base[i] != ours[i]);
773            let unchanged_lines = if ws_mode.ignore_all_space
774                || ws_mode.ignore_space_change
775                || ws_mode.ignore_space_at_eol
776                || ws_mode.ignore_cr_at_eol
777                || terminator_only_diff
778            {
779                &ours[pos..end]
780            } else {
781                &base[pos..end]
782            };
783            hunks.push(Hunk::Unchanged(unchanged_lines.to_vec()));
784            pos = end;
785            continue;
786        }
787
788        // Changed region: expand end until all overlapping Changed ops from
789        // either side are fully consumed.  Repeat until stable.
790        //
791        // A region is also extended across *contiguous* changed base lines from EITHER side:
792        // when ours changes one base line and theirs changes the very next base line (with no
793        // unchanged base line between), Git's `xdl_merge` treats them as a single overlapping
794        // region and reports a conflict. Without this, interleaved deletions (e.g. base `a b c d e`,
795        // ours `a c e`, theirs `b d`) would be split into alternating one-sided hunks that each
796        // "resolve" cleanly, producing an empty merge where Git conflicts (t7201 8).
797        let mut end = pos + 1;
798        loop {
799            let mut new_end = furthest_changed_op_end(ours_ops, pos, end)
800                .max(furthest_changed_op_end(theirs_ops, pos, end));
801            while new_end < base.len() && (ours_changed[new_end] || theirs_changed[new_end]) {
802                new_end += 1;
803            }
804            if new_end <= end {
805                break;
806            }
807            end = new_end;
808        }
809
810        // Classify the full range [pos..end).
811        let any_ours = (pos..end).any(|p| ours_changed[p]);
812        let any_theirs = (pos..end).any(|p| theirs_changed[p]);
813
814        match (any_ours, any_theirs) {
815            (true, false) => {
816                let c = collect_new_lines(ours_ops, ours, pos, end);
817                hunks.push(Hunk::OnlyOurs {
818                    base: base[pos..end].to_vec(),
819                    ours: c,
820                });
821            }
822            (false, true) => {
823                let c = collect_new_lines(theirs_ops, theirs, pos, end);
824                hunks.push(Hunk::OnlyTheirs {
825                    base: base[pos..end].to_vec(),
826                    theirs: c,
827                });
828            }
829            (true, true) => {
830                let o = collect_new_lines(ours_ops, ours, pos, end);
831                let t = collect_new_lines(theirs_ops, theirs, pos, end);
832                // Git's `xdl_merge` auto-resolves a both-sides-changed region when both sides
833                // produce textually identical content (`xdl_merge_cmp_lines` over the two
834                // postimages): the change is taken once instead of conflicting. Only a genuine
835                // textual difference becomes a conflict (see t3424 `--empty=keep`, where a commit
836                // whose change is already present in upstream merges cleanly and becomes empty).
837                if merge_lines_equal(&o, &t) {
838                    hunks.push(Hunk::OnlyOurs {
839                        base: base[pos..end].to_vec(),
840                        ours: o,
841                    });
842                } else {
843                    hunks.push(Hunk::Conflict {
844                        base: base[pos..end].to_vec(),
845                        ours: o,
846                        theirs: t,
847                    });
848                }
849            }
850            (false, false) => {
851                // Should not happen, but treat as unchanged.
852                hunks.push(Hunk::Unchanged(base[pos..end].to_vec()));
853            }
854        }
855
856        pos = end;
857    }
858
859    hunks
860}
861
862/// Build a boolean mask: `mask[i]` is `true` if base line `i` is covered by
863/// a non-Equal op.
864fn changed_mask(ops: &[DiffOp], base_len: usize) -> Vec<bool> {
865    let mut mask = vec![false; base_len];
866    for op in ops {
867        if op.tag() == DiffTag::Equal {
868            continue;
869        }
870        for p in op.old_range() {
871            if p < base_len {
872                mask[p] = true;
873            }
874        }
875    }
876    mask
877}
878
879/// Collect pure insertions (ops with empty old_range) at each base position.
880///
881/// Returns a `Vec` of length `base_len + 1`; entry `i` holds all
882/// `(new_start, new_end)` ranges inserted before base line `i`.
883fn collect_inserts(ops: &[DiffOp], base_len: usize) -> Vec<Vec<(usize, usize)>> {
884    let mut result: Vec<Vec<(usize, usize)>> = vec![Vec::new(); base_len + 1];
885    for op in ops {
886        let old = op.old_range();
887        let new_ = op.new_range();
888        if old.is_empty() && !new_.is_empty() {
889            let pos = old.start.min(base_len);
890            result[pos].push((new_.start, new_.end));
891        }
892    }
893    result
894}
895
896/// Emit hunks for pure insertions at base position `pos`.
897fn emit_inserts_at(
898    pos: usize,
899    ours_inserts: &[Vec<(usize, usize)>],
900    theirs_inserts: &[Vec<(usize, usize)>],
901    ours: &[Vec<u8>],
902    theirs: &[Vec<u8>],
903    hunks: &mut Vec<Hunk>,
904) {
905    let o_ins = &ours_inserts[pos];
906    let t_ins = &theirs_inserts[pos];
907
908    let has_ours = !o_ins.is_empty();
909    let has_theirs = !t_ins.is_empty();
910
911    if has_ours && has_theirs {
912        // Both sides insert at the same base position (empty base region). Git's
913        // `xdl_merge` treats this as a single overlapping change region and only
914        // auto-resolves when the two insertions are textually identical; any
915        // difference is a conflict. It still factors out a shared leading and
916        // trailing run as unchanged context (emitted outside the conflict
917        // markers), so we extract a common prefix *and* suffix and conflict on
918        // whatever remains -- even when one side's remainder is empty (a
919        // "superset" insertion still conflicts; see t4108/t4038).
920        let o_lines: Vec<Vec<u8>> = o_ins
921            .iter()
922            .flat_map(|&(s, e)| ours[s..e].to_vec())
923            .collect();
924        let t_lines: Vec<Vec<u8>> = t_ins
925            .iter()
926            .flat_map(|&(s, e)| theirs[s..e].to_vec())
927            .collect();
928
929        let mut common_prefix = 0usize;
930        while common_prefix < o_lines.len()
931            && common_prefix < t_lines.len()
932            && o_lines[common_prefix] == t_lines[common_prefix]
933        {
934            common_prefix += 1;
935        }
936
937        let mut common_suffix = 0usize;
938        while common_suffix < o_lines.len() - common_prefix
939            && common_suffix < t_lines.len() - common_prefix
940            && o_lines[o_lines.len() - 1 - common_suffix]
941                == t_lines[t_lines.len() - 1 - common_suffix]
942        {
943            common_suffix += 1;
944        }
945
946        if common_prefix > 0 {
947            hunks.push(Hunk::Unchanged(o_lines[..common_prefix].to_vec()));
948        }
949
950        let ours_tail = o_lines[common_prefix..o_lines.len() - common_suffix].to_vec();
951        let theirs_tail = t_lines[common_prefix..t_lines.len() - common_suffix].to_vec();
952
953        if !ours_tail.is_empty() || !theirs_tail.is_empty() {
954            // Identical insertions on both sides auto-resolve to a single copy (git
955            // `xdl_merge`); only a genuine textual difference conflicts.
956            if merge_lines_equal(&ours_tail, &theirs_tail) {
957                hunks.push(Hunk::OnlyOurs {
958                    base: Vec::new(),
959                    ours: ours_tail,
960                });
961            } else {
962                hunks.push(Hunk::Conflict {
963                    base: Vec::new(),
964                    ours: ours_tail,
965                    theirs: theirs_tail,
966                });
967            }
968        }
969
970        if common_suffix > 0 {
971            hunks.push(Hunk::Unchanged(
972                o_lines[o_lines.len() - common_suffix..].to_vec(),
973            ));
974        }
975    } else if has_ours {
976        for &(ns, ne) in o_ins {
977            let lines: Vec<Vec<u8>> = ours[ns..ne].to_vec();
978            if !lines.is_empty() {
979                hunks.push(Hunk::OnlyOurs {
980                    base: Vec::new(),
981                    ours: lines,
982                });
983            }
984        }
985    } else if has_theirs {
986        for &(ns, ne) in t_ins {
987            let lines: Vec<Vec<u8>> = theirs[ns..ne].to_vec();
988            if !lines.is_empty() {
989                hunks.push(Hunk::OnlyTheirs {
990                    base: Vec::new(),
991                    theirs: lines,
992                });
993            }
994        }
995    }
996}
997
998fn adjust_zealous_hunks(hunks: Vec<Hunk>) -> Vec<Hunk> {
999    let mut out: Vec<Hunk> = Vec::new();
1000    let mut i = 0usize;
1001
1002    while i < hunks.len() {
1003        let mut consumed = 1usize;
1004        let mut transformed: Option<Vec<Hunk>> = None;
1005
1006        let (pre_insert, mid_idx) = match &hunks[i] {
1007            Hunk::OnlyTheirs { base, theirs } if base.is_empty() => {
1008                (Some(theirs.as_slice()), i + 1)
1009            }
1010            _ => (None, i),
1011        };
1012
1013        if let Some(Hunk::OnlyOurs { base, ours }) = hunks.get(mid_idx) {
1014            if !base.is_empty() {
1015                let post_insert = match hunks.get(mid_idx + 1) {
1016                    Some(Hunk::OnlyTheirs { base, theirs }) if base.is_empty() => {
1017                        Some(theirs.as_slice())
1018                    }
1019                    _ => None,
1020                };
1021
1022                let mut prefix_len = 0usize;
1023                if let Some(pre) = pre_insert {
1024                    if !pre.is_empty() && ours.starts_with(pre) {
1025                        prefix_len = pre.len();
1026                    }
1027                }
1028
1029                let mut suffix_len = 0usize;
1030                if let Some(post) = post_insert {
1031                    if !post.is_empty() && ours[prefix_len..].ends_with(post) {
1032                        suffix_len = post.len();
1033                    }
1034                }
1035
1036                if prefix_len > 0 || suffix_len > 0 {
1037                    consumed = if pre_insert.is_some() {
1038                        if post_insert.is_some() {
1039                            3
1040                        } else {
1041                            2
1042                        }
1043                    } else if post_insert.is_some() {
1044                        2
1045                    } else {
1046                        1
1047                    };
1048
1049                    let mut replacement: Vec<Hunk> = Vec::new();
1050                    if prefix_len > 0 {
1051                        replacement.push(Hunk::Unchanged(ours[..prefix_len].to_vec()));
1052                    }
1053                    replacement.push(Hunk::Conflict {
1054                        base: base.clone(),
1055                        ours: ours[prefix_len..ours.len() - suffix_len].to_vec(),
1056                        theirs: base.clone(),
1057                    });
1058                    if suffix_len > 0 {
1059                        replacement.push(Hunk::Unchanged(ours[ours.len() - suffix_len..].to_vec()));
1060                    }
1061                    transformed = Some(replacement);
1062                }
1063            }
1064        }
1065
1066        if let Some(replacement) = transformed {
1067            for h in replacement {
1068                push_hunk_with_unchanged_merge(&mut out, h);
1069            }
1070            i += consumed;
1071            continue;
1072        }
1073
1074        push_hunk_with_unchanged_merge(&mut out, hunks[i].clone());
1075        i += 1;
1076    }
1077
1078    out
1079}
1080
1081/// If one side replaces base lines and the other appends an insertion immediately after
1082/// that base span, `compute_hunks` emits two hunks (`Only*` + trailing insert). Git treats
1083/// the combined region as one conflict (e.g. base `hello\n`, ours adds `hello\n`, theirs
1084/// replaces with `remove-conflict\n`).
1085/// When both sides edit **adjacent** base lines but each line is changed on only one side, Git's
1086/// merge reports a single conflict spanning both lines (`t4108-apply-threeway`).
1087fn merge_adjacent_one_sided_line_changes_to_conflict(hunks: Vec<Hunk>) -> Vec<Hunk> {
1088    let mut out: Vec<Hunk> = Vec::with_capacity(hunks.len());
1089    let mut i = 0usize;
1090    while i < hunks.len() {
1091        let merged = match (&hunks[i], hunks.get(i + 1)) {
1092            (
1093                Hunk::OnlyTheirs {
1094                    base: b1,
1095                    theirs: t1,
1096                },
1097                Some(Hunk::OnlyOurs { base: b2, ours: o2 }),
1098            ) if b1.len() == 1 && b2.len() == 1 && t1.len() == 1 && o2.len() == 1 => {
1099                Some(Hunk::Conflict {
1100                    base: vec![b1[0].clone(), b2[0].clone()],
1101                    ours: vec![b1[0].clone(), o2[0].clone()],
1102                    theirs: vec![t1[0].clone(), b2[0].clone()],
1103                })
1104            }
1105            (
1106                Hunk::OnlyOurs { base: b1, ours: o1 },
1107                Some(Hunk::OnlyTheirs {
1108                    base: b2,
1109                    theirs: t2,
1110                }),
1111            ) if b1.len() == 1 && b2.len() == 1 && o1.len() == 1 && t2.len() == 1 => {
1112                Some(Hunk::Conflict {
1113                    base: vec![b1[0].clone(), b2[0].clone()],
1114                    ours: vec![o1[0].clone(), b2[0].clone()],
1115                    theirs: vec![b1[0].clone(), t2[0].clone()],
1116                })
1117            }
1118            _ => None,
1119        };
1120        if let Some(h) = merged {
1121            out.push(h);
1122            i += 2;
1123        } else {
1124            out.push(hunks[i].clone());
1125            i += 1;
1126        }
1127    }
1128    out
1129}
1130
1131fn merge_adjacent_replace_and_trailing_insert_conflicts(hunks: Vec<Hunk>) -> Vec<Hunk> {
1132    let mut out: Vec<Hunk> = Vec::with_capacity(hunks.len());
1133    let mut i = 0usize;
1134    while i < hunks.len() {
1135        let merged = match (&hunks[i], hunks.get(i + 1)) {
1136            (Hunk::OnlyTheirs { base, theirs }, Some(Hunk::OnlyOurs { base: bo, ours: o }))
1137                if !base.is_empty() && bo.is_empty() && !o.is_empty() && !theirs.is_empty() =>
1138            {
1139                Some(Hunk::Conflict {
1140                    base: base.clone(),
1141                    ours: o.clone(),
1142                    theirs: theirs.clone(),
1143                })
1144            }
1145            (
1146                Hunk::OnlyOurs { base, ours },
1147                Some(Hunk::OnlyTheirs {
1148                    base: bt,
1149                    theirs: t,
1150                }),
1151            ) if !base.is_empty() && bt.is_empty() && !t.is_empty() && !ours.is_empty() => {
1152                Some(Hunk::Conflict {
1153                    base: base.clone(),
1154                    ours: ours.clone(),
1155                    theirs: t.clone(),
1156                })
1157            }
1158            // A pure insertion by one side immediately *before* a base line the other side
1159            // modifies. `compute_hunks` emits the leading insert (`Only*` with empty base) then
1160            // the modify (`Only*` with non-empty base) as two separate hunks, but git's
1161            // `xdl_merge` attaches the insertion to the adjacent change region and reports a
1162            // single conflict (e.g. base `d`, ours `c\nd`, theirs `e`). Mirror that here.
1163            (Hunk::OnlyOurs { base: bo, ours }, Some(Hunk::OnlyTheirs { base: bt, theirs }))
1164                if bo.is_empty() && !bt.is_empty() && !ours.is_empty() && !theirs.is_empty() =>
1165            {
1166                let mut conflict_ours = ours.clone();
1167                conflict_ours.extend(bt.iter().cloned());
1168                Some(Hunk::Conflict {
1169                    base: bt.clone(),
1170                    ours: conflict_ours,
1171                    theirs: theirs.clone(),
1172                })
1173            }
1174            (Hunk::OnlyTheirs { base: bt, theirs }, Some(Hunk::OnlyOurs { base: bo, ours }))
1175                if bt.is_empty() && !bo.is_empty() && !theirs.is_empty() && !ours.is_empty() =>
1176            {
1177                let mut conflict_theirs = theirs.clone();
1178                conflict_theirs.extend(bo.iter().cloned());
1179                Some(Hunk::Conflict {
1180                    base: bo.clone(),
1181                    ours: ours.clone(),
1182                    theirs: conflict_theirs,
1183                })
1184            }
1185            _ => None,
1186        };
1187        if let Some(h) = merged {
1188            out.push(h);
1189            i += 2;
1190        } else {
1191            out.push(hunks[i].clone());
1192            i += 1;
1193        }
1194    }
1195    out
1196}
1197
1198/// Fold a one-sided pure insertion (`Only* { base: [] }`) that sits *immediately
1199/// adjacent* to a `Conflict` into that conflict's matching side.
1200///
1201/// Git's `xdl_merge` attaches an insertion to a neighbouring change region when
1202/// they touch (`xdl_append_merge`: `i1 <= m->i1 + m->chg1`). After
1203/// `merge_adjacent_replace_and_trailing_insert_conflicts` has turned a
1204/// leading-insert + modify into a `Conflict`, a *trailing* one-sided insert that
1205/// directly follows the conflict (empty base ⇒ no intervening base line) is part
1206/// of the same region and must be absorbed. This is what makes a previously
1207/// committed conflict (whose `>>>>>>>` marker line is inserted right after the
1208/// contested base line) re-conflict correctly — e.g. t4200 "rerere with inner
1209/// conflict markers", where base `bar`, ours `<<<\nfoo\n===\nbar\n>>>`,
1210/// theirs `baz` must yield a single conflict containing all of `ours` vs `baz`.
1211fn fold_adjacent_inserts_into_conflict(hunks: Vec<Hunk>) -> Vec<Hunk> {
1212    let mut out: Vec<Hunk> = Vec::with_capacity(hunks.len());
1213    for hunk in hunks {
1214        match hunk {
1215            // A pure insertion directly following a conflict joins that conflict's
1216            // matching side.
1217            Hunk::OnlyOurs { ref base, ref ours } if base.is_empty() => {
1218                if let Some(Hunk::Conflict { ours: c_ours, .. }) = out.last_mut() {
1219                    c_ours.extend(ours.iter().cloned());
1220                    continue;
1221                }
1222                out.push(hunk);
1223            }
1224            Hunk::OnlyTheirs {
1225                ref base,
1226                ref theirs,
1227            } if base.is_empty() => {
1228                if let Some(Hunk::Conflict {
1229                    theirs: c_theirs, ..
1230                }) = out.last_mut()
1231                {
1232                    c_theirs.extend(theirs.iter().cloned());
1233                    continue;
1234                }
1235                out.push(hunk);
1236            }
1237            other => out.push(other),
1238        }
1239    }
1240    out
1241}
1242
1243fn coalesce_nearby_conflicts(hunks: Vec<Hunk>, max_gap_lines: usize, enable: bool) -> Vec<Hunk> {
1244    if !enable {
1245        return hunks;
1246    }
1247    let mut out: Vec<Hunk> = Vec::new();
1248    let mut i = 0usize;
1249
1250    while i < hunks.len() {
1251        let Some(Hunk::Conflict { base, ours, theirs }) = hunks.get(i) else {
1252            out.push(hunks[i].clone());
1253            i += 1;
1254            continue;
1255        };
1256
1257        let mut merged_base = base.clone();
1258        let mut merged_ours = ours.clone();
1259        let mut merged_theirs = theirs.clone();
1260        let mut j = i;
1261
1262        loop {
1263            let Some(Hunk::Unchanged(gap)) = hunks.get(j + 1) else {
1264                break;
1265            };
1266            let Some(Hunk::Conflict {
1267                base: next_base,
1268                ours: next_ours,
1269                theirs: next_theirs,
1270            }) = hunks.get(j + 2)
1271            else {
1272                break;
1273            };
1274            if gap.len() > max_gap_lines {
1275                break;
1276            }
1277
1278            merged_base.extend(gap.iter().cloned());
1279            merged_base.extend(next_base.iter().cloned());
1280            merged_ours.extend(gap.iter().cloned());
1281            merged_ours.extend(next_ours.iter().cloned());
1282            merged_theirs.extend(gap.iter().cloned());
1283            merged_theirs.extend(next_theirs.iter().cloned());
1284            j += 2;
1285        }
1286
1287        out.push(Hunk::Conflict {
1288            base: merged_base,
1289            ours: merged_ours,
1290            theirs: merged_theirs,
1291        });
1292        i = j + 1;
1293    }
1294
1295    out
1296}
1297
1298fn push_hunk_with_unchanged_merge(out: &mut Vec<Hunk>, hunk: Hunk) {
1299    match hunk {
1300        Hunk::Unchanged(mut lines) => {
1301            if let Some(Hunk::Unchanged(prev)) = out.last_mut() {
1302                prev.append(&mut lines);
1303            } else if !lines.is_empty() {
1304                out.push(Hunk::Unchanged(lines));
1305            }
1306        }
1307        other => out.push(other),
1308    }
1309}
1310
1311/// Return the maximum `old_range().end` among all Changed ops that overlap
1312/// with `[run_start..current_end)`.  Returns `current_end` if nothing
1313/// extends further.
1314fn furthest_changed_op_end(ops: &[DiffOp], run_start: usize, current_end: usize) -> usize {
1315    let mut max = current_end;
1316    for op in ops {
1317        if op.tag() == DiffTag::Equal {
1318            continue;
1319        }
1320        let old = op.old_range();
1321        if old.start < current_end && old.end > run_start && old.end > max {
1322            max = old.end;
1323        }
1324    }
1325    max
1326}
1327
1328/// Collect new (output) lines for base range `[base_start..base_end)`.
1329///
1330/// For each op whose `old_range` overlaps the range:
1331/// - Equal ops contribute their corresponding new-side lines.
1332/// - Changed ops contribute their full replacement.
1333fn collect_new_lines(
1334    ops: &[DiffOp],
1335    new: &[Vec<u8>],
1336    base_start: usize,
1337    base_end: usize,
1338) -> Vec<Vec<u8>> {
1339    let mut lines = Vec::new();
1340    for op in ops {
1341        let old = op.old_range();
1342        let new_ = op.new_range();
1343        if old.is_empty() {
1344            continue; // pure insertion, handled separately
1345        }
1346        if old.end <= base_start || old.start >= base_end {
1347            continue; // no overlap
1348        }
1349        if op.tag() == DiffTag::Equal {
1350            let overlap_start = base_start.max(old.start);
1351            let overlap_end = base_end.min(old.end);
1352            let offset = overlap_start - old.start;
1353            let len = overlap_end - overlap_start;
1354            for i in offset..offset + len {
1355                if new_.start + i < new_.end {
1356                    lines.push(new[new_.start + i].clone());
1357                }
1358            }
1359        } else {
1360            for i in new_.clone() {
1361                lines.push(new[i].clone());
1362            }
1363        }
1364    }
1365    lines
1366}
1367
1368/// Run Myers diff on two line slices.
1369fn _diff_ops(old: &[Vec<u8>], new: &[Vec<u8>]) -> Vec<DiffOp> {
1370    similar::capture_diff_slices(Algorithm::Myers, old, new)
1371}
1372
1373/// Split a byte slice into lines, each including its trailing `\n` if present.
1374fn split_lines(data: &[u8]) -> Vec<Vec<u8>> {
1375    if data.is_empty() {
1376        return Vec::new();
1377    }
1378    let mut lines = Vec::new();
1379    let mut start = 0;
1380    for i in 0..data.len() {
1381        if data[i] == b'\n' {
1382            lines.push(data[start..=i].to_vec());
1383            start = i + 1;
1384        }
1385    }
1386    if start < data.len() {
1387        lines.push(data[start..].to_vec());
1388    }
1389    lines
1390}
1391
1392#[derive(Clone, Copy, Debug, Default)]
1393struct WhitespaceMode {
1394    ignore_all_space: bool,
1395    ignore_space_change: bool,
1396    ignore_space_at_eol: bool,
1397    ignore_cr_at_eol: bool,
1398}
1399
1400fn normalize_lines_for_compare(lines: &[Vec<u8>], mode: &WhitespaceMode) -> Vec<Vec<u8>> {
1401    lines
1402        .iter()
1403        .map(|line| normalize_line_for_compare(line, mode))
1404        .collect()
1405}
1406
1407fn lines_for_merge_diff_compare(lines: &[Vec<u8>], mode: &WhitespaceMode) -> Vec<Vec<u8>> {
1408    lines
1409        .iter()
1410        .map(|line| normalize_line_for_compare(strip_line_terminator(line), mode))
1411        .collect()
1412}
1413
1414fn normalize_line_for_compare(line: &[u8], mode: &WhitespaceMode) -> Vec<u8> {
1415    let mut bytes = line.to_vec();
1416
1417    if mode.ignore_cr_at_eol && bytes.ends_with(b"\r\n") {
1418        let len = bytes.len();
1419        bytes.remove(len - 2);
1420    }
1421
1422    if mode.ignore_all_space {
1423        return bytes
1424            .into_iter()
1425            .filter(|b| !b.is_ascii_whitespace())
1426            .collect();
1427    }
1428
1429    if mode.ignore_space_change {
1430        let mut out = Vec::with_capacity(bytes.len());
1431        let mut in_ws = false;
1432        for ch in bytes {
1433            if ch.is_ascii_whitespace() {
1434                if !in_ws {
1435                    out.push(b' ');
1436                    in_ws = true;
1437                }
1438            } else {
1439                out.push(ch);
1440                in_ws = false;
1441            }
1442        }
1443        while out.last().is_some_and(|b| b.is_ascii_whitespace()) {
1444            out.pop();
1445        }
1446        return out;
1447    }
1448
1449    if mode.ignore_space_at_eol {
1450        if bytes.last().copied() == Some(b'\n') {
1451            let mut body = bytes[..bytes.len() - 1].to_vec();
1452            while body.last().is_some_and(|b| b.is_ascii_whitespace()) {
1453                body.pop();
1454            }
1455            body.push(b'\n');
1456            bytes = body;
1457        } else {
1458            while bytes.last().is_some_and(|b| b.is_ascii_whitespace()) {
1459                bytes.pop();
1460            }
1461        }
1462    }
1463
1464    bytes
1465}
1466
1467/// Returns `true` if the byte slice appears to be binary content.
1468///
1469/// Uses the same heuristic as git: any NUL byte means binary.
1470#[must_use]
1471pub fn is_binary(data: &[u8]) -> bool {
1472    data.contains(&0u8)
1473}
1474
1475#[cfg(test)]
1476mod tests {
1477    use super::*;
1478
1479    fn do_merge(base: &str, ours: &str, theirs: &str) -> (String, usize) {
1480        let input = MergeInput {
1481            base: base.as_bytes(),
1482            ours: ours.as_bytes(),
1483            theirs: theirs.as_bytes(),
1484            label_ours: "ours",
1485            label_base: "base",
1486            label_theirs: "theirs",
1487            favor: MergeFavor::None,
1488            style: ConflictStyle::Merge,
1489            marker_size: 7,
1490            diff_algorithm: None,
1491            ignore_all_space: false,
1492            ignore_space_change: false,
1493            ignore_space_at_eol: false,
1494            ignore_cr_at_eol: false,
1495        };
1496        let out = merge(&input).unwrap();
1497        (String::from_utf8(out.content).unwrap(), out.conflicts)
1498    }
1499
1500    #[test]
1501    fn no_changes() {
1502        let t = "line1\nline2\nline3\n";
1503        let (r, c) = do_merge(t, t, t);
1504        assert_eq!(r, t);
1505        assert_eq!(c, 0);
1506    }
1507
1508    #[test]
1509    fn only_ours() {
1510        let (r, c) = do_merge("a\nb\nc\n", "a\nB\nc\n", "a\nb\nc\n");
1511        assert_eq!(r, "a\nB\nc\n");
1512        assert_eq!(c, 0);
1513    }
1514
1515    #[test]
1516    fn only_theirs() {
1517        let (r, c) = do_merge("a\nb\nc\n", "a\nb\nc\n", "a\nB\nc\n");
1518        assert_eq!(r, "a\nB\nc\n");
1519        assert_eq!(c, 0);
1520    }
1521
1522    #[test]
1523    fn conflict() {
1524        let (r, c) = do_merge("a\nb\nc\n", "a\nX\nc\n", "a\nY\nc\n");
1525        assert_eq!(c, 1);
1526        assert!(r.contains("<<<<<<< ours\nX\n=======\nY\n>>>>>>> theirs"));
1527    }
1528
1529    #[test]
1530    fn t4108_style_overlapping_edits_conflict() {
1531        let base = "1\n2\n3\n4\n5\n6\n7\n";
1532        let ours = "1\ntwo\n3\n4\n5\nsix\n7\n";
1533        let theirs = "1\n2\n3\n4\nfive\n6\n7\n";
1534        let (_r, c) = do_merge(base, ours, theirs);
1535        assert!(
1536            c >= 1,
1537            "expected at least one conflict region (Git merge reports CONFLICT for this shape)"
1538        );
1539    }
1540
1541    #[test]
1542    fn conflict_delete_vs_change() {
1543        // ours deletes lines 1-2, theirs changes line 1 → conflict
1544        let base = "a\nb\nc\n";
1545        let ours = "c\n"; // deleted a and b
1546        let theirs = "A\nb\nc\n"; // changed a → A
1547        let (r, c) = do_merge(base, ours, theirs);
1548        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1549    }
1550
1551    #[test]
1552    fn conflict_replace_vs_insert_after_same_line() {
1553        // Base one line; ours duplicates it; theirs replaces it — Git reports content conflict.
1554        let base = "hello\n";
1555        let ours = "hello\nhello\n";
1556        let theirs = "remove-conflict\n";
1557        let (r, c) = do_merge(base, ours, theirs);
1558        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1559    }
1560
1561    #[test]
1562    fn conflict_insert_before_modified_line() {
1563        // Base one line; ours inserts a new line *before* it (keeping the base line); theirs
1564        // modifies that base line. Git's `xdl_merge` attaches the leading insertion to the
1565        // adjacent change region and reports a content conflict (t3407 `--abort after
1566        // --continue`: base `d`, ours `c\nd`, theirs `e`).
1567        let base = "d\n";
1568        let ours = "c\nd\n";
1569        let theirs = "e\n";
1570        let (r, c) = do_merge(base, ours, theirs);
1571        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1572        // The ours side of the conflict carries both the inserted and the kept base line.
1573        assert!(r.contains("c\nd\n"), "ours side should keep c\\nd: {r}");
1574        assert!(r.contains("e\n"), "theirs side should be e: {r}");
1575    }
1576
1577    #[test]
1578    fn conflict_insert_before_modified_line_symmetric() {
1579        // Symmetric case: theirs inserts before the base line, ours modifies it.
1580        let base = "d\n";
1581        let ours = "e\n";
1582        let theirs = "c\nd\n";
1583        let (r, c) = do_merge(base, ours, theirs);
1584        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1585    }
1586
1587    #[test]
1588    fn clean_insert_separated_from_modified_line() {
1589        // An insertion separated by unchanged context from the other side's modification must
1590        // still merge cleanly (git only conflicts when the insertion is *adjacent* to the change).
1591        let base = "a\nb\nc\n";
1592        let ours = "x\na\nb\nc\n"; // insert x at the top
1593        let theirs = "a\nb\nC\n"; // modify the last line, two lines away
1594        let (r, c) = do_merge(base, ours, theirs);
1595        assert_eq!(c, 0, "expected clean merge, got: {r:?}");
1596    }
1597
1598    #[test]
1599    fn favor_ours() {
1600        let input = MergeInput {
1601            base: b"a\nb\nc\n",
1602            ours: b"a\nX\nc\n",
1603            theirs: b"a\nY\nc\n",
1604            label_ours: "o",
1605            label_base: "b",
1606            label_theirs: "t",
1607            favor: MergeFavor::Ours,
1608            style: ConflictStyle::Merge,
1609            marker_size: 7,
1610            diff_algorithm: None,
1611            ignore_all_space: false,
1612            ignore_space_change: false,
1613            ignore_space_at_eol: false,
1614            ignore_cr_at_eol: false,
1615        };
1616        let out = merge(&input).unwrap();
1617        assert_eq!(out.content, b"a\nX\nc\n");
1618        assert_eq!(out.conflicts, 0);
1619    }
1620
1621    #[test]
1622    fn union_missing_lf() {
1623        let input = MergeInput {
1624            base: b"line1\nline2\nline3",
1625            ours: b"line1\nline2\nline3x",
1626            theirs: b"line1\nline2\nline3y",
1627            label_ours: "o",
1628            label_base: "b",
1629            label_theirs: "t",
1630            favor: MergeFavor::Union,
1631            style: ConflictStyle::Merge,
1632            marker_size: 7,
1633            diff_algorithm: None,
1634            ignore_all_space: false,
1635            ignore_space_change: false,
1636            ignore_space_at_eol: false,
1637            ignore_cr_at_eol: false,
1638        };
1639        let out = merge(&input).unwrap();
1640        // union: line3x\nline3y (newline inserted between no-LF lines)
1641        assert_eq!(out.content, b"line1\nline2\nline3x\nline3y");
1642    }
1643
1644    #[test]
1645    fn zdiff3_interesting_conflict_shape() {
1646        let input = MergeInput {
1647            base: b"1\n2\n3\n4\n5\n6\n7\n8\n9\n",
1648            ours: b"1\n2\n3\n4\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\n7\n8\n9\n",
1649            theirs: b"1\n2\n3\n4\nA\nB\nC\n5\n6\nG\nH\nI\nJ\n7\n8\n9\n",
1650            label_ours: "HEAD",
1651            label_base: "base",
1652            label_theirs: "right^0",
1653            favor: MergeFavor::None,
1654            style: ConflictStyle::ZealousDiff3,
1655            marker_size: 7,
1656            diff_algorithm: None,
1657            ignore_all_space: false,
1658            ignore_space_change: false,
1659            ignore_space_at_eol: false,
1660            ignore_cr_at_eol: false,
1661        };
1662        let out = merge(&input).unwrap();
1663        let rendered = String::from_utf8(out.content).unwrap();
1664        assert_eq!(out.conflicts, 1, "{rendered}");
1665        assert!(rendered.contains("<<<<<<< HEAD\nD\nE\nF\n"), "{rendered}");
1666    }
1667
1668    #[test]
1669    fn preserves_shared_and_superset_insertions() {
1670        let input = MergeInput {
1671            base: b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n",
1672            ours: b"1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n",
1673            theirs: b"1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n10.5\n",
1674            label_ours: "ours",
1675            label_base: "base",
1676            label_theirs: "theirs",
1677            favor: MergeFavor::None,
1678            style: ConflictStyle::Merge,
1679            marker_size: 7,
1680            diff_algorithm: None,
1681            ignore_all_space: false,
1682            ignore_space_change: false,
1683            ignore_space_at_eol: false,
1684            ignore_cr_at_eol: false,
1685        };
1686        let base_lines = split_lines(input.base);
1687        let ours_lines = split_lines(input.ours);
1688        let theirs_lines = split_lines(input.theirs);
1689        let ws_mode = WhitespaceMode::default();
1690        let base_compare_lines = normalize_lines_for_compare(&base_lines, &ws_mode);
1691        let ours_compare_lines = normalize_lines_for_compare(&ours_lines, &ws_mode);
1692        let theirs_compare_lines = normalize_lines_for_compare(&theirs_lines, &ws_mode);
1693        let ours_ops = similar::capture_diff_slices(
1694            Algorithm::Myers,
1695            &base_compare_lines,
1696            &ours_compare_lines,
1697        );
1698        let theirs_ops = similar::capture_diff_slices(
1699            Algorithm::Myers,
1700            &base_compare_lines,
1701            &theirs_compare_lines,
1702        );
1703        let hunks = compute_hunks(
1704            &base_lines,
1705            &ours_lines,
1706            &theirs_lines,
1707            &ours_ops,
1708            &theirs_ops,
1709            &ws_mode,
1710        );
1711        assert_eq!(
1712            hunks.len(),
1713            4,
1714            "expected unchanged shared insertion and theirs-only tail insertion"
1715        );
1716        let out = merge(&input).unwrap();
1717        let rendered = String::from_utf8(out.content).unwrap();
1718        assert_eq!(out.conflicts, 0, "{rendered}");
1719        assert_eq!(
1720            rendered, "1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n10.5\n",
1721            "{rendered}"
1722        );
1723    }
1724}