Skip to main content

grit_lib/
merge_file.rs

1//! Three-way file merge — the engine behind `grit merge-file`.
2//!
3//! Performs a line-level three-way merge of *base*, *ours*, and *theirs*
4//! using the Myers diff algorithm (via the `similar` crate).
5//!
6//! # Algorithm
7//!
8//! 1. Split each file into lines, preserving line endings.
9//! 2. Annotate each base line with its diff status in `ours` and `theirs`.
10//! 3. For each non-Unchanged position, expand the region to fully cover every
11//!    overlapping Changed op from either side, then classify as
12//!    OnlyOurs / OnlyTheirs / Conflict based on which sides are changed.
13//! 4. Emit each hunk: unchanged → base; single-side → that side's content;
14//!    two-side → conflict markers (or resolved via `favor`).
15//!
16//! Pure insertions (ops with empty old_range) are attached to the adjacent
17//! base position and emitted as OnlyOurs / OnlyTheirs hunks.
18
19use crate::error::Result;
20use similar::{Algorithm, DiffOp, DiffTag};
21
22/// How conflict regions should be resolved.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
24pub enum MergeFavor {
25    /// Leave conflict markers in the output (default).
26    #[default]
27    None,
28    /// For conflicts, keep our version.
29    Ours,
30    /// For conflicts, keep their version.
31    Theirs,
32    /// For conflicts, concatenate both versions.
33    Union,
34}
35
36/// Conflict-marker output style.
37#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
38pub enum ConflictStyle {
39    /// Standard two-section markers (`<<<<<<<`, `=======`, `>>>>>>>`).
40    #[default]
41    Merge,
42    /// Three-section markers including the base section.
43    Diff3,
44    /// Zealous diff3 (same as Diff3 for our purposes).
45    ZealousDiff3,
46}
47
48/// Input and options for a three-way merge.
49pub struct MergeInput<'a> {
50    /// Base (ancestor) content.
51    pub base: &'a [u8],
52    /// Our version of the file.
53    pub ours: &'a [u8],
54    /// Their version of the file.
55    pub theirs: &'a [u8],
56    /// Label for the ours conflict marker line.
57    pub label_ours: &'a str,
58    /// Label for the base conflict marker line (diff3 only).
59    pub label_base: &'a str,
60    /// Label for the theirs conflict marker line.
61    pub label_theirs: &'a str,
62    /// Conflict resolution strategy.
63    pub favor: MergeFavor,
64    /// Conflict marker style.
65    pub style: ConflictStyle,
66    /// Width of conflict markers in characters (0 → use default of 7).
67    pub marker_size: usize,
68    /// Diff algorithm.
69    pub diff_algorithm: Option<String>,
70    /// Ignore all whitespace when comparing lines (`-w`).
71    pub ignore_all_space: bool,
72    /// Ignore changes in amount of whitespace (`-b`).
73    pub ignore_space_change: bool,
74    /// Ignore whitespace at end of line.
75    pub ignore_space_at_eol: bool,
76    /// Ignore CR at end of line.
77    pub ignore_cr_at_eol: bool,
78}
79
80/// Result of a three-way merge.
81pub struct MergeOutput {
82    /// Merged file content.
83    pub content: Vec<u8>,
84    /// Number of conflict regions (0 = clean merge).
85    pub conflicts: usize,
86}
87
88/// Perform a three-way line-level merge.
89///
90/// # Errors
91///
92/// Currently infallible but returns `Result` for future extension.
93pub fn merge(input: &MergeInput<'_>) -> Result<MergeOutput> {
94    let base_lines = split_lines(input.base);
95    let ours_lines = split_lines(input.ours);
96    let theirs_lines = split_lines(input.theirs);
97    let ws_mode = WhitespaceMode {
98        ignore_all_space: input.ignore_all_space,
99        ignore_space_change: input.ignore_space_change,
100        ignore_space_at_eol: input.ignore_space_at_eol,
101        ignore_cr_at_eol: input.ignore_cr_at_eol,
102    };
103    let base_compare_lines = normalize_lines_for_compare(&base_lines, &ws_mode);
104    let ours_compare_lines = normalize_lines_for_compare(&ours_lines, &ws_mode);
105    let theirs_compare_lines = normalize_lines_for_compare(&theirs_lines, &ws_mode);
106
107    let algo = input
108        .diff_algorithm
109        .as_deref()
110        .map(|name| match name.to_lowercase().as_str() {
111            "histogram" | "patience" => similar::Algorithm::Patience,
112            _ => similar::Algorithm::Myers,
113        })
114        .unwrap_or(similar::Algorithm::Myers);
115    let ours_ops = similar::capture_diff_slices(algo, &base_compare_lines, &ours_compare_lines);
116    let theirs_ops = similar::capture_diff_slices(algo, &base_compare_lines, &theirs_compare_lines);
117
118    let mut hunks = compute_hunks(
119        &base_lines,
120        &ours_lines,
121        &theirs_lines,
122        &ours_ops,
123        &theirs_ops,
124        &ws_mode,
125    );
126    // Zealous diff3 (`adjust_zealous_hunks`) assumes the raw `compute_hunks` sequence.
127    if !matches!(input.style, ConflictStyle::ZealousDiff3) {
128        hunks = merge_adjacent_replace_and_trailing_insert_conflicts(hunks);
129    }
130    // Git keeps adjacent conflict regions separate when identical lines appear
131    // between them (e.g. t4200-rerere); do not merge Conflict+gap+Conflict.
132    hunks = coalesce_nearby_conflicts(hunks, 3, false);
133    if matches!(input.style, ConflictStyle::ZealousDiff3) {
134        hunks = adjust_zealous_hunks(hunks);
135    }
136
137    let marker = if input.marker_size == 0 {
138        7
139    } else {
140        input.marker_size
141    };
142
143    let mut content: Vec<u8> = Vec::new();
144    let mut conflicts = 0usize;
145
146    for (idx, hunk) in hunks.iter().enumerate() {
147        match hunk {
148            Hunk::Unchanged(lines) => append_lines(&mut content, lines),
149            Hunk::OnlyOurs { ours, .. } => append_lines(&mut content, ours),
150            Hunk::OnlyTheirs { theirs, .. } => append_lines(&mut content, theirs),
151            Hunk::Conflict { base, ours, theirs } => {
152                match input.favor {
153                    MergeFavor::Ours => {
154                        append_lines(&mut content, ours);
155                    }
156                    MergeFavor::Theirs => {
157                        append_lines(&mut content, theirs);
158                    }
159                    MergeFavor::Union => {
160                        append_lines(&mut content, ours);
161                        // If the ours portion doesn't end with \n and theirs is
162                        // non-empty, insert a newline so both sections appear as
163                        // separate lines (matches git's missing-LF handling).
164                        if !theirs.is_empty()
165                            && !ours.is_empty()
166                            && !ours.last().map(|l| l.ends_with(b"\n")).unwrap_or(false)
167                        {
168                            content.push(b'\n');
169                        }
170                        append_lines(&mut content, theirs);
171                    }
172                    MergeFavor::None => {
173                        conflicts += 1;
174                        if matches!(input.style, ConflictStyle::ZealousDiff3) {
175                            let (mut prefix_len, mut suffix_len) =
176                                common_prefix_suffix(ours, theirs);
177
178                            if prefix_len > 0
179                                && idx > 0
180                                && hunk_output_lines(&hunks[idx - 1])
181                                    .map(|lines| lines_end_with(lines, &ours[..prefix_len]))
182                                    .unwrap_or(false)
183                            {
184                                prefix_len = 0;
185                            }
186
187                            if suffix_len > 0
188                                && idx + 1 < hunks.len()
189                                && hunk_output_lines(&hunks[idx + 1])
190                                    .map(|lines| {
191                                        lines_start_with(lines, &ours[ours.len() - suffix_len..])
192                                    })
193                                    .unwrap_or(false)
194                            {
195                                suffix_len = 0;
196                            }
197
198                            if prefix_len > 0 {
199                                append_lines(&mut content, &ours[..prefix_len]);
200                            }
201                            emit_conflict(
202                                &mut content,
203                                base,
204                                &ours[prefix_len..ours.len() - suffix_len],
205                                &theirs[prefix_len..theirs.len() - suffix_len],
206                                input.label_ours,
207                                input.label_base,
208                                input.label_theirs,
209                                input.style,
210                                marker,
211                            );
212                            if suffix_len > 0 {
213                                append_lines(&mut content, &ours[ours.len() - suffix_len..]);
214                            }
215                        } else if matches!(input.style, ConflictStyle::Merge) {
216                            let (prefix_len, suffix_len) = common_prefix_suffix(ours, theirs);
217                            let pre = &ours[..prefix_len];
218                            let suf_start = ours.len().saturating_sub(suffix_len);
219                            let o_mid = &ours[prefix_len..suf_start];
220                            let t_mid =
221                                &theirs[prefix_len..theirs.len().saturating_sub(suffix_len)];
222                            let suf = &ours[suf_start..];
223                            append_lines(&mut content, pre);
224                            emit_conflict(
225                                &mut content,
226                                base,
227                                o_mid,
228                                t_mid,
229                                input.label_ours,
230                                input.label_base,
231                                input.label_theirs,
232                                input.style,
233                                marker,
234                            );
235                            append_lines(&mut content, suf);
236                        } else {
237                            emit_conflict(
238                                &mut content,
239                                base,
240                                ours,
241                                theirs,
242                                input.label_ours,
243                                input.label_base,
244                                input.label_theirs,
245                                input.style,
246                                marker,
247                            );
248                        }
249                    }
250                }
251            }
252        }
253    }
254
255    Ok(MergeOutput { content, conflicts })
256}
257
258fn append_lines(out: &mut Vec<u8>, lines: &[Vec<u8>]) {
259    // If previous content ended without a newline and this hunk adds more
260    // content, insert a newline separator to avoid merging lines together.
261    if !out.is_empty() && !out.ends_with(b"\n") && !lines.is_empty() {
262        out.push(b'\n');
263    }
264    for line in lines {
265        out.extend_from_slice(line);
266    }
267}
268
269fn common_prefix_suffix(ours: &[Vec<u8>], theirs: &[Vec<u8>]) -> (usize, usize) {
270    let mut prefix = 0usize;
271    while prefix < ours.len() && prefix < theirs.len() && ours[prefix] == theirs[prefix] {
272        prefix += 1;
273    }
274
275    let mut suffix = 0usize;
276    while suffix < ours.len().saturating_sub(prefix)
277        && suffix < theirs.len().saturating_sub(prefix)
278        && ours[ours.len() - 1 - suffix] == theirs[theirs.len() - 1 - suffix]
279    {
280        suffix += 1;
281    }
282
283    (prefix, suffix)
284}
285
286fn hunk_output_lines(hunk: &Hunk) -> Option<&[Vec<u8>]> {
287    match hunk {
288        Hunk::Unchanged(lines) => Some(lines),
289        Hunk::OnlyOurs { ours, .. } => Some(ours),
290        Hunk::OnlyTheirs { theirs, .. } => Some(theirs),
291        Hunk::Conflict { .. } => None,
292    }
293}
294
295fn lines_end_with(lines: &[Vec<u8>], suffix: &[Vec<u8>]) -> bool {
296    if suffix.is_empty() || suffix.len() > lines.len() {
297        return false;
298    }
299    lines[lines.len() - suffix.len()..] == *suffix
300}
301
302fn lines_start_with(lines: &[Vec<u8>], prefix: &[Vec<u8>]) -> bool {
303    if prefix.is_empty() || prefix.len() > lines.len() {
304        return false;
305    }
306    lines[..prefix.len()] == *prefix
307}
308
309/// Emit conflict markers into `out`.
310#[allow(clippy::too_many_arguments)]
311fn emit_conflict(
312    out: &mut Vec<u8>,
313    base: &[Vec<u8>],
314    ours: &[Vec<u8>],
315    theirs: &[Vec<u8>],
316    label_ours: &str,
317    label_base: &str,
318    label_theirs: &str,
319    style: ConflictStyle,
320    marker: usize,
321) {
322    let open = "<".repeat(marker);
323    let eq = "=".repeat(marker);
324    let close = ">".repeat(marker);
325    let marker_terminator: &[u8] = b"\n";
326
327    // Ensure ours section starts on a new line if the previous content didn't
328    // end with one.
329    if !out.is_empty() && !out.ends_with(b"\n") {
330        out.extend_from_slice(marker_terminator);
331    }
332
333    write_conflict_marker_line(out, &open, Some(label_ours), marker_terminator);
334    for line in ours {
335        out.extend_from_slice(line);
336    }
337    // Ensure separator starts on its own line.
338    if out.last().copied() != Some(b'\n') {
339        out.extend_from_slice(marker_terminator);
340    }
341    match style {
342        ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => {
343            let pipe = "|".repeat(marker);
344            write_conflict_marker_line(out, &pipe, Some(label_base), marker_terminator);
345            for line in base {
346                out.extend_from_slice(line);
347            }
348            if out.last().copied() != Some(b'\n') {
349                out.extend_from_slice(marker_terminator);
350            }
351            write_conflict_marker_line(out, &eq, None, marker_terminator);
352        }
353        ConflictStyle::Merge => {
354            write_conflict_marker_line(out, &eq, None, marker_terminator);
355        }
356    }
357    for line in theirs {
358        out.extend_from_slice(line);
359    }
360    if out.last().copied() != Some(b'\n') {
361        out.extend_from_slice(marker_terminator);
362    }
363    write_conflict_marker_line(out, &close, Some(label_theirs), marker_terminator);
364}
365
366fn write_conflict_marker_line(out: &mut Vec<u8>, marker: &str, label: Option<&str>, eol: &[u8]) {
367    out.extend_from_slice(marker.as_bytes());
368    if let Some(label) = label {
369        out.push(b' ');
370        out.extend_from_slice(label.as_bytes());
371    }
372    out.extend_from_slice(eol);
373}
374
375/// A classified merge region (owns its lines).
376#[derive(Debug, Clone)]
377enum Hunk {
378    /// Lines unchanged by both sides (base content).
379    Unchanged(Vec<Vec<u8>>),
380    /// Lines changed only by ours.
381    OnlyOurs {
382        /// Base lines for the changed region (empty for pure insertions).
383        base: Vec<Vec<u8>>,
384        /// Output lines from ours.
385        ours: Vec<Vec<u8>>,
386    },
387    /// Lines changed only by theirs.
388    OnlyTheirs {
389        /// Base lines for the changed region (empty for pure insertions).
390        base: Vec<Vec<u8>>,
391        /// Output lines from theirs.
392        theirs: Vec<Vec<u8>>,
393    },
394    /// Lines changed by both sides — a conflict.
395    Conflict {
396        base: Vec<Vec<u8>>,
397        ours: Vec<Vec<u8>>,
398        theirs: Vec<Vec<u8>>,
399    },
400}
401
402/// Compute the merge hunks from two diff sequences against the same base.
403///
404/// Uses a position-by-position scan with op-based expansion to ensure that
405/// changed regions spanning multiple lines are not split mid-op.
406fn compute_hunks(
407    base: &[Vec<u8>],
408    ours: &[Vec<u8>],
409    theirs: &[Vec<u8>],
410    ours_ops: &[DiffOp],
411    theirs_ops: &[DiffOp],
412    ws_mode: &WhitespaceMode,
413) -> Vec<Hunk> {
414    let ours_changed = changed_mask(ours_ops, base.len());
415    let theirs_changed = changed_mask(theirs_ops, base.len());
416
417    let ours_inserts = collect_inserts(ours_ops, base.len());
418    let theirs_inserts = collect_inserts(theirs_ops, base.len());
419
420    let mut hunks: Vec<Hunk> = Vec::new();
421    let mut pos = 0usize;
422
423    while pos <= base.len() {
424        // Emit pure insertions before this base position.
425        emit_inserts_at(
426            pos,
427            &ours_inserts,
428            &theirs_inserts,
429            ours,
430            theirs,
431            &mut hunks,
432        );
433
434        if pos == base.len() {
435            break;
436        }
437
438        let o = ours_changed[pos];
439        let t = theirs_changed[pos];
440
441        if !o && !t {
442            // Unchanged run. Stop before a position that has pending insertions
443            // on either side so that insertions are emitted in-order at the
444            // correct base position.
445            let mut end = pos + 1;
446            while end < base.len()
447                && !ours_changed[end]
448                && !theirs_changed[end]
449                && ours_inserts[end].is_empty()
450                && theirs_inserts[end].is_empty()
451            {
452                end += 1;
453            }
454            let unchanged_lines = if ws_mode.ignore_all_space
455                || ws_mode.ignore_space_change
456                || ws_mode.ignore_space_at_eol
457                || ws_mode.ignore_cr_at_eol
458            {
459                &ours[pos..end]
460            } else {
461                &base[pos..end]
462            };
463            hunks.push(Hunk::Unchanged(unchanged_lines.to_vec()));
464            pos = end;
465            continue;
466        }
467
468        // Changed region: expand end until all overlapping Changed ops from
469        // either side are fully consumed.  Repeat until stable.
470        let mut end = pos + 1;
471        loop {
472            let new_end = furthest_changed_op_end(ours_ops, pos, end)
473                .max(furthest_changed_op_end(theirs_ops, pos, end));
474            if new_end <= end {
475                break;
476            }
477            end = new_end;
478        }
479
480        // Classify the full range [pos..end).
481        let any_ours = (pos..end).any(|p| ours_changed[p]);
482        let any_theirs = (pos..end).any(|p| theirs_changed[p]);
483
484        match (any_ours, any_theirs) {
485            (true, false) => {
486                let c = collect_new_lines(ours_ops, ours, pos, end);
487                hunks.push(Hunk::OnlyOurs {
488                    base: base[pos..end].to_vec(),
489                    ours: c,
490                });
491            }
492            (false, true) => {
493                let c = collect_new_lines(theirs_ops, theirs, pos, end);
494                hunks.push(Hunk::OnlyTheirs {
495                    base: base[pos..end].to_vec(),
496                    theirs: c,
497                });
498            }
499            (true, true) => {
500                let o = collect_new_lines(ours_ops, ours, pos, end);
501                let t = collect_new_lines(theirs_ops, theirs, pos, end);
502                if lines_equal_for_compare(&o, &t, ws_mode) {
503                    // Both sides produce the same content — not really a conflict.
504                    hunks.push(Hunk::OnlyOurs {
505                        base: base[pos..end].to_vec(),
506                        ours: o,
507                    });
508                } else {
509                    hunks.push(Hunk::Conflict {
510                        base: base[pos..end].to_vec(),
511                        ours: o,
512                        theirs: t,
513                    });
514                }
515            }
516            (false, false) => {
517                // Should not happen, but treat as unchanged.
518                hunks.push(Hunk::Unchanged(base[pos..end].to_vec()));
519            }
520        }
521
522        pos = end;
523    }
524
525    hunks
526}
527
528/// Build a boolean mask: `mask[i]` is `true` if base line `i` is covered by
529/// a non-Equal op.
530fn changed_mask(ops: &[DiffOp], base_len: usize) -> Vec<bool> {
531    let mut mask = vec![false; base_len];
532    for op in ops {
533        if op.tag() == DiffTag::Equal {
534            continue;
535        }
536        for p in op.old_range() {
537            if p < base_len {
538                mask[p] = true;
539            }
540        }
541    }
542    mask
543}
544
545/// Collect pure insertions (ops with empty old_range) at each base position.
546///
547/// Returns a `Vec` of length `base_len + 1`; entry `i` holds all
548/// `(new_start, new_end)` ranges inserted before base line `i`.
549fn collect_inserts(ops: &[DiffOp], base_len: usize) -> Vec<Vec<(usize, usize)>> {
550    let mut result: Vec<Vec<(usize, usize)>> = vec![Vec::new(); base_len + 1];
551    for op in ops {
552        let old = op.old_range();
553        let new_ = op.new_range();
554        if old.is_empty() && !new_.is_empty() {
555            let pos = old.start.min(base_len);
556            result[pos].push((new_.start, new_.end));
557        }
558    }
559    result
560}
561
562/// Emit hunks for pure insertions at base position `pos`.
563fn emit_inserts_at(
564    pos: usize,
565    ours_inserts: &[Vec<(usize, usize)>],
566    theirs_inserts: &[Vec<(usize, usize)>],
567    ours: &[Vec<u8>],
568    theirs: &[Vec<u8>],
569    hunks: &mut Vec<Hunk>,
570) {
571    let o_ins = &ours_inserts[pos];
572    let t_ins = &theirs_inserts[pos];
573
574    let has_ours = !o_ins.is_empty();
575    let has_theirs = !t_ins.is_empty();
576
577    if has_ours && has_theirs {
578        // Both sides insert at the same position. If they share a common
579        // prefix, emit that prefix as unchanged and only conflict on the
580        // diverging remainder. This preserves superset insertions.
581        let o_lines: Vec<Vec<u8>> = o_ins
582            .iter()
583            .flat_map(|&(s, e)| ours[s..e].to_vec())
584            .collect();
585        let t_lines: Vec<Vec<u8>> = t_ins
586            .iter()
587            .flat_map(|&(s, e)| theirs[s..e].to_vec())
588            .collect();
589
590        let mut common_len = 0usize;
591        while common_len < o_lines.len()
592            && common_len < t_lines.len()
593            && o_lines[common_len] == t_lines[common_len]
594        {
595            common_len += 1;
596        }
597
598        if common_len > 0 {
599            hunks.push(Hunk::Unchanged(o_lines[..common_len].to_vec()));
600        }
601
602        let ours_tail = o_lines[common_len..].to_vec();
603        let theirs_tail = t_lines[common_len..].to_vec();
604
605        let ours_has_extra = !ours_tail.is_empty();
606        let theirs_has_extra = !theirs_tail.is_empty();
607
608        if ours_has_extra && theirs_has_extra {
609            hunks.push(Hunk::Conflict {
610                base: Vec::new(),
611                ours: ours_tail,
612                theirs: theirs_tail,
613            });
614        } else if ours_has_extra {
615            hunks.push(Hunk::OnlyOurs {
616                base: Vec::new(),
617                ours: ours_tail,
618            });
619        } else if theirs_has_extra {
620            hunks.push(Hunk::OnlyTheirs {
621                base: Vec::new(),
622                theirs: theirs_tail,
623            });
624        }
625    } else if has_ours {
626        for &(ns, ne) in o_ins {
627            let lines: Vec<Vec<u8>> = ours[ns..ne].to_vec();
628            if !lines.is_empty() {
629                hunks.push(Hunk::OnlyOurs {
630                    base: Vec::new(),
631                    ours: lines,
632                });
633            }
634        }
635    } else if has_theirs {
636        for &(ns, ne) in t_ins {
637            let lines: Vec<Vec<u8>> = theirs[ns..ne].to_vec();
638            if !lines.is_empty() {
639                hunks.push(Hunk::OnlyTheirs {
640                    base: Vec::new(),
641                    theirs: lines,
642                });
643            }
644        }
645    }
646}
647
648fn adjust_zealous_hunks(hunks: Vec<Hunk>) -> Vec<Hunk> {
649    let mut out: Vec<Hunk> = Vec::new();
650    let mut i = 0usize;
651
652    while i < hunks.len() {
653        let mut consumed = 1usize;
654        let mut transformed: Option<Vec<Hunk>> = None;
655
656        let (pre_insert, mid_idx) = match &hunks[i] {
657            Hunk::OnlyTheirs { base, theirs } if base.is_empty() => {
658                (Some(theirs.as_slice()), i + 1)
659            }
660            _ => (None, i),
661        };
662
663        if let Some(Hunk::OnlyOurs { base, ours }) = hunks.get(mid_idx) {
664            if !base.is_empty() {
665                let post_insert = match hunks.get(mid_idx + 1) {
666                    Some(Hunk::OnlyTheirs { base, theirs }) if base.is_empty() => {
667                        Some(theirs.as_slice())
668                    }
669                    _ => None,
670                };
671
672                let mut prefix_len = 0usize;
673                if let Some(pre) = pre_insert {
674                    if !pre.is_empty() && ours.starts_with(pre) {
675                        prefix_len = pre.len();
676                    }
677                }
678
679                let mut suffix_len = 0usize;
680                if let Some(post) = post_insert {
681                    if !post.is_empty() && ours[prefix_len..].ends_with(post) {
682                        suffix_len = post.len();
683                    }
684                }
685
686                if prefix_len > 0 || suffix_len > 0 {
687                    consumed = if pre_insert.is_some() {
688                        if post_insert.is_some() {
689                            3
690                        } else {
691                            2
692                        }
693                    } else if post_insert.is_some() {
694                        2
695                    } else {
696                        1
697                    };
698
699                    let mut replacement: Vec<Hunk> = Vec::new();
700                    if prefix_len > 0 {
701                        replacement.push(Hunk::Unchanged(ours[..prefix_len].to_vec()));
702                    }
703                    replacement.push(Hunk::Conflict {
704                        base: base.clone(),
705                        ours: ours[prefix_len..ours.len() - suffix_len].to_vec(),
706                        theirs: base.clone(),
707                    });
708                    if suffix_len > 0 {
709                        replacement.push(Hunk::Unchanged(ours[ours.len() - suffix_len..].to_vec()));
710                    }
711                    transformed = Some(replacement);
712                }
713            }
714        }
715
716        if let Some(replacement) = transformed {
717            for h in replacement {
718                push_hunk_with_unchanged_merge(&mut out, h);
719            }
720            i += consumed;
721            continue;
722        }
723
724        push_hunk_with_unchanged_merge(&mut out, hunks[i].clone());
725        i += 1;
726    }
727
728    out
729}
730
731/// If one side replaces base lines and the other appends an insertion immediately after
732/// that base span, `compute_hunks` emits two hunks (`Only*` + trailing insert). Git treats
733/// the combined region as one conflict (e.g. base `hello\n`, ours adds `hello\n`, theirs
734/// replaces with `remove-conflict\n`).
735fn merge_adjacent_replace_and_trailing_insert_conflicts(hunks: Vec<Hunk>) -> Vec<Hunk> {
736    let mut out: Vec<Hunk> = Vec::with_capacity(hunks.len());
737    let mut i = 0usize;
738    while i < hunks.len() {
739        let merged = match (&hunks[i], hunks.get(i + 1)) {
740            (Hunk::OnlyTheirs { base, theirs }, Some(Hunk::OnlyOurs { base: bo, ours: o }))
741                if !base.is_empty() && bo.is_empty() && !o.is_empty() && !theirs.is_empty() =>
742            {
743                Some(Hunk::Conflict {
744                    base: base.clone(),
745                    ours: o.clone(),
746                    theirs: theirs.clone(),
747                })
748            }
749            (
750                Hunk::OnlyOurs { base, ours },
751                Some(Hunk::OnlyTheirs {
752                    base: bt,
753                    theirs: t,
754                }),
755            ) if !base.is_empty() && bt.is_empty() && !t.is_empty() && !ours.is_empty() => {
756                Some(Hunk::Conflict {
757                    base: base.clone(),
758                    ours: ours.clone(),
759                    theirs: t.clone(),
760                })
761            }
762            _ => None,
763        };
764        if let Some(h) = merged {
765            out.push(h);
766            i += 2;
767        } else {
768            out.push(hunks[i].clone());
769            i += 1;
770        }
771    }
772    out
773}
774
775fn coalesce_nearby_conflicts(hunks: Vec<Hunk>, max_gap_lines: usize, enable: bool) -> Vec<Hunk> {
776    if !enable {
777        return hunks;
778    }
779    let mut out: Vec<Hunk> = Vec::new();
780    let mut i = 0usize;
781
782    while i < hunks.len() {
783        let Some(Hunk::Conflict { base, ours, theirs }) = hunks.get(i) else {
784            out.push(hunks[i].clone());
785            i += 1;
786            continue;
787        };
788
789        let mut merged_base = base.clone();
790        let mut merged_ours = ours.clone();
791        let mut merged_theirs = theirs.clone();
792        let mut j = i;
793
794        loop {
795            let Some(Hunk::Unchanged(gap)) = hunks.get(j + 1) else {
796                break;
797            };
798            let Some(Hunk::Conflict {
799                base: next_base,
800                ours: next_ours,
801                theirs: next_theirs,
802            }) = hunks.get(j + 2)
803            else {
804                break;
805            };
806            if gap.len() > max_gap_lines {
807                break;
808            }
809
810            merged_base.extend(gap.iter().cloned());
811            merged_base.extend(next_base.iter().cloned());
812            merged_ours.extend(gap.iter().cloned());
813            merged_ours.extend(next_ours.iter().cloned());
814            merged_theirs.extend(gap.iter().cloned());
815            merged_theirs.extend(next_theirs.iter().cloned());
816            j += 2;
817        }
818
819        out.push(Hunk::Conflict {
820            base: merged_base,
821            ours: merged_ours,
822            theirs: merged_theirs,
823        });
824        i = j + 1;
825    }
826
827    out
828}
829
830fn push_hunk_with_unchanged_merge(out: &mut Vec<Hunk>, hunk: Hunk) {
831    match hunk {
832        Hunk::Unchanged(mut lines) => {
833            if let Some(Hunk::Unchanged(prev)) = out.last_mut() {
834                prev.append(&mut lines);
835            } else if !lines.is_empty() {
836                out.push(Hunk::Unchanged(lines));
837            }
838        }
839        other => out.push(other),
840    }
841}
842
843/// Return the maximum `old_range().end` among all Changed ops that overlap
844/// with `[run_start..current_end)`.  Returns `current_end` if nothing
845/// extends further.
846fn furthest_changed_op_end(ops: &[DiffOp], run_start: usize, current_end: usize) -> usize {
847    let mut max = current_end;
848    for op in ops {
849        if op.tag() == DiffTag::Equal {
850            continue;
851        }
852        let old = op.old_range();
853        if old.start < current_end && old.end > run_start && old.end > max {
854            max = old.end;
855        }
856    }
857    max
858}
859
860/// Collect new (output) lines for base range `[base_start..base_end)`.
861///
862/// For each op whose `old_range` overlaps the range:
863/// - Equal ops contribute their corresponding new-side lines.
864/// - Changed ops contribute their full replacement.
865fn collect_new_lines(
866    ops: &[DiffOp],
867    new: &[Vec<u8>],
868    base_start: usize,
869    base_end: usize,
870) -> Vec<Vec<u8>> {
871    let mut lines = Vec::new();
872    for op in ops {
873        let old = op.old_range();
874        let new_ = op.new_range();
875        if old.is_empty() {
876            continue; // pure insertion, handled separately
877        }
878        if old.end <= base_start || old.start >= base_end {
879            continue; // no overlap
880        }
881        if op.tag() == DiffTag::Equal {
882            let overlap_start = base_start.max(old.start);
883            let overlap_end = base_end.min(old.end);
884            let offset = overlap_start - old.start;
885            let len = overlap_end - overlap_start;
886            for i in offset..offset + len {
887                if new_.start + i < new_.end {
888                    lines.push(new[new_.start + i].clone());
889                }
890            }
891        } else {
892            for i in new_.clone() {
893                lines.push(new[i].clone());
894            }
895        }
896    }
897    lines
898}
899
900/// Run Myers diff on two line slices.
901fn _diff_ops(old: &[Vec<u8>], new: &[Vec<u8>]) -> Vec<DiffOp> {
902    similar::capture_diff_slices(Algorithm::Myers, old, new)
903}
904
905/// Split a byte slice into lines, each including its trailing `\n` if present.
906fn split_lines(data: &[u8]) -> Vec<Vec<u8>> {
907    if data.is_empty() {
908        return Vec::new();
909    }
910    let mut lines = Vec::new();
911    let mut start = 0;
912    for i in 0..data.len() {
913        if data[i] == b'\n' {
914            lines.push(data[start..=i].to_vec());
915            start = i + 1;
916        }
917    }
918    if start < data.len() {
919        lines.push(data[start..].to_vec());
920    }
921    lines
922}
923
924#[derive(Clone, Copy, Debug, Default)]
925struct WhitespaceMode {
926    ignore_all_space: bool,
927    ignore_space_change: bool,
928    ignore_space_at_eol: bool,
929    ignore_cr_at_eol: bool,
930}
931
932fn normalize_lines_for_compare(lines: &[Vec<u8>], mode: &WhitespaceMode) -> Vec<Vec<u8>> {
933    lines
934        .iter()
935        .map(|line| normalize_line_for_compare(line, mode))
936        .collect()
937}
938
939fn normalize_line_for_compare(line: &[u8], mode: &WhitespaceMode) -> Vec<u8> {
940    let mut bytes = line.to_vec();
941
942    if mode.ignore_cr_at_eol && bytes.ends_with(b"\r\n") {
943        let len = bytes.len();
944        bytes.remove(len - 2);
945    }
946
947    if mode.ignore_all_space {
948        return bytes
949            .into_iter()
950            .filter(|b| !b.is_ascii_whitespace())
951            .collect();
952    }
953
954    if mode.ignore_space_change {
955        let mut out = Vec::with_capacity(bytes.len());
956        let mut in_ws = false;
957        for ch in bytes {
958            if ch.is_ascii_whitespace() {
959                if !in_ws {
960                    out.push(b' ');
961                    in_ws = true;
962                }
963            } else {
964                out.push(ch);
965                in_ws = false;
966            }
967        }
968        while out.last().is_some_and(|b| b.is_ascii_whitespace()) {
969            out.pop();
970        }
971        return out;
972    }
973
974    if mode.ignore_space_at_eol {
975        if bytes.last().copied() == Some(b'\n') {
976            let mut body = bytes[..bytes.len() - 1].to_vec();
977            while body.last().is_some_and(|b| b.is_ascii_whitespace()) {
978                body.pop();
979            }
980            body.push(b'\n');
981            bytes = body;
982        } else {
983            while bytes.last().is_some_and(|b| b.is_ascii_whitespace()) {
984                bytes.pop();
985            }
986        }
987    }
988
989    bytes
990}
991
992fn lines_equal_for_compare(left: &[Vec<u8>], right: &[Vec<u8>], mode: &WhitespaceMode) -> bool {
993    if left.len() != right.len() {
994        return false;
995    }
996    left.iter()
997        .zip(right)
998        .all(|(a, b)| normalize_line_for_compare(a, mode) == normalize_line_for_compare(b, mode))
999}
1000
1001/// Returns `true` if the byte slice appears to be binary content.
1002///
1003/// Uses the same heuristic as git: any NUL byte means binary.
1004#[must_use]
1005pub fn is_binary(data: &[u8]) -> bool {
1006    data.contains(&0u8)
1007}
1008
1009#[cfg(test)]
1010mod tests {
1011    use super::*;
1012
1013    fn do_merge(base: &str, ours: &str, theirs: &str) -> (String, usize) {
1014        let input = MergeInput {
1015            base: base.as_bytes(),
1016            ours: ours.as_bytes(),
1017            theirs: theirs.as_bytes(),
1018            label_ours: "ours",
1019            label_base: "base",
1020            label_theirs: "theirs",
1021            favor: MergeFavor::None,
1022            style: ConflictStyle::Merge,
1023            marker_size: 7,
1024            diff_algorithm: None,
1025            ignore_all_space: false,
1026            ignore_space_change: false,
1027            ignore_space_at_eol: false,
1028            ignore_cr_at_eol: false,
1029        };
1030        let out = merge(&input).unwrap();
1031        (String::from_utf8(out.content).unwrap(), out.conflicts)
1032    }
1033
1034    #[test]
1035    fn no_changes() {
1036        let t = "line1\nline2\nline3\n";
1037        let (r, c) = do_merge(t, t, t);
1038        assert_eq!(r, t);
1039        assert_eq!(c, 0);
1040    }
1041
1042    #[test]
1043    fn only_ours() {
1044        let (r, c) = do_merge("a\nb\nc\n", "a\nB\nc\n", "a\nb\nc\n");
1045        assert_eq!(r, "a\nB\nc\n");
1046        assert_eq!(c, 0);
1047    }
1048
1049    #[test]
1050    fn only_theirs() {
1051        let (r, c) = do_merge("a\nb\nc\n", "a\nb\nc\n", "a\nB\nc\n");
1052        assert_eq!(r, "a\nB\nc\n");
1053        assert_eq!(c, 0);
1054    }
1055
1056    #[test]
1057    fn conflict() {
1058        let (r, c) = do_merge("a\nb\nc\n", "a\nX\nc\n", "a\nY\nc\n");
1059        assert_eq!(c, 1);
1060        assert!(r.contains("<<<<<<< ours\nX\n=======\nY\n>>>>>>> theirs"));
1061    }
1062
1063    #[test]
1064    fn conflict_delete_vs_change() {
1065        // ours deletes lines 1-2, theirs changes line 1 → conflict
1066        let base = "a\nb\nc\n";
1067        let ours = "c\n"; // deleted a and b
1068        let theirs = "A\nb\nc\n"; // changed a → A
1069        let (r, c) = do_merge(base, ours, theirs);
1070        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1071    }
1072
1073    #[test]
1074    fn conflict_replace_vs_insert_after_same_line() {
1075        // Base one line; ours duplicates it; theirs replaces it — Git reports content conflict.
1076        let base = "hello\n";
1077        let ours = "hello\nhello\n";
1078        let theirs = "remove-conflict\n";
1079        let (r, c) = do_merge(base, ours, theirs);
1080        assert_eq!(c, 1, "expected conflict, got: {r:?}");
1081    }
1082
1083    #[test]
1084    fn favor_ours() {
1085        let input = MergeInput {
1086            base: b"a\nb\nc\n",
1087            ours: b"a\nX\nc\n",
1088            theirs: b"a\nY\nc\n",
1089            label_ours: "o",
1090            label_base: "b",
1091            label_theirs: "t",
1092            favor: MergeFavor::Ours,
1093            style: ConflictStyle::Merge,
1094            marker_size: 7,
1095            diff_algorithm: None,
1096            ignore_all_space: false,
1097            ignore_space_change: false,
1098            ignore_space_at_eol: false,
1099            ignore_cr_at_eol: false,
1100        };
1101        let out = merge(&input).unwrap();
1102        assert_eq!(out.content, b"a\nX\nc\n");
1103        assert_eq!(out.conflicts, 0);
1104    }
1105
1106    #[test]
1107    fn union_missing_lf() {
1108        let input = MergeInput {
1109            base: b"line1\nline2\nline3",
1110            ours: b"line1\nline2\nline3x",
1111            theirs: b"line1\nline2\nline3y",
1112            label_ours: "o",
1113            label_base: "b",
1114            label_theirs: "t",
1115            favor: MergeFavor::Union,
1116            style: ConflictStyle::Merge,
1117            marker_size: 7,
1118            diff_algorithm: None,
1119            ignore_all_space: false,
1120            ignore_space_change: false,
1121            ignore_space_at_eol: false,
1122            ignore_cr_at_eol: false,
1123        };
1124        let out = merge(&input).unwrap();
1125        // union: line3x\nline3y (newline inserted between no-LF lines)
1126        assert_eq!(out.content, b"line1\nline2\nline3x\nline3y");
1127    }
1128
1129    #[test]
1130    fn zdiff3_interesting_conflict_shape() {
1131        let input = MergeInput {
1132            base: b"1\n2\n3\n4\n5\n6\n7\n8\n9\n",
1133            ours: b"1\n2\n3\n4\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\n7\n8\n9\n",
1134            theirs: b"1\n2\n3\n4\nA\nB\nC\n5\n6\nG\nH\nI\nJ\n7\n8\n9\n",
1135            label_ours: "HEAD",
1136            label_base: "base",
1137            label_theirs: "right^0",
1138            favor: MergeFavor::None,
1139            style: ConflictStyle::ZealousDiff3,
1140            marker_size: 7,
1141            diff_algorithm: None,
1142            ignore_all_space: false,
1143            ignore_space_change: false,
1144            ignore_space_at_eol: false,
1145            ignore_cr_at_eol: false,
1146        };
1147        let out = merge(&input).unwrap();
1148        let rendered = String::from_utf8(out.content).unwrap();
1149        assert_eq!(out.conflicts, 1, "{rendered}");
1150        assert!(rendered.contains("<<<<<<< HEAD\nD\nE\nF\n"), "{rendered}");
1151    }
1152
1153    #[test]
1154    fn preserves_shared_and_superset_insertions() {
1155        let input = MergeInput {
1156            base: b"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n",
1157            ours: b"1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n",
1158            theirs: b"1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n10.5\n",
1159            label_ours: "ours",
1160            label_base: "base",
1161            label_theirs: "theirs",
1162            favor: MergeFavor::None,
1163            style: ConflictStyle::Merge,
1164            marker_size: 7,
1165            diff_algorithm: None,
1166            ignore_all_space: false,
1167            ignore_space_change: false,
1168            ignore_space_at_eol: false,
1169            ignore_cr_at_eol: false,
1170        };
1171        let base_lines = split_lines(input.base);
1172        let ours_lines = split_lines(input.ours);
1173        let theirs_lines = split_lines(input.theirs);
1174        let ws_mode = WhitespaceMode::default();
1175        let base_compare_lines = normalize_lines_for_compare(&base_lines, &ws_mode);
1176        let ours_compare_lines = normalize_lines_for_compare(&ours_lines, &ws_mode);
1177        let theirs_compare_lines = normalize_lines_for_compare(&theirs_lines, &ws_mode);
1178        let ours_ops = similar::capture_diff_slices(
1179            Algorithm::Myers,
1180            &base_compare_lines,
1181            &ours_compare_lines,
1182        );
1183        let theirs_ops = similar::capture_diff_slices(
1184            Algorithm::Myers,
1185            &base_compare_lines,
1186            &theirs_compare_lines,
1187        );
1188        let hunks = compute_hunks(
1189            &base_lines,
1190            &ours_lines,
1191            &theirs_lines,
1192            &ours_ops,
1193            &theirs_ops,
1194            &ws_mode,
1195        );
1196        assert_eq!(
1197            hunks.len(),
1198            4,
1199            "expected unchanged shared insertion and theirs-only tail insertion"
1200        );
1201        let out = merge(&input).unwrap();
1202        let rendered = String::from_utf8(out.content).unwrap();
1203        assert_eq!(out.conflicts, 0, "{rendered}");
1204        assert_eq!(
1205            rendered, "1\n2\n3\n4\n5\n5.5\n6\n7\n8\n9\n10\n10.5\n",
1206            "{rendered}"
1207        );
1208    }
1209}