Skip to main content

merge_engine/
diff3.rs

1//! Three-way text merge (diff3 algorithm).
2//!
3//! This is the baseline merge strategy used by git. We implement it from scratch
4//! using the `similar` crate for LCS-based diffing, following the classic diff3
5//! algorithm that partitions the file into stable and unstable regions.
6//!
7//! References:
8//! - Khanna, Kuber, Pierce (2007), "A Formal Investigation of Diff3"
9//! - GNU diff3 implementation
10
11use similar::{ChangeTag, TextDiff};
12
13use crate::types::{Diff3Hunk, MergeResult, MergeScenario};
14
15/// Run a three-way merge on line-level text.
16///
17/// Returns a sequence of hunks, each being either stable (all agree),
18/// left-only change, right-only change, or a conflict.
19pub fn diff3_hunks(scenario: &MergeScenario<&str>) -> Vec<Diff3Hunk> {
20    let base_lines: Vec<&str> = scenario.base.lines().collect();
21
22    // Compute per-base-line edits for left and right
23    let left_edits = compute_edits(scenario.base, scenario.left);
24    let right_edits = compute_edits(scenario.base, scenario.right);
25
26    // Walk base lines and merge both edit sequences
27    let mut hunks = Vec::new();
28    let mut bi = 0;
29    let mut lei = 0; // left edit index
30    let mut rei = 0; // right edit index
31
32    while bi < base_lines.len() || lei < left_edits.len() || rei < right_edits.len() {
33        // Handle insertions before the current base line
34        let left_insert = get_insert_at(&left_edits, &mut lei, bi);
35        let right_insert = get_insert_at(&right_edits, &mut rei, bi);
36
37        if !left_insert.is_empty() || !right_insert.is_empty() {
38            if !left_insert.is_empty() && !right_insert.is_empty() {
39                if left_insert == right_insert {
40                    // Both inserted the same lines
41                    hunks.push(Diff3Hunk::LeftChanged(left_insert));
42                } else {
43                    // Both inserted different lines — conflict
44                    hunks.push(Diff3Hunk::Conflict {
45                        base: vec![],
46                        left: left_insert,
47                        right: right_insert,
48                    });
49                }
50            } else if !left_insert.is_empty() {
51                hunks.push(Diff3Hunk::LeftChanged(left_insert));
52            } else {
53                hunks.push(Diff3Hunk::RightChanged(right_insert));
54            }
55        }
56
57        if bi >= base_lines.len() {
58            break;
59        }
60
61        // What did each side do with this base line?
62        let left_action = get_action_at(&left_edits, &mut lei, bi);
63        let right_action = get_action_at(&right_edits, &mut rei, bi);
64
65        match (&left_action, &right_action) {
66            // Both kept the line unchanged
67            (Edit::Keep, Edit::Keep) => {
68                hunks.push(Diff3Hunk::Stable(vec![base_lines[bi].to_string()]));
69            }
70            // Only left changed (right kept)
71            (Edit::Replace(new_lines), Edit::Keep) => {
72                hunks.push(Diff3Hunk::LeftChanged(new_lines.clone()));
73            }
74            (Edit::Delete, Edit::Keep) => {
75                // Left deleted, right kept — left changed to nothing
76                hunks.push(Diff3Hunk::LeftChanged(vec![]));
77            }
78            // Only right changed (left kept)
79            (Edit::Keep, Edit::Replace(new_lines)) => {
80                hunks.push(Diff3Hunk::RightChanged(new_lines.clone()));
81            }
82            (Edit::Keep, Edit::Delete) => {
83                hunks.push(Diff3Hunk::RightChanged(vec![]));
84            }
85            // Both deleted — stable removal
86            (Edit::Delete, Edit::Delete) => {
87                // Both agree to delete
88            }
89            // Both replaced with same content — convergence
90            (Edit::Replace(left_new), Edit::Replace(right_new)) if left_new == right_new => {
91                hunks.push(Diff3Hunk::LeftChanged(left_new.clone()));
92            }
93            // Both changed differently — conflict
94            (Edit::Replace(left_new), Edit::Replace(right_new)) => {
95                hunks.push(Diff3Hunk::Conflict {
96                    base: vec![base_lines[bi].to_string()],
97                    left: left_new.clone(),
98                    right: right_new.clone(),
99                });
100            }
101            (Edit::Replace(left_new), Edit::Delete) => {
102                hunks.push(Diff3Hunk::Conflict {
103                    base: vec![base_lines[bi].to_string()],
104                    left: left_new.clone(),
105                    right: vec![],
106                });
107            }
108            (Edit::Delete, Edit::Replace(right_new)) => {
109                hunks.push(Diff3Hunk::Conflict {
110                    base: vec![base_lines[bi].to_string()],
111                    left: vec![],
112                    right: right_new.clone(),
113                });
114            }
115        }
116
117        bi += 1;
118    }
119
120    coalesce_hunks(hunks)
121}
122
123/// Perform a full three-way merge, returning a single MergeResult.
124pub fn diff3_merge(scenario: &MergeScenario<&str>) -> MergeResult {
125    let hunks = diff3_hunks(scenario);
126
127    let mut merged = String::new();
128    let mut all_conflict_base = Vec::new();
129    let mut all_conflict_left = Vec::new();
130    let mut all_conflict_right = Vec::new();
131    let mut has_conflict = false;
132
133    for hunk in &hunks {
134        match hunk {
135            Diff3Hunk::Stable(lines)
136            | Diff3Hunk::LeftChanged(lines)
137            | Diff3Hunk::RightChanged(lines) => {
138                for line in lines {
139                    merged.push_str(line);
140                    merged.push('\n');
141                }
142            }
143            Diff3Hunk::Conflict { base, left, right } => {
144                has_conflict = true;
145                all_conflict_base.extend(base.iter().cloned());
146                all_conflict_left.extend(left.iter().cloned());
147                all_conflict_right.extend(right.iter().cloned());
148            }
149        }
150    }
151
152    if has_conflict {
153        MergeResult::Conflict {
154            base: all_conflict_base.join("\n"),
155            left: all_conflict_left.join("\n"),
156            right: all_conflict_right.join("\n"),
157        }
158    } else {
159        MergeResult::Resolved(merged)
160    }
161}
162
163/// Extract all conflict regions from a three-way merge.
164pub fn extract_conflicts(scenario: &MergeScenario<&str>) -> Vec<MergeScenario<String>> {
165    let hunks = diff3_hunks(scenario);
166    hunks
167        .into_iter()
168        .filter_map(|h| match h {
169            Diff3Hunk::Conflict { base, left, right } => Some(MergeScenario::new(
170                base.join("\n"),
171                left.join("\n"),
172                right.join("\n"),
173            )),
174            _ => None,
175        })
176        .collect()
177}
178
179// ──────────────────────────────────────────────────────────────
180// Internal: Edit representation and diff computation
181// ──────────────────────────────────────────────────────────────
182
183/// What happened to a base line in one side of the merge.
184#[derive(Debug, Clone, PartialEq, Eq)]
185enum Edit {
186    Keep,
187    Delete,
188    Replace(Vec<String>),
189}
190
191/// Represents an edit operation at a specific base line position.
192#[derive(Debug, Clone)]
193enum EditOp {
194    /// Inserted lines before base line `before_base_idx`.
195    Insert {
196        before_base_idx: usize,
197        lines: Vec<String>,
198    },
199    /// Base line `base_idx` was kept.
200    Kept { base_idx: usize },
201    /// Base line `base_idx` was deleted.
202    Deleted { base_idx: usize },
203    /// Base line `base_idx` was replaced with `lines`.
204    Replaced { base_idx: usize, lines: Vec<String> },
205}
206
207/// Compute edits from base to target, returning a sequence of EditOps.
208fn compute_edits(base: &str, target: &str) -> Vec<EditOp> {
209    let diff = TextDiff::from_lines(base, target);
210    let mut ops = Vec::new();
211    let mut base_idx: usize = 0;
212    let mut pending_deletes: Vec<usize> = Vec::new();
213    let mut pending_inserts: Vec<String> = Vec::new();
214
215    for change in diff.iter_all_changes() {
216        match change.tag() {
217            ChangeTag::Equal => {
218                flush_pending(
219                    &mut ops,
220                    &mut pending_deletes,
221                    &mut pending_inserts,
222                    base_idx,
223                );
224                ops.push(EditOp::Kept { base_idx });
225                base_idx += 1;
226            }
227            ChangeTag::Delete => {
228                // Flush inserts that came before this delete
229                if !pending_inserts.is_empty() && pending_deletes.is_empty() {
230                    ops.push(EditOp::Insert {
231                        before_base_idx: base_idx,
232                        lines: std::mem::take(&mut pending_inserts),
233                    });
234                }
235                pending_deletes.push(base_idx);
236                base_idx += 1;
237            }
238            ChangeTag::Insert => {
239                pending_inserts.push(change.value().trim_end_matches('\n').to_string());
240            }
241        }
242    }
243
244    let base_line_count = base.lines().count();
245    flush_pending(
246        &mut ops,
247        &mut pending_deletes,
248        &mut pending_inserts,
249        base_line_count,
250    );
251
252    ops
253}
254
255/// Flush pending deletes and inserts into the ops list.
256fn flush_pending(
257    ops: &mut Vec<EditOp>,
258    pending_deletes: &mut Vec<usize>,
259    pending_inserts: &mut Vec<String>,
260    next_base_idx: usize,
261) {
262    if !pending_deletes.is_empty() && !pending_inserts.is_empty() {
263        // Delete+Insert = Replace
264        let first_del = pending_deletes[0];
265        let replacement = std::mem::take(pending_inserts);
266        // First deleted line gets the replacement
267        ops.push(EditOp::Replaced {
268            base_idx: first_del,
269            lines: replacement,
270        });
271        // Remaining deleted lines are just deletes
272        for &del_idx in &pending_deletes[1..] {
273            ops.push(EditOp::Deleted { base_idx: del_idx });
274        }
275        pending_deletes.clear();
276    } else if !pending_deletes.is_empty() {
277        for &del_idx in pending_deletes.iter() {
278            ops.push(EditOp::Deleted { base_idx: del_idx });
279        }
280        pending_deletes.clear();
281    } else if !pending_inserts.is_empty() {
282        ops.push(EditOp::Insert {
283            before_base_idx: next_base_idx,
284            lines: std::mem::take(pending_inserts),
285        });
286    }
287}
288
289/// Get any insertions before `base_idx` from the edit list.
290fn get_insert_at(edits: &[EditOp], edit_idx: &mut usize, base_idx: usize) -> Vec<String> {
291    let mut inserted = Vec::new();
292    while *edit_idx < edits.len() {
293        match &edits[*edit_idx] {
294            EditOp::Insert {
295                before_base_idx,
296                lines,
297            } if *before_base_idx == base_idx => {
298                inserted.extend(lines.iter().cloned());
299                *edit_idx += 1;
300            }
301            _ => break,
302        }
303    }
304    inserted
305}
306
307/// Get the action for `base_idx` from the edit list.
308fn get_action_at(edits: &[EditOp], edit_idx: &mut usize, base_idx: usize) -> Edit {
309    if *edit_idx >= edits.len() {
310        return Edit::Keep;
311    }
312    match &edits[*edit_idx] {
313        EditOp::Kept { base_idx: bi } if *bi == base_idx => {
314            *edit_idx += 1;
315            Edit::Keep
316        }
317        EditOp::Deleted { base_idx: bi } if *bi == base_idx => {
318            *edit_idx += 1;
319            Edit::Delete
320        }
321        EditOp::Replaced {
322            base_idx: bi,
323            lines,
324        } if *bi == base_idx => {
325            let lines = lines.clone();
326            *edit_idx += 1;
327            Edit::Replace(lines)
328        }
329        _ => Edit::Keep,
330    }
331}
332
333fn coalesce_hunks(hunks: Vec<Diff3Hunk>) -> Vec<Diff3Hunk> {
334    let mut result: Vec<Diff3Hunk> = Vec::new();
335    for hunk in hunks {
336        let should_merge = matches!(
337            (&hunk, result.last()),
338            (Diff3Hunk::Stable(_), Some(Diff3Hunk::Stable(_)))
339                | (Diff3Hunk::LeftChanged(_), Some(Diff3Hunk::LeftChanged(_)))
340                | (Diff3Hunk::RightChanged(_), Some(Diff3Hunk::RightChanged(_)))
341                | (Diff3Hunk::Conflict { .. }, Some(Diff3Hunk::Conflict { .. }))
342        );
343        if should_merge {
344            match (result.last_mut().unwrap(), hunk) {
345                (Diff3Hunk::Stable(existing), Diff3Hunk::Stable(new)) => existing.extend(new),
346                (Diff3Hunk::LeftChanged(existing), Diff3Hunk::LeftChanged(new)) => {
347                    existing.extend(new)
348                }
349                (Diff3Hunk::RightChanged(existing), Diff3Hunk::RightChanged(new)) => {
350                    existing.extend(new)
351                }
352                (
353                    Diff3Hunk::Conflict {
354                        base: eb,
355                        left: el,
356                        right: er,
357                    },
358                    Diff3Hunk::Conflict {
359                        base: nb,
360                        left: nl,
361                        right: nr,
362                    },
363                ) => {
364                    eb.extend(nb);
365                    el.extend(nl);
366                    er.extend(nr);
367                }
368                _ => unreachable!(),
369            }
370        } else {
371            result.push(hunk);
372        }
373    }
374    result
375}
376
377#[cfg(test)]
378mod tests {
379    use super::*;
380
381    #[test]
382    fn test_no_conflict() {
383        let base = "line1\nline2\nline3";
384        let left = "line1\nmodified_left\nline3";
385        let right = "line1\nline2\nline3_right";
386        let scenario = MergeScenario::new(base, left, right);
387        let result = diff3_merge(&scenario);
388        assert!(result.is_resolved());
389        if let MergeResult::Resolved(content) = &result {
390            assert!(content.contains("modified_left"));
391            assert!(content.contains("line3_right"));
392        }
393    }
394
395    #[test]
396    fn test_identical_changes() {
397        let base = "line1\nline2";
398        let left = "line1\nchanged";
399        let right = "line1\nchanged";
400        let scenario = MergeScenario::new(base, left, right);
401        let result = diff3_merge(&scenario);
402        assert!(result.is_resolved());
403    }
404
405    #[test]
406    fn test_conflict_detection() {
407        let base = "line1\noriginal\nline3";
408        let left = "line1\nleft_change\nline3";
409        let right = "line1\nright_change\nline3";
410        let scenario = MergeScenario::new(base, left, right);
411        let result = diff3_merge(&scenario);
412        assert!(
413            result.is_conflict(),
414            "expected conflict but got: {:?}",
415            result
416        );
417        if let MergeResult::Conflict { left, right, .. } = &result {
418            assert!(left.contains("left_change"));
419            assert!(right.contains("right_change"));
420        }
421    }
422
423    #[test]
424    fn test_multiple_conflicts() {
425        let base = "a\nb\nc";
426        let left = "x\nb\ny";
427        let right = "p\nb\nq";
428        let scenario = MergeScenario::new(base, left, right);
429        let conflicts = extract_conflicts(&scenario);
430        assert!(!conflicts.is_empty(), "should detect at least one conflict");
431    }
432
433    #[test]
434    fn test_left_only_insert() {
435        let base = "line1\nline3";
436        let left = "line1\nline2\nline3";
437        let right = "line1\nline3";
438        let scenario = MergeScenario::new(base, left, right);
439        let result = diff3_merge(&scenario);
440        assert!(result.is_resolved());
441        if let MergeResult::Resolved(content) = &result {
442            assert!(content.contains("line2"));
443        }
444    }
445
446    #[test]
447    fn test_both_insert_different() {
448        let base = "line1\nline3";
449        let left = "line1\nleft_insert\nline3";
450        let right = "line1\nright_insert\nline3";
451        let scenario = MergeScenario::new(base, left, right);
452        let result = diff3_merge(&scenario);
453        // This should be a conflict since both inserted different content
454        // at the same position
455        assert!(
456            result.is_conflict(),
457            "expected conflict for different insertions at same position, got: {:?}",
458            result
459        );
460    }
461
462    #[test]
463    fn test_delete_vs_modify_conflict() {
464        let base = "keep\nmodify_me\nkeep_too";
465        let left = "keep\nmodified_left\nkeep_too";
466        let right = "keep\nkeep_too";
467        let scenario = MergeScenario::new(base, left, right);
468        let result = diff3_merge(&scenario);
469        assert!(
470            result.is_conflict(),
471            "delete vs modify should conflict, got: {:?}",
472            result
473        );
474    }
475}