Skip to main content

llmtxt_core/
three_way_merge.rs

1//! 3-way merge algorithm for conflict-aware document merging.
2//!
3//! Given a common ancestor (`base`) and two diverged versions (`ours` and
4//! `theirs`), produces a merged document.  Regions modified by only one side
5//! are auto-merged.  Regions modified by both sides simultaneously produce a
6//! [`Conflict`] with standard `<<<<<<<` / `=======` / `>>>>>>>` markers in
7//! the merged output.
8
9use crate::diff::structured_diff_native;
10
11// ── Output types ──────────────────────────────────────────────────────────────
12
13/// Statistics describing the outcome of a 3-way merge.
14#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
15#[serde(rename_all = "camelCase")]
16pub struct MergeStats {
17    /// Total lines in the merged output (including conflict markers).
18    pub total_lines: usize,
19    /// Number of lines accepted without conflict.
20    pub auto_merged_lines: usize,
21    /// Number of distinct conflict regions.
22    pub conflict_count: usize,
23}
24
25/// A single conflicting region where both `ours` and `theirs` diverge from `base`.
26#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
27#[serde(rename_all = "camelCase")]
28pub struct Conflict {
29    /// 1-based start line of the conflicting region in `ours`.
30    pub ours_start: usize,
31    /// 1-based end line of the conflicting region in `ours` (inclusive).
32    pub ours_end: usize,
33    /// 1-based start line of the conflicting region in `theirs`.
34    pub theirs_start: usize,
35    /// 1-based end line of the conflicting region in `theirs` (inclusive).
36    pub theirs_end: usize,
37    /// 1-based start line of the conflicting region in `base`.
38    pub base_start: usize,
39    /// 1-based end line of the conflicting region in `base` (inclusive).
40    pub base_end: usize,
41    /// The text from `ours` in this conflicting region.
42    pub ours_content: String,
43    /// The text from `theirs` in this conflicting region.
44    pub theirs_content: String,
45    /// The original text from `base` in this conflicting region.
46    pub base_content: String,
47}
48
49/// Result of a 3-way merge operation.
50#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
51#[serde(rename_all = "camelCase")]
52pub struct ThreeWayMergeResult {
53    /// The merged document content.
54    ///
55    /// When `has_conflicts` is `true`, conflict regions are delimited with
56    /// standard markers:
57    /// ```text
58    /// <<<<<<< ours
59    /// …our lines…
60    /// =======
61    /// …their lines…
62    /// >>>>>>> theirs
63    /// ```
64    pub merged: String,
65    /// `true` when at least one conflict was detected and could not be
66    /// auto-merged.
67    pub has_conflicts: bool,
68    /// Details of each conflict region.
69    pub conflicts: Vec<Conflict>,
70    /// Summary statistics for the merge.
71    pub stats: MergeStats,
72}
73
74// ── Internal change classification ───────────────────────────────────────────
75
76/// The type of edit a diff operation represents relative to `base`.
77#[derive(Debug, Clone, PartialEq)]
78enum Edit {
79    /// This line is unchanged from `base`.
80    Keep,
81    /// This line was removed (appears in `base` but not in the side).
82    Delete,
83}
84
85/// Side edit data: per-base-line edit classification plus inserted lines.
86struct SideEdits {
87    /// For each base line index (0-based): Keep or Delete.
88    line_edit: Vec<Edit>,
89    /// Lines inserted immediately *before* `base_lines[i]`, keyed by base index.
90    inserts_before: Vec<Vec<String>>,
91    /// Lines inserted after all base lines.
92    inserts_after: Vec<String>,
93}
94
95/// Classify each base line — and record any insertions — for one side of the diff.
96///
97/// `inserts_before[i]` contains lines inserted immediately before `base_lines[i]`.
98/// `inserts_after` contains lines appended after all base lines.
99/// `line_edit[i]` is `Keep` when base line `i` survived or `Delete` when it was removed.
100fn compute_side_edits(base_lines: &[&str], side_lines: &[&str]) -> SideEdits {
101    let n = base_lines.len();
102    let diff = structured_diff_native(&base_lines.join("\n"), &side_lines.join("\n"));
103
104    let mut line_edit: Vec<Edit> = vec![Edit::Keep; n];
105    let mut inserts_before: Vec<Vec<String>> = vec![Vec::new(); n];
106    let mut inserts_after: Vec<String> = Vec::new();
107
108    // Walk the structured diff in forward order.  base_idx tracks which base
109    // line we are currently positioned *before* (0-based, i.e. the next base
110    // line that has not yet been consumed).
111    let mut base_idx: usize = 0;
112
113    for entry in &diff.lines {
114        match entry.line_type.as_str() {
115            "context" => {
116                if let Some(old_line) = entry.old_line {
117                    base_idx = old_line as usize; // advance past this base line (1-based → 0-based next)
118                }
119            }
120            "removed" => {
121                if let Some(old_line) = entry.old_line {
122                    let idx = (old_line as usize) - 1;
123                    line_edit[idx] = Edit::Delete;
124                    base_idx = idx + 1;
125                }
126            }
127            "added" => {
128                if base_idx < n {
129                    inserts_before[base_idx].push(entry.content.clone());
130                } else {
131                    inserts_after.push(entry.content.clone());
132                }
133            }
134            _ => {}
135        }
136    }
137
138    SideEdits {
139        line_edit,
140        inserts_before,
141        inserts_after,
142    }
143}
144
145// ── 3-way merge walk ──────────────────────────────────────────────────────────
146
147/// Perform a 3-way merge of `base`, `ours`, and `theirs`.
148///
149/// Returns a [`ThreeWayMergeResult`] with the merged content and conflict metadata.
150pub fn three_way_merge_native(base: &str, ours: &str, theirs: &str) -> ThreeWayMergeResult {
151    let base_lines: Vec<&str> = base.lines().collect();
152    let ours_lines: Vec<&str> = ours.lines().collect();
153    let theirs_lines: Vec<&str> = theirs.lines().collect();
154
155    let n = base_lines.len();
156
157    // Fast path: identical inputs.
158    if ours == theirs {
159        let total = ours_lines.len();
160        return ThreeWayMergeResult {
161            merged: ours.to_string(),
162            has_conflicts: false,
163            conflicts: vec![],
164            stats: MergeStats {
165                total_lines: total,
166                auto_merged_lines: total,
167                conflict_count: 0,
168            },
169        };
170    }
171
172    let our_edits = compute_side_edits(&base_lines, &ours_lines);
173    let their_edits = compute_side_edits(&base_lines, &theirs_lines);
174
175    let mut merged_lines: Vec<String> = Vec::new();
176    let mut conflicts: Vec<Conflict> = Vec::new();
177    let mut auto_merged_lines: usize = 0;
178
179    // ours_line / theirs_line are 1-based running line numbers in the respective sides.
180    let mut ours_line: usize = 1;
181    let mut theirs_line: usize = 1;
182
183    // For each base line i, handle insertions then the base line itself.
184    #[allow(clippy::needless_range_loop)]
185    for i in 0..n {
186        // ── Insertions before base line i ──────────────────────────────────
187        let our_ins = &our_edits.inserts_before[i];
188        let their_ins = &their_edits.inserts_before[i];
189
190        let ins_conflict = !our_ins.is_empty() && !their_ins.is_empty() && our_ins != their_ins;
191        let only_ours_inserts = !our_ins.is_empty() && their_ins.is_empty();
192        let only_theirs_inserts = our_ins.is_empty() && !their_ins.is_empty();
193        let both_agree_inserts =
194            !our_ins.is_empty() && !their_ins.is_empty() && our_ins == their_ins;
195
196        if ins_conflict {
197            let ours_start = ours_line;
198            let ours_end = ours_line + our_ins.len().saturating_sub(1);
199            let theirs_start = theirs_line;
200            let theirs_end = theirs_line + their_ins.len().saturating_sub(1);
201
202            emit_conflict_markers(&mut merged_lines, our_ins, their_ins);
203
204            conflicts.push(Conflict {
205                ours_start,
206                ours_end,
207                theirs_start,
208                theirs_end,
209                base_start: i + 1,
210                base_end: i + 1,
211                ours_content: our_ins.join("\n"),
212                theirs_content: their_ins.join("\n"),
213                base_content: String::new(),
214            });
215
216            ours_line += our_ins.len();
217            theirs_line += their_ins.len();
218        } else if both_agree_inserts {
219            for line in our_ins {
220                merged_lines.push(line.clone());
221            }
222            auto_merged_lines += our_ins.len();
223            ours_line += our_ins.len();
224            theirs_line += their_ins.len();
225        } else if only_ours_inserts {
226            for line in our_ins {
227                merged_lines.push(line.clone());
228            }
229            auto_merged_lines += our_ins.len();
230            ours_line += our_ins.len();
231        } else if only_theirs_inserts {
232            for line in their_ins {
233                merged_lines.push(line.clone());
234            }
235            auto_merged_lines += their_ins.len();
236            theirs_line += their_ins.len();
237        }
238
239        // ── The base line itself ────────────────────────────────────────────
240        let our_edit = &our_edits.line_edit[i];
241        let their_edit = &their_edits.line_edit[i];
242
243        match (our_edit, their_edit) {
244            // Both kept — emit base line unchanged.
245            (Edit::Keep, Edit::Keep) => {
246                merged_lines.push(base_lines[i].to_string());
247                auto_merged_lines += 1;
248                ours_line += 1;
249                theirs_line += 1;
250            }
251
252            // Both deleted — silently omit.
253            (Edit::Delete, Edit::Delete) => {
254                // Base line removed by both sides — skip it.
255                // Neither ours_line nor theirs_line advances (line is gone from both).
256            }
257
258            // Only ours deleted — accept the deletion (one-sided change wins).
259            (Edit::Delete, Edit::Keep) => {
260                theirs_line += 1;
261            }
262
263            // Only theirs deleted — accept the deletion.
264            (Edit::Keep, Edit::Delete) => {
265                ours_line += 1;
266            }
267        }
268    }
269
270    // ── Insertions after all base lines ────────────────────────────────────
271    let our_tail = &our_edits.inserts_after;
272    let their_tail = &their_edits.inserts_after;
273
274    let tail_conflict = !our_tail.is_empty() && !their_tail.is_empty() && our_tail != their_tail;
275
276    if tail_conflict {
277        let ours_start = ours_line;
278        let ours_end = ours_line + our_tail.len().saturating_sub(1);
279        let theirs_start = theirs_line;
280        let theirs_end = theirs_line + their_tail.len().saturating_sub(1);
281
282        emit_conflict_markers(&mut merged_lines, our_tail, their_tail);
283
284        conflicts.push(Conflict {
285            ours_start,
286            ours_end,
287            theirs_start,
288            theirs_end,
289            base_start: n + 1,
290            base_end: n + 1,
291            ours_content: our_tail.join("\n"),
292            theirs_content: their_tail.join("\n"),
293            base_content: String::new(),
294        });
295    } else if !our_tail.is_empty() && their_tail.is_empty() {
296        for line in our_tail {
297            merged_lines.push(line.clone());
298        }
299        auto_merged_lines += our_tail.len();
300    } else if our_tail.is_empty() && !their_tail.is_empty() {
301        for line in their_tail {
302            merged_lines.push(line.clone());
303        }
304        auto_merged_lines += their_tail.len();
305    } else if !our_tail.is_empty() {
306        // Both non-empty and equal — emit once.
307        for line in our_tail {
308            merged_lines.push(line.clone());
309        }
310        auto_merged_lines += our_tail.len();
311    }
312
313    let total_lines = merged_lines.len();
314    let conflict_count = conflicts.len();
315    let has_conflicts = conflict_count > 0;
316
317    ThreeWayMergeResult {
318        merged: merged_lines.join("\n"),
319        has_conflicts,
320        conflicts,
321        stats: MergeStats {
322            total_lines,
323            auto_merged_lines,
324            conflict_count,
325        },
326    }
327}
328
329/// Append standard conflict marker block to `out`.
330fn emit_conflict_markers(out: &mut Vec<String>, ours: &[String], theirs: &[String]) {
331    out.push("<<<<<<< ours".to_string());
332    for line in ours {
333        out.push(line.clone());
334    }
335    out.push("=======".to_string());
336    for line in theirs {
337        out.push(line.clone());
338    }
339    out.push(">>>>>>> theirs".to_string());
340}
341
342// ── Public WASM-friendly entry points ────────────────────────────────────────
343
344/// JSON-serializing wrapper around [`three_way_merge_native`].
345///
346/// Returns a JSON-serialized [`ThreeWayMergeResult`] on success, or a
347/// JSON error object on serialization failure.
348pub fn three_way_merge(base: &str, ours: &str, theirs: &str) -> String {
349    let result = three_way_merge_native(base, ours, theirs);
350    serde_json::to_string(&result)
351        .unwrap_or_else(|e| format!(r#"{{"error":"three_way_merge serialization failed: {e}"}}"#))
352}
353
354// ── Tests ─────────────────────────────────────────────────────────────────────
355
356#[cfg(test)]
357mod tests {
358    use super::*;
359
360    fn merge(base: &str, ours: &str, theirs: &str) -> ThreeWayMergeResult {
361        three_way_merge_native(base, ours, theirs)
362    }
363
364    fn lines(s: &str) -> Vec<&str> {
365        s.lines().collect()
366    }
367
368    // ── clean merge: no conflicts ─────────────────────────────────────────────
369
370    #[test]
371    fn test_clean_merge_different_lines_modified() {
372        // ours modifies line 2, theirs modifies line 3 — no overlap.
373        let base = "A\nB\nC";
374        let ours = "A\nB_modified\nC";
375        let theirs = "A\nB\nC_modified";
376        let result = merge(base, ours, theirs);
377        assert!(!result.has_conflicts, "Expected no conflict");
378        assert_eq!(result.conflicts.len(), 0);
379        let m = lines(&result.merged);
380        assert!(m.contains(&"A"));
381        assert!(m.contains(&"B_modified"));
382        assert!(m.contains(&"C_modified"));
383    }
384
385    #[test]
386    fn test_clean_merge_ours_adds_theirs_unchanged() {
387        // ours inserts a new line before base line 2; theirs is identical to base.
388        let base = "line1\nline3";
389        let ours = "line1\nline2\nline3";
390        let theirs = "line1\nline3";
391        let result = merge(base, ours, theirs);
392        assert!(!result.has_conflicts, "Expected no conflict");
393        let m = lines(&result.merged);
394        assert_eq!(m, vec!["line1", "line2", "line3"]);
395    }
396
397    #[test]
398    fn test_clean_merge_theirs_adds_ours_unchanged() {
399        let base = "line1\nline3";
400        let ours = "line1\nline3";
401        let theirs = "line1\nline2\nline3";
402        let result = merge(base, ours, theirs);
403        assert!(!result.has_conflicts, "Expected no conflict");
404        let m = lines(&result.merged);
405        assert_eq!(m, vec!["line1", "line2", "line3"]);
406    }
407
408    // ── conflict: both modify same line ───────────────────────────────────────
409
410    #[test]
411    fn test_conflict_both_modify_same_line() {
412        let base = "line1\nshared_line\nline3";
413        let ours = "line1\nours_version\nline3";
414        let theirs = "line1\ntheirs_version\nline3";
415        let result = merge(base, ours, theirs);
416        assert!(result.has_conflicts);
417        assert_eq!(result.conflicts.len(), 1);
418        let merged = &result.merged;
419        assert!(merged.contains("<<<<<<< ours"));
420        assert!(merged.contains("ours_version"));
421        assert!(merged.contains("======="));
422        assert!(merged.contains("theirs_version"));
423        assert!(merged.contains(">>>>>>> theirs"));
424    }
425
426    // ── one-sided edit ────────────────────────────────────────────────────────
427
428    #[test]
429    fn test_one_sided_edit_ours_only() {
430        let base = "A\nB\nC";
431        let ours = "A\nB_ours\nC";
432        let theirs = "A\nB\nC"; // unchanged
433        let result = merge(base, ours, theirs);
434        assert!(!result.has_conflicts);
435        let m = lines(&result.merged);
436        assert!(m.contains(&"B_ours"), "ours change should be accepted");
437    }
438
439    #[test]
440    fn test_one_sided_edit_theirs_only() {
441        let base = "A\nB\nC";
442        let ours = "A\nB\nC"; // unchanged
443        let theirs = "A\nB_theirs\nC";
444        let result = merge(base, ours, theirs);
445        assert!(!result.has_conflicts);
446        let m = lines(&result.merged);
447        assert!(m.contains(&"B_theirs"), "theirs change should be accepted");
448    }
449
450    // ── empty base ────────────────────────────────────────────────────────────
451
452    #[test]
453    fn test_empty_base_both_add_same() {
454        let base = "";
455        let ours = "new content";
456        let theirs = "new content";
457        let result = merge(base, ours, theirs);
458        // Identical inputs → fast path, no conflict.
459        assert!(!result.has_conflicts);
460        assert_eq!(result.merged, "new content");
461    }
462
463    #[test]
464    fn test_empty_base_both_add_different() {
465        let base = "";
466        let ours = "ours content";
467        let theirs = "theirs content";
468        let result = merge(base, ours, theirs);
469        // Both add different content to empty base — conflict.
470        assert!(result.has_conflicts);
471        assert!(result.merged.contains("<<<<<<< ours"));
472        assert!(result.merged.contains("ours content"));
473        assert!(result.merged.contains("theirs content"));
474    }
475
476    // ── deletion + modification conflict ──────────────────────────────────────
477
478    #[test]
479    fn test_both_delete_same_line() {
480        let base = "keep\ndelete_me\nkeep2";
481        let ours = "keep\nkeep2";
482        let theirs = "keep\nkeep2";
483        let result = merge(base, ours, theirs);
484        assert!(!result.has_conflicts);
485        let m = lines(&result.merged);
486        assert!(!m.contains(&"delete_me"), "deleted line must not appear");
487        assert_eq!(m, vec!["keep", "keep2"]);
488    }
489
490    #[test]
491    fn test_ours_deletes_theirs_keeps() {
492        let base = "A\nB\nC";
493        let ours = "A\nC"; // deleted B
494        let theirs = "A\nB\nC"; // kept B
495        let result = merge(base, ours, theirs);
496        // One-sided deletion: ours wins (auto-merge).
497        assert!(!result.has_conflicts);
498        let m = lines(&result.merged);
499        assert!(
500            !m.contains(&"B"),
501            "B was deleted by ours and should not appear"
502        );
503    }
504
505    // ── multiple conflicts ────────────────────────────────────────────────────
506
507    #[test]
508    fn test_multiple_conflicts() {
509        let base = "A\nB\nC\nD\nE";
510        let ours = "A\nB_ours\nC\nD_ours\nE";
511        let theirs = "A\nB_theirs\nC\nD_theirs\nE";
512        let result = merge(base, ours, theirs);
513        assert!(result.has_conflicts);
514        assert_eq!(result.conflicts.len(), 2, "Expected 2 conflicts");
515        let marker_count = result.merged.matches("<<<<<<< ours").count();
516        assert_eq!(marker_count, 2);
517    }
518
519    // ── conflict markers are properly formatted ───────────────────────────────
520
521    #[test]
522    fn test_conflict_marker_format() {
523        let base = "x";
524        let ours = "x_ours";
525        let theirs = "x_theirs";
526        let result = merge(base, ours, theirs);
527        assert!(result.has_conflicts);
528        let m = &result.merged;
529        let ours_pos = m.find("<<<<<<< ours").expect("missing ours marker");
530        let sep_pos = m.find("=======").expect("missing separator");
531        let theirs_pos = m.find(">>>>>>> theirs").expect("missing theirs marker");
532        assert!(ours_pos < sep_pos, "ours marker must precede separator");
533        assert!(sep_pos < theirs_pos, "separator must precede theirs marker");
534    }
535
536    // ── stats accuracy ────────────────────────────────────────────────────────
537
538    #[test]
539    fn test_stats_no_conflict() {
540        let base = "A\nB\nC";
541        let ours = "A\nB_ours\nC";
542        let theirs = "A\nB\nC_theirs";
543        let result = merge(base, ours, theirs);
544        assert_eq!(result.stats.conflict_count, 0);
545        assert!(result.stats.auto_merged_lines > 0);
546        assert_eq!(result.stats.total_lines, result.merged.lines().count());
547    }
548
549    #[test]
550    fn test_stats_with_conflict() {
551        let base = "A\nB\nC";
552        let ours = "A\nB_ours\nC";
553        let theirs = "A\nB_theirs\nC";
554        let result = merge(base, ours, theirs);
555        assert_eq!(result.stats.conflict_count, 1);
556        assert_eq!(result.stats.total_lines, result.merged.lines().count());
557    }
558
559    // ── JSON serialization ────────────────────────────────────────────────────
560
561    #[test]
562    fn test_json_output_shape() {
563        let base = "a\nb";
564        let ours = "a\nb_ours";
565        let theirs = "a\nb_theirs";
566        let json = three_way_merge(base, ours, theirs);
567        let parsed: serde_json::Value = serde_json::from_str(&json).unwrap();
568        assert!(parsed["merged"].is_string());
569        assert!(parsed["hasConflicts"].is_boolean());
570        assert!(parsed["conflicts"].is_array());
571        assert!(parsed["stats"]["conflictCount"].is_number());
572        assert!(parsed["stats"]["autoMergedLines"].is_number());
573        assert!(parsed["stats"]["totalLines"].is_number());
574    }
575
576    // ── identical inputs fast path ────────────────────────────────────────────
577
578    #[test]
579    fn test_identical_ours_and_theirs() {
580        let base = "A\nB\nC";
581        let ours = "A\nX\nC";
582        let theirs = "A\nX\nC"; // same as ours
583        let result = merge(base, ours, theirs);
584        assert!(!result.has_conflicts);
585        assert_eq!(result.merged, ours);
586    }
587
588    // ── insertion conflict at same position ───────────────────────────────────
589
590    #[test]
591    fn test_insertion_conflict_same_position() {
592        // Both insert before line 2 (C), but with different content.
593        let base = "A\nC";
594        let ours = "A\nB_ours\nC";
595        let theirs = "A\nB_theirs\nC";
596        let result = merge(base, ours, theirs);
597        assert!(result.has_conflicts);
598        assert!(result.merged.contains("B_ours"));
599        assert!(result.merged.contains("B_theirs"));
600    }
601}