Skip to main content

agent_doc/
merge.rs

1//! 3-way merge with append-friendly conflict resolution.
2//!
3//! Uses `git merge-file --diff3` for the base merge, then post-processes
4//! to auto-resolve append-only conflicts (where both sides added content
5//! at the same position without modifying existing lines).
6
7use anyhow::{Context, Result};
8use std::process::Command;
9
10/// CRDT-based merge: conflict-free merge using Yrs CRDT.
11///
12/// Returns (merged_text, new_crdt_state).
13/// `base_state` is the CRDT state from the last write (None on first use).
14pub fn merge_contents_crdt(
15    base_state: Option<&[u8]>,
16    ours: &str,
17    theirs: &str,
18) -> Result<(String, Vec<u8>)> {
19    let merged = crate::crdt::merge(base_state, ours, theirs)
20        .context("CRDT merge failed")?;
21    // Build fresh CRDT state from the merged result
22    let doc = crate::crdt::CrdtDoc::from_text(&merged);
23    let state = doc.encode_state();
24    eprintln!("[write] CRDT merge successful — no conflicts possible.");
25    Ok((merged, state))
26}
27
28/// 3-way merge using `git merge-file --diff3`.
29///
30/// Returns merged content. Append-only conflicts are auto-resolved by
31/// concatenating both additions (ours first, then theirs).
32/// True conflicts (where existing content was modified differently)
33/// retain standard conflict markers.
34pub fn merge_contents(base: &str, ours: &str, theirs: &str) -> Result<String> {
35    let tmp = tempfile::TempDir::new()
36        .context("failed to create temp dir for merge")?;
37
38    let base_path = tmp.path().join("base");
39    let ours_path = tmp.path().join("ours");
40    let theirs_path = tmp.path().join("theirs");
41
42    std::fs::write(&base_path, base)?;
43    std::fs::write(&ours_path, ours)?;
44    std::fs::write(&theirs_path, theirs)?;
45
46    let output = Command::new("git")
47        .current_dir(tmp.path())
48        .args([
49            "merge-file",
50            "-p",
51            "--diff3",
52            "-L", "agent-response",
53            "-L", "original",
54            "-L", "your-edits",
55            &ours_path.to_string_lossy(),
56            &base_path.to_string_lossy(),
57            &theirs_path.to_string_lossy(),
58        ])
59        .output()?;
60
61    let merged = String::from_utf8(output.stdout)
62        .map_err(|e| anyhow::anyhow!("merge produced invalid UTF-8: {}", e))?;
63
64    if output.status.success() {
65        eprintln!("[write] Merge successful — user edits preserved.");
66        return Ok(merged);
67    }
68
69    if output.status.code() == Some(1) {
70        // Conflicts detected — try append-friendly resolution
71        let (resolved, remaining_conflicts) = resolve_append_conflicts(&merged);
72        if remaining_conflicts {
73            eprintln!("[write] WARNING: True merge conflicts remain. Please resolve conflict markers manually.");
74        } else {
75            eprintln!("[write] Merge conflicts auto-resolved (append-friendly).");
76        }
77        return Ok(resolved);
78    }
79
80    anyhow::bail!(
81        "git merge-file failed: {}",
82        String::from_utf8_lossy(&output.stderr)
83    )
84}
85
/// Resolve append-only conflicts in `git merge-file --diff3` output.
///
/// With `--diff3`, conflict blocks look like:
/// ```text
/// <<<<<<< agent-response
/// content added by agent
/// ||||||| original
/// (empty if both sides only appended)
/// =======
/// content added by user
/// >>>>>>> your-edits
/// ```
///
/// When the "original" section is empty or whitespace-only (both sides added
/// at the same insertion point without modifying existing content), the block
/// is auto-resolved by concatenating: ours (agent) first, then theirs (user).
///
/// A block is only auto-resolved when it is a complete diff3 block, i.e. the
/// `|||||||`, `=======` and `>>>>>>>` markers were all found. Every other
/// block (a true conflict, a block without an "original" section, or a
/// truncated block) is copied through verbatim and reported as remaining.
///
/// All text outside auto-resolved blocks keeps its original bytes, including
/// `\r\n` line endings and a missing final newline.
///
/// Returns (resolved_content, has_remaining_conflicts).
fn resolve_append_conflicts(merged: &str) -> (String, bool) {
    // The `=======` separator carries no label, so it is matched exactly
    // (ignoring the line terminator). A prefix match would mistake content
    // such as a Markdown heading underline (`========`) for the separator.
    fn is_separator(line: &str) -> bool {
        let line = line.strip_suffix('\n').unwrap_or(line);
        line.strip_suffix('\r').unwrap_or(line) == "======="
    }
    fn is_base_marker(line: &str) -> bool {
        line.starts_with("||||||| ")
    }
    fn is_end_marker(line: &str) -> bool {
        line.starts_with(">>>>>>> ")
    }

    // Each element keeps its own terminator, so copying lines back out
    // reproduces the input byte-for-byte.
    let lines: Vec<&str> = merged.split_inclusive('\n').collect();
    let len = lines.len();
    let mut result = String::with_capacity(merged.len());
    let mut has_remaining = false;
    let mut i = 0;

    while i < len {
        if !lines[i].starts_with("<<<<<<< ") {
            result.push_str(lines[i]);
            i += 1;
            continue;
        }

        // Parse one conflict block, remembering where it started so it can be
        // emitted verbatim if it is not auto-resolvable.
        let block_start = i;
        i += 1; // skip <<<<<<< marker

        // "ours" section
        let ours_start = i;
        while i < len && !is_base_marker(lines[i]) && !is_separator(lines[i]) {
            i += 1;
        }
        let ours = &lines[ours_start..i];

        // "original" section (diff3); `None` when the marker is absent
        let mut base: Option<&[&str]> = None;
        if i < len && is_base_marker(lines[i]) {
            i += 1; // skip ||||||| marker
            let base_start = i;
            while i < len && !is_separator(lines[i]) {
                i += 1;
            }
            base = Some(&lines[base_start..i]);
        }

        let has_separator = i < len && is_separator(lines[i]);
        if has_separator {
            i += 1; // skip ======= marker
        }

        // "theirs" section
        let theirs_start = i;
        while i < len && !is_end_marker(lines[i]) {
            i += 1;
        }
        let theirs = &lines[theirs_start..i];

        let has_end = i < len;
        if has_end {
            i += 1; // skip >>>>>>> marker
        }

        // Append-only: the base section exists and holds nothing but whitespace.
        let blank_base = matches!(base, Some(b) if b.iter().all(|l| l.trim().is_empty()));

        if has_separator && has_end && blank_base {
            // Auto-resolve: ours (agent) first, then theirs (user)
            for line in ours.iter().chain(theirs) {
                result.push_str(line);
            }
        } else {
            // Not provably append-only: keep the block exactly as it was,
            // markers and labels included.
            has_remaining = true;
            for line in &lines[block_start..i] {
                result.push_str(line);
            }
        }
    }

    (result, has_remaining)
}
203
#[cfg(test)]
mod tests {
    use super::*;

    /// Encode `text` as a fresh CRDT state to use as the `base_state` input.
    fn state_of(text: &str) -> Vec<u8> {
        crate::crdt::CrdtDoc::from_text(text).encode_state()
    }

    /// An empty `original` section is auto-resolved: markers disappear and the
    /// agent text is placed before the user text.
    #[test]
    fn resolve_append_only_conflict() {
        let input = concat!(
            "Before conflict\n",
            "<<<<<<< agent-response\n",
            "Agent added this line.\n",
            "||||||| original\n",
            "=======\n",
            "User added this line.\n",
            ">>>>>>> your-edits\n",
            "After conflict\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert!(output.contains("Agent added this line."));
        assert!(output.contains("User added this line."));
        assert!(!output.contains("<<<<<<<"));
        assert!(!output.contains(">>>>>>>"));

        // Agent content comes before user content
        let agent_at = output.find("Agent added this line.").unwrap();
        let user_at = output.find("User added this line.").unwrap();
        assert!(agent_at < user_at);
    }

    /// A non-empty `original` section marks a true conflict, which keeps its
    /// markers and its original text.
    #[test]
    fn preserve_true_conflict() {
        let input = concat!(
            "<<<<<<< agent-response\n",
            "Agent changed this.\n",
            "||||||| original\n",
            "Original line that both sides modified.\n",
            "=======\n",
            "User changed this differently.\n",
            ">>>>>>> your-edits\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(unresolved);
        assert!(output.contains("<<<<<<<"));
        assert!(output.contains(">>>>>>>"));
        assert!(output.contains("Original line that both sides modified."));
    }

    /// One append-only block followed by one true conflict: the first is
    /// resolved, the second is kept and reported.
    #[test]
    fn mixed_append_and_true_conflicts() {
        let input = concat!(
            "Clean line.\n",
            "<<<<<<< agent-response\n",
            "Agent appended here.\n",
            "||||||| original\n",
            "=======\n",
            "User appended here.\n",
            ">>>>>>> your-edits\n",
            "Middle line.\n",
            "<<<<<<< agent-response\n",
            "Agent rewrote this.\n",
            "||||||| original\n",
            "Was originally this.\n",
            "=======\n",
            "User rewrote this differently.\n",
            ">>>>>>> your-edits\n",
            "End line.\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(unresolved);
        // Append-only conflict was resolved
        assert!(output.contains("Agent appended here."));
        assert!(output.contains("User appended here."));
        // True conflict kept markers
        assert!(output.contains("<<<<<<<"));
        assert!(output.contains("Was originally this."));
    }

    /// Input without any conflict markers comes back unchanged.
    #[test]
    fn no_conflicts_passthrough() {
        let input = "Line one.\nLine two.\nLine three.\n";

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert_eq!(output, input);
    }

    /// Multi-line additions on both sides stay contiguous and in order.
    #[test]
    fn multiline_append_conflict() {
        let input = concat!(
            "<<<<<<< agent-response\n",
            "Agent line 1.\n",
            "Agent line 2.\n",
            "Agent line 3.\n",
            "||||||| original\n",
            "=======\n",
            "User line 1.\n",
            "User line 2.\n",
            ">>>>>>> your-edits\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert!(output.contains("Agent line 1.\nAgent line 2.\nAgent line 3.\n"));
        assert!(output.contains("User line 1.\nUser line 2.\n"));
        // Agent before user
        let first_agent = output.find("Agent line 1.").unwrap();
        let first_user = output.find("User line 1.").unwrap();
        assert!(first_agent < first_user);
    }

    /// Only the agent side changed: the merge keeps the agent's addition.
    #[test]
    fn merge_contents_clean() {
        let unchanged = "Line 1\nLine 2\n";
        let agent_side = "Line 1\nLine 2\nAgent added\n";

        let merged = merge_contents(unchanged, agent_side, unchanged).unwrap();

        assert!(merged.contains("Agent added"));
    }

    /// Both sides append after the same base; the CRDT merge keeps everything
    /// and emits no conflict markers.
    #[test]
    fn crdt_merge_agent_and_user_append() {
        let base_state = state_of("# Doc\n\nBase content.\n");
        let agent_side = "# Doc\n\nBase content.\n\nAgent response.\n";
        let user_side = "# Doc\n\nBase content.\n\nUser addition.\n";

        let (merged, _state) =
            merge_contents_crdt(Some(&base_state), agent_side, user_side).unwrap();

        assert!(merged.contains("Agent response."));
        assert!(merged.contains("User addition."));
        assert!(merged.contains("Base content."));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// Both sides insert a different line at the same position.
    #[test]
    fn crdt_merge_concurrent_same_line() {
        let base_state = state_of("Line 1\nLine 3\n");
        let agent_side = "Line 1\nAgent\nLine 3\n";
        let user_side = "Line 1\nUser\nLine 3\n";

        let (merged, _state) =
            merge_contents_crdt(Some(&base_state), agent_side, user_side).unwrap();

        // Both insertions and both surrounding lines survive the merge.
        assert!(merged.contains("Agent"));
        assert!(merged.contains("User"));
        assert!(merged.contains("Line 1"));
        assert!(merged.contains("Line 3"));
    }

    /// With no stored base state, the merge still keeps both sides and
    /// returns a non-empty state.
    #[test]
    fn crdt_merge_no_base_state_bootstrap() {
        let agent_side = "Agent content.\n";
        let user_side = "User content.\n";

        let (merged, new_state) = merge_contents_crdt(None, agent_side, user_side).unwrap();

        assert!(merged.contains("Agent content."));
        assert!(merged.contains("User content."));
        assert!(!new_state.is_empty());
    }

    /// When the user side equals the base, the result is exactly the agent side.
    #[test]
    fn crdt_merge_one_side_unchanged() {
        let base = "Original.\n";
        let base_state = state_of(base);
        let agent_side = "Original.\nAgent added.\n";

        let (merged, _) = merge_contents_crdt(Some(&base_state), agent_side, base).unwrap();

        assert_eq!(merged, agent_side);
    }

    /// Both sides append after the same base; `merge_contents` keeps both
    /// additions without conflict markers.
    #[test]
    fn merge_contents_both_append() {
        let base = "Line 1\n";
        let agent_side = "Line 1\nAgent response\n";
        let user_side = "Line 1\nUser edit\n";

        let merged = merge_contents(base, agent_side, user_side).unwrap();

        // Both should be present, no conflict markers
        assert!(merged.contains("Agent response"));
        assert!(merged.contains("User edit"));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// Regression test: the CRDT state returned by a merge must include the
    /// user's edits from that merge.
    ///
    /// Bug: after a merge cycle with a concurrent user edit, the CRDT state
    /// was rebuilt from `content_ours` (agent-only) instead of the merged
    /// text. The next cycle then saw the user's edit as a new insertion
    /// relative to the stale base and duplicated it.
    ///
    /// Two consecutive cycles are simulated:
    /// 1. Agent writes a response while the user edits concurrently → merge
    /// 2. Agent writes again, starting from the CRDT state of cycle 1
    ///
    /// With the bug, cycle 2 duplicates the user's edit from cycle 1.
    #[test]
    fn crdt_state_includes_user_edits_no_duplicates() {
        // --- Cycle 1: agent responds, user edits concurrently ---
        let state0 = state_of("Why were the videos not public?\n");
        let agent_cycle1 = "Why were the videos not public?\nAlways publish public videos.\n";
        let user_cycle1 = "Why were the videos not public?\nuser-edit-abc\n";

        let (merged1, state1) =
            merge_contents_crdt(Some(&state0), agent_cycle1, user_cycle1).unwrap();

        // Both edits present after cycle 1
        assert!(merged1.contains("Always publish public videos."), "missing agent response");
        assert!(merged1.contains("user-edit-abc"), "missing user edit");

        // --- Cycle 2: agent appends again, no concurrent user edits ---
        // The agent's content is the full merged result plus new text; the
        // user side is exactly what was written to disk after cycle 1.
        let agent_cycle2 = format!("{}...unless explicitly set to private.\n", merged1);
        let user_cycle2 = merged1.clone();

        let (merged2, _state2) =
            merge_contents_crdt(Some(&state1), &agent_cycle2, &user_cycle2).unwrap();

        // The user's edit should appear exactly ONCE, not duplicated
        let edit_count = merged2.matches("user-edit-abc").count();
        assert_eq!(
            edit_count, 1,
            "User edit duplicated! Appeared {} times in:\n{}",
            edit_count, merged2
        );

        // Agent's content from both cycles should be present
        assert!(merged2.contains("Always publish public videos."));
        assert!(merged2.contains("...unless explicitly set to private."));
    }

    /// Regression test: multiple flush cycles with concurrent user edits.
    ///
    /// Simulates the streaming checkpoint pattern where the agent flushes
    /// partial responses several times while the user keeps editing.
    #[test]
    fn crdt_multi_flush_no_duplicates() {
        let state0 = state_of("# Doc\n\nQuestion here.\n");

        // Flush 1: agent starts responding, user adds a note
        let agent_flush1 = "# Doc\n\nQuestion here.\n\n### Re: Answer\n\nFirst paragraph.\n";
        let user_flush1 = "# Doc\n\nQuestion here.\n\n> user note\n";
        let (merged1, state1) =
            merge_contents_crdt(Some(&state0), agent_flush1, user_flush1).unwrap();
        assert!(merged1.contains("First paragraph."));
        assert!(merged1.contains("> user note"));

        // Flush 2: agent continues, user adds another note
        let agent_flush2 = format!("{}\nSecond paragraph.\n", merged1);
        let user_flush2 = format!("{}\n> another note\n", merged1);
        let (merged2, _state2) =
            merge_contents_crdt(Some(&state1), &agent_flush2, &user_flush2).unwrap();

        // Each piece of content appears exactly once
        assert_eq!(merged2.matches("First paragraph.").count(), 1,
            "First paragraph duplicated in:\n{}", merged2);
        assert_eq!(merged2.matches("> user note").count(), 1,
            "User note duplicated in:\n{}", merged2);
        assert!(merged2.contains("Second paragraph."));
        assert!(merged2.contains("> another note"));
    }
}