Skip to main content

agent_doc/
merge.rs

1//! 3-way merge with append-friendly conflict resolution.
2//!
3//! Uses `git merge-file --diff3` for the base merge, then post-processes
4//! to auto-resolve append-only conflicts (where both sides added content
5//! at the same position without modifying existing lines).
6
7use anyhow::{Context, Result};
8use std::process::Command;
9
10/// CRDT-based merge: conflict-free merge using Yrs CRDT.
11///
12/// Returns (merged_text, new_crdt_state).
13/// `base_state` is the CRDT state from the last write (None on first use).
14/// Agent content (client_id=2) naturally appears before human content (client_id=1)
15/// at the same insertion point — no post-merge reorder needed.
16pub fn merge_contents_crdt(
17    base_state: Option<&[u8]>,
18    ours: &str,
19    theirs: &str,
20) -> Result<(String, Vec<u8>)> {
21    let merged = crate::crdt::merge(base_state, ours, theirs)
22        .context("CRDT merge failed")?;
23    // Build fresh CRDT state from the merged result
24    let doc = crate::crdt::CrdtDoc::from_text(&merged);
25    let state = doc.encode_state();
26    eprintln!("[write] CRDT merge successful — no conflicts possible.");
27    Ok((merged, state))
28}
29
30/// 3-way merge using `git merge-file --diff3`.
31///
32/// Returns merged content. Append-only conflicts are auto-resolved by
33/// concatenating both additions (ours first, then theirs).
34/// True conflicts (where existing content was modified differently)
35/// retain standard conflict markers.
36pub fn merge_contents(base: &str, ours: &str, theirs: &str) -> Result<String> {
37    let tmp = tempfile::TempDir::new()
38        .context("failed to create temp dir for merge")?;
39
40    let base_path = tmp.path().join("base");
41    let ours_path = tmp.path().join("ours");
42    let theirs_path = tmp.path().join("theirs");
43
44    std::fs::write(&base_path, base)?;
45    std::fs::write(&ours_path, ours)?;
46    std::fs::write(&theirs_path, theirs)?;
47
48    let output = Command::new("git")
49        .current_dir(tmp.path())
50        .args([
51            "merge-file",
52            "-p",
53            "--diff3",
54            "-L", "agent-response",
55            "-L", "original",
56            "-L", "your-edits",
57            &ours_path.to_string_lossy(),
58            &base_path.to_string_lossy(),
59            &theirs_path.to_string_lossy(),
60        ])
61        .output()?;
62
63    let merged = String::from_utf8(output.stdout)
64        .map_err(|e| anyhow::anyhow!("merge produced invalid UTF-8: {}", e))?;
65
66    if output.status.success() {
67        eprintln!("[write] Merge successful — user edits preserved.");
68        return Ok(merged);
69    }
70
71    if output.status.code() == Some(1) {
72        // Conflicts detected — try append-friendly resolution
73        let (resolved, remaining_conflicts) = resolve_append_conflicts(&merged);
74        if remaining_conflicts {
75            eprintln!("[write] WARNING: True merge conflicts remain. Please resolve conflict markers manually.");
76        } else {
77            eprintln!("[write] Merge conflicts auto-resolved (append-friendly).");
78        }
79        return Ok(resolved);
80    }
81
82    anyhow::bail!(
83        "git merge-file failed: {}",
84        String::from_utf8_lossy(&output.stderr)
85    )
86}
87
/// Resolve append-only conflicts in `git merge-file --diff3` output.
///
/// With `--diff3`, conflict blocks look like:
/// ```text
/// <<<<<<< agent-response
/// content added by agent
/// ||||||| original
/// (empty if both sides only appended)
/// =======
/// content added by user
/// >>>>>>> your-edits
/// ```
///
/// When the "original" section is present and empty (or whitespace-only) —
/// i.e. both sides added at the same insertion point without modifying
/// existing content — the block is auto-resolved by concatenating:
/// ours (agent) first, then theirs (user).
///
/// Every other block is copied through byte-for-byte, including its original
/// marker labels. That covers true conflicts as well as blocks that cannot be
/// proven append-only: ones without a `|||||||` section, or ones that are
/// truncated (missing `=======` or `>>>>>>>`).
///
/// Line endings are preserved exactly (`\n`, `\r\n`, or a missing final
/// newline), because lines are split *including* their terminator.
///
/// Returns `(resolved_content, has_remaining_conflicts)`, where the flag is
/// `true` if at least one conflict block was left in the output.
fn resolve_append_conflicts(merged: &str) -> (String, bool) {
    // Start of the diff3 "original" (base) section.
    fn is_base_marker(line: &str) -> bool {
        line.starts_with("||||||| ")
    }

    // The ours/theirs separator is exactly seven `=`; longer runs (such as a
    // Markdown setext heading underline) are ordinary content.
    fn is_separator(line: &str) -> bool {
        line.trim_end_matches(|c: char| c == '\r' || c == '\n') == "======="
    }

    // End of a conflict block.
    fn is_end_marker(line: &str) -> bool {
        line.starts_with(">>>>>>> ")
    }

    // Keep each line's terminator so the output reproduces the input's line
    // endings without any fix-up pass.
    let lines: Vec<&str> = merged.split_inclusive('\n').collect();
    let len = lines.len();

    let mut result = String::with_capacity(merged.len());
    let mut has_remaining = false;
    let mut i = 0;

    while i < len {
        if !lines[i].starts_with("<<<<<<< ") {
            result.push_str(lines[i]);
            i += 1;
            continue;
        }

        // Parse one conflict block, remembering where it starts so it can be
        // re-emitted verbatim if it turns out not to be auto-resolvable.
        let block_start = i;
        i += 1; // skip <<<<<<< marker

        // "ours" section
        let ours_start = i;
        while i < len && !is_base_marker(lines[i]) && !is_separator(lines[i]) {
            i += 1;
        }
        let ours_lines = &lines[ours_start..i];

        // "original" section (diff3); `None` when the marker is absent
        let mut original_lines: Option<&[&str]> = None;
        if i < len && is_base_marker(lines[i]) {
            i += 1; // skip ||||||| marker
            let original_start = i;
            while i < len && !is_separator(lines[i]) {
                i += 1;
            }
            original_lines = Some(&lines[original_start..i]);
        }

        // ======= marker
        let saw_separator = i < len && is_separator(lines[i]);
        if saw_separator {
            i += 1;
        }

        // "theirs" section
        let theirs_start = i;
        while i < len && !is_end_marker(lines[i]) {
            i += 1;
        }
        let theirs_lines = &lines[theirs_start..i];

        // >>>>>>> marker
        let saw_end = i < len;
        if saw_end {
            i += 1;
        }

        // Append-only requires a complete block whose base section exists and
        // holds nothing but blank lines. Without a base section there is no
        // evidence that existing content was left untouched.
        let is_append_only = saw_separator
            && saw_end
            && original_lines.map_or(false, |base| base.iter().all(|l| l.trim().is_empty()));

        if is_append_only {
            // Auto-resolve: ours (agent) first, then theirs (user)
            for line in ours_lines.iter().chain(theirs_lines) {
                result.push_str(line);
            }
        } else {
            // Leave the block exactly as git wrote it, labels included.
            has_remaining = true;
            for line in &lines[block_start..i] {
                result.push_str(line);
            }
        }
    }

    (result, has_remaining)
}
205
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a CRDT document from `text` and returns its encoded state.
    fn encoded_state(text: &str) -> Vec<u8> {
        crate::crdt::CrdtDoc::from_text(text).encode_state()
    }

    /// A block with an empty "original" section is resolved, agent text first.
    #[test]
    fn resolve_append_only_conflict() {
        let input = concat!(
            "Before conflict\n",
            "<<<<<<< agent-response\n",
            "Agent added this line.\n",
            "||||||| original\n",
            "=======\n",
            "User added this line.\n",
            ">>>>>>> your-edits\n",
            "After conflict\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert!(output.contains("Agent added this line."));
        assert!(output.contains("User added this line."));
        assert!(!output.contains("<<<<<<<"));
        assert!(!output.contains(">>>>>>>"));

        // The agent's addition must precede the user's.
        let agent_at = output.find("Agent added this line.").unwrap();
        let user_at = output.find("User added this line.").unwrap();
        assert!(agent_at < user_at);
    }

    /// A block whose "original" section has content keeps its markers.
    #[test]
    fn preserve_true_conflict() {
        let input = concat!(
            "<<<<<<< agent-response\n",
            "Agent changed this.\n",
            "||||||| original\n",
            "Original line that both sides modified.\n",
            "=======\n",
            "User changed this differently.\n",
            ">>>>>>> your-edits\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(unresolved);
        assert!(output.contains("<<<<<<<"));
        assert!(output.contains(">>>>>>>"));
        assert!(output.contains("Original line that both sides modified."));
    }

    /// One append-only block and one true conflict in the same input.
    #[test]
    fn mixed_append_and_true_conflicts() {
        let input = concat!(
            "Clean line.\n",
            "<<<<<<< agent-response\n",
            "Agent appended here.\n",
            "||||||| original\n",
            "=======\n",
            "User appended here.\n",
            ">>>>>>> your-edits\n",
            "Middle line.\n",
            "<<<<<<< agent-response\n",
            "Agent rewrote this.\n",
            "||||||| original\n",
            "Was originally this.\n",
            "=======\n",
            "User rewrote this differently.\n",
            ">>>>>>> your-edits\n",
            "End line.\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(unresolved);
        // The append-only block was flattened...
        assert!(output.contains("Agent appended here."));
        assert!(output.contains("User appended here."));
        // ...while the true conflict still carries its markers.
        assert!(output.contains("<<<<<<<"));
        assert!(output.contains("Was originally this."));
    }

    /// Input without any conflict block comes back unchanged.
    #[test]
    fn no_conflicts_passthrough() {
        let input = "Line one.\nLine two.\nLine three.\n";

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert_eq!(output, input);
    }

    /// Multi-line additions on both sides are kept contiguous and in order.
    #[test]
    fn multiline_append_conflict() {
        let input = concat!(
            "<<<<<<< agent-response\n",
            "Agent line 1.\n",
            "Agent line 2.\n",
            "Agent line 3.\n",
            "||||||| original\n",
            "=======\n",
            "User line 1.\n",
            "User line 2.\n",
            ">>>>>>> your-edits\n",
        );

        let (output, unresolved) = resolve_append_conflicts(input);

        assert!(!unresolved);
        assert!(output.contains("Agent line 1.\nAgent line 2.\nAgent line 3.\n"));
        assert!(output.contains("User line 1.\nUser line 2.\n"));
        // Agent block first, user block second.
        let agent_at = output.find("Agent line 1.").unwrap();
        let user_at = output.find("User line 1.").unwrap();
        assert!(agent_at < user_at);
    }

    /// Only the agent changed anything: the merge is clean.
    #[test]
    fn merge_contents_clean() {
        let base = "Line 1\nLine 2\n";
        let agent = "Line 1\nLine 2\nAgent added\n";
        let user = "Line 1\nLine 2\n";

        let merged = merge_contents(base, agent, user).unwrap();

        assert!(merged.contains("Agent added"));
    }

    /// Both sides append after the same base text.
    #[test]
    fn crdt_merge_agent_and_user_append() {
        let base = "# Doc\n\nBase content.\n";
        let agent = "# Doc\n\nBase content.\n\nAgent response.\n";
        let user = "# Doc\n\nBase content.\n\nUser addition.\n";
        let base_state = encoded_state(base);

        let (merged, _state) =
            merge_contents_crdt(Some(base_state.as_slice()), agent, user).unwrap();

        assert!(merged.contains("Agent response."));
        assert!(merged.contains("User addition."));
        assert!(merged.contains("Base content."));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// Both sides insert a line at the same position between existing lines.
    #[test]
    fn crdt_merge_concurrent_same_line() {
        let base = "Line 1\nLine 3\n";
        let agent = "Line 1\nAgent\nLine 3\n";
        let user = "Line 1\nUser\nLine 3\n";
        let base_state = encoded_state(base);

        let (merged, _state) =
            merge_contents_crdt(Some(base_state.as_slice()), agent, user).unwrap();

        // Nothing from either side or from the base may be lost.
        for expected in ["Agent", "User", "Line 1", "Line 3"] {
            assert!(merged.contains(expected));
        }
    }

    /// No stored CRDT state yet (`None` base).
    #[test]
    fn crdt_merge_no_base_state_bootstrap() {
        let agent = "Agent content.\n";
        let user = "User content.\n";

        let (merged, state) = merge_contents_crdt(None, agent, user).unwrap();

        assert!(merged.contains("Agent content."));
        assert!(merged.contains("User content."));
        assert!(!state.is_empty());
    }

    /// When the user side equals the base, the result is exactly the agent text.
    #[test]
    fn crdt_merge_one_side_unchanged() {
        let base = "Original.\n";
        let agent = "Original.\nAgent added.\n";
        let base_state = encoded_state(base);

        let (merged, _) = merge_contents_crdt(Some(base_state.as_slice()), agent, base).unwrap();

        assert_eq!(merged, agent);
    }

    /// Both sides append via the git-based merge; markers must be resolved away.
    #[test]
    fn merge_contents_both_append() {
        let base = "Line 1\n";
        let agent = "Line 1\nAgent response\n";
        let user = "Line 1\nUser edit\n";

        let merged = merge_contents(base, agent, user).unwrap();

        assert!(merged.contains("Agent response"));
        assert!(merged.contains("User edit"));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// Regression test: the CRDT state returned by a merge must cover the
    /// user's edits, not just the agent's.
    ///
    /// Past bug: after a cycle with a concurrent user edit, the state was
    /// rebuilt from `content_ours` (agent-only) rather than from the merged
    /// text. The following cycle then treated the user's edit as a brand-new
    /// insertion against that stale base and emitted it twice.
    ///
    /// Two consecutive cycles are simulated here:
    /// 1. The agent responds while the user edits concurrently → merge.
    /// 2. The agent responds again, using the state produced by cycle 1.
    ///
    /// With the bug present, cycle 2 duplicates the user's cycle-1 edit.
    #[test]
    fn crdt_state_includes_user_edits_no_duplicates() {
        // --- Cycle 1: agent and user both add a line to the initial text ---
        let initial = "Why were the videos not public?\n";
        let initial_state = encoded_state(initial);

        let agent_cycle1 = "Why were the videos not public?\nAlways publish public videos.\n";
        let user_cycle1 = "Why were the videos not public?\nuser-edit-abc\n";

        let (merged1, state1) =
            merge_contents_crdt(Some(initial_state.as_slice()), agent_cycle1, user_cycle1)
                .unwrap();

        assert!(merged1.contains("Always publish public videos."), "missing agent response");
        assert!(merged1.contains("user-edit-abc"), "missing user edit");

        // --- Cycle 2: agent extends the merged text; the user is idle ---
        // The agent's side is the full cycle-1 result plus new text, and the
        // user's side is exactly what was written to disk after cycle 1.
        let mut agent_cycle2 = merged1.clone();
        agent_cycle2.push_str("...unless explicitly set to private.\n");
        let user_cycle2 = merged1.as_str();

        let (merged2, _state2) =
            merge_contents_crdt(Some(state1.as_slice()), &agent_cycle2, user_cycle2).unwrap();

        // The user's edit must show up exactly once.
        let edit_count = merged2.matches("user-edit-abc").count();
        assert_eq!(
            edit_count, 1,
            "User edit duplicated! Appeared {} times in:\n{}",
            edit_count, merged2
        );

        // Agent text from both cycles survives.
        assert!(merged2.contains("Always publish public videos."));
        assert!(merged2.contains("...unless explicitly set to private."));
    }

    /// Regression test: several flush cycles with concurrent user edits.
    ///
    /// Mirrors the streaming-checkpoint pattern in which the agent flushes
    /// partial responses repeatedly while the user keeps editing.
    #[test]
    fn crdt_multi_flush_no_duplicates() {
        let base = "# Doc\n\nQuestion here.\n";
        let state0 = encoded_state(base);

        // Flush 1: the agent begins its answer; the user adds a note.
        let agent1 = "# Doc\n\nQuestion here.\n\n### Re: Answer\n\nFirst paragraph.\n";
        let user1 = "# Doc\n\nQuestion here.\n\n> user note\n";
        let (merged1, state1) =
            merge_contents_crdt(Some(state0.as_slice()), agent1, user1).unwrap();
        assert!(merged1.contains("First paragraph."));
        assert!(merged1.contains("> user note"));

        // Flush 2: both sides build on the flush-1 result.
        let agent2 = [merged1.as_str(), "\nSecond paragraph.\n"].concat();
        let user2 = [merged1.as_str(), "\n> another note\n"].concat();
        let (merged2, _state2) =
            merge_contents_crdt(Some(state1.as_slice()), &agent2, &user2).unwrap();

        // Earlier content appears once; new content from both sides is there.
        assert_eq!(merged2.matches("First paragraph.").count(), 1,
            "First paragraph duplicated in:\n{}", merged2);
        assert_eq!(merged2.matches("> user note").count(), 1,
            "User note duplicated in:\n{}", merged2);
        assert!(merged2.contains("Second paragraph."));
        assert!(merged2.contains("> another note"));
    }
}