agent-doc 0.32.3

//! # Module: merge
//!
//! ## Spec
//! - `merge_contents_crdt(base_state, ours, theirs)`: conflict-free merge using Yrs CRDT.
//!   Delegates to `crdt::merge`, then encodes a fresh CRDT state from the merged result.
//!   Agent content (client_id=1) is ordered before human content (client_id=2) at the same
//!   insertion point by Yrs' native client-ID ordering — no post-merge reorder needed.
//!   Returns `(merged_text, new_crdt_state_bytes)`.
//! - `merge_contents(base, ours, theirs)`: 3-way merge via `git merge-file --diff3`.
//!   Auto-resolves append-only conflicts (empty original section) by concatenating ours then
//!   theirs. Preserves standard conflict markers only for true conflicts where existing lines
//!   were modified differently by both sides. Returns the merged content string.
//! - `resolve_append_conflicts(merged)` (private): post-processes `--diff3` conflict blocks.
//!   Scans each `<<<<<<< / ||||||| / ======= / >>>>>>>` block; if the `|||||||` section is
//!   empty/whitespace-only, auto-resolves by emitting ours then theirs. Otherwise keeps all
//!   markers intact. Returns `(resolved_content, has_remaining_conflicts)`.
//!
//! ## Agentic Contracts
//! - `merge_contents_crdt` never returns conflict markers — CRDT guarantees a conflict-free
//!   result for all concurrent append/edit combinations.
//! - `merge_contents_crdt` always returns a non-empty `state` vec usable as `base_state` in
//!   the next merge cycle; state encodes the full merged text, including user edits.
//! - `merge_contents` returns `Ok` even when conflicts remain — callers must inspect the
//!   content for `<<<<<<<` markers if they need to detect unresolved conflicts.
//! - When `base_state` is `None`, `merge_contents_crdt` bootstraps from an empty CRDT doc
//!   and still produces a valid merged result.
//! - Agent content always appears before user content at the same insertion point.
//! - Returned CRDT state after a merge includes all user edits from the merge; using it as
//!   the base for the next cycle will not duplicate those edits.
//!
//! ## Evals
//! - `resolve_append_only_conflict`: both sides append at same point, original empty →
//!   markers removed, agent content before user content, no `<<<<<<<` in output.
//! - `preserve_true_conflict`: both sides modify same original line →
//!   conflict markers preserved, original section present in output.
//! - `mixed_append_and_true_conflicts`: one append-only block + one true-conflict block →
//!   append-only resolved, true-conflict keeps markers.
//! - `no_conflicts_passthrough`: no conflict markers in input → output identical to input.
//! - `multiline_append_conflict`: multi-line append-only block → all lines preserved,
//!   agent lines before user lines.
//! - `merge_contents_clean`: ours adds a line, theirs unchanged → agent addition present,
//!   no markers.
//! - `merge_contents_both_append`: both sides add different lines → both present, no markers.
//! - `crdt_merge_agent_and_user_append`: agent and user both append different content →
//!   both preserved, no conflict markers, base content intact.
//! - `crdt_merge_concurrent_same_line`: both sides insert at same position →
//!   both preserved, deterministic order, no conflict.
//! - `crdt_merge_no_base_state_bootstrap`: `None` base state → valid merge, non-empty state.
//! - `crdt_merge_one_side_unchanged`: only agent appended, theirs = base →
//!   merged equals ours exactly.
//! - `crdt_state_includes_user_edits_no_duplicates`: two consecutive merge cycles with
//!   concurrent user edit in cycle 1 → user edit appears exactly once in cycle 2 output.
//! - `crdt_multi_flush_no_duplicates`: multiple streaming flush cycles with concurrent user
//!   notes → each content piece appears exactly once across all flushes.

use anyhow::{Context, Result};
use std::process::Command;

/// Merge the agent's text (`ours`) with the user's text (`theirs`) through the
/// crate's CRDT layer, and produce the state to carry into the next merge cycle.
///
/// The merge itself is delegated to `crate::crdt::merge`. A new `CrdtDoc` is then
/// built from the merged text and encoded, so the returned state reflects the
/// full merged document rather than only one side's edits.
///
/// # Arguments
/// - `base_state`: encoded CRDT state from the last write, or `None` on first use.
/// - `ours`: the agent's version of the document.
/// - `theirs`: the user's version of the document.
///
/// # Returns
/// `(merged_text, new_crdt_state)`.
///
/// # Errors
/// Returns the error from `crate::crdt::merge`, wrapped with the context
/// "CRDT merge failed".
///
/// No post-merge reordering happens here: the relative order of agent and human
/// content at a shared insertion point is whatever `crate::crdt::merge` yields.
/// NOTE(review): the module header gives agent = client_id 1 / human = client_id 2,
/// while an earlier revision of this comment stated the reverse — confirm the
/// actual assignment against `crate::crdt`.
pub fn merge_contents_crdt(
    base_state: Option<&[u8]>,
    ours: &str,
    theirs: &str,
) -> Result<(String, Vec<u8>)> {
    let merged_text =
        crate::crdt::merge(base_state, ours, theirs).context("CRDT merge failed")?;

    // Re-encode from the merged text so the next cycle starts from what was written.
    let next_state = crate::crdt::CrdtDoc::from_text(&merged_text).encode_state();

    eprintln!("[write] CRDT merge successful — no conflicts possible.");
    Ok((merged_text, next_state))
}

/// 3-way merge using `git merge-file --diff3`.
///
/// Returns merged content. Append-only conflicts are auto-resolved by
/// concatenating both additions (ours first, then theirs).
/// True conflicts (where existing content was modified differently)
/// retain standard conflict markers.
pub fn merge_contents(base: &str, ours: &str, theirs: &str) -> Result<String> {
    let tmp = tempfile::TempDir::new()
        .context("failed to create temp dir for merge")?;

    let base_path = tmp.path().join("base");
    let ours_path = tmp.path().join("ours");
    let theirs_path = tmp.path().join("theirs");

    std::fs::write(&base_path, base)?;
    std::fs::write(&ours_path, ours)?;
    std::fs::write(&theirs_path, theirs)?;

    let output = Command::new("git")
        .current_dir(tmp.path())
        .args([
            "merge-file",
            "-p",
            "--diff3",
            "-L", "agent-response",
            "-L", "original",
            "-L", "your-edits",
            &ours_path.to_string_lossy(),
            &base_path.to_string_lossy(),
            &theirs_path.to_string_lossy(),
        ])
        .output()?;

    let merged = String::from_utf8(output.stdout)
        .map_err(|e| anyhow::anyhow!("merge produced invalid UTF-8: {}", e))?;

    if output.status.success() {
        eprintln!("[write] Merge successful — user edits preserved.");
        return Ok(merged);
    }

    if output.status.code() == Some(1) {
        // Conflicts detected — try append-friendly resolution
        let (resolved, remaining_conflicts) = resolve_append_conflicts(&merged);
        if remaining_conflicts {
            eprintln!("[write] WARNING: True merge conflicts remain. Please resolve conflict markers manually.");
        } else {
            eprintln!("[write] Merge conflicts auto-resolved (append-friendly).");
        }
        return Ok(resolved);
    }

    anyhow::bail!(
        "git merge-file failed: {}",
        String::from_utf8_lossy(&output.stderr)
    )
}

/// Resolve append-only conflicts in `git merge-file --diff3` output.
///
/// With `--diff3`, conflict blocks look like:
/// ```text
/// <<<<<<< agent-response
/// content added by agent
/// ||||||| original
/// (empty if both sides only appended)
/// =======
/// content added by user
/// >>>>>>> your-edits
/// ```
///
/// When the "original" section is present and empty or whitespace-only (both
/// sides added at the same insertion point without modifying existing content),
/// the block is auto-resolved by concatenating: ours (agent) first, then theirs
/// (user).
///
/// Every other block is copied through verbatim, with its marker lines and
/// labels exactly as they appeared in the input, and counts as a remaining
/// conflict. That includes:
/// - true conflicts (non-blank "original" section);
/// - blocks without a `|||||||` section, since there is no base to prove that
///   nothing was modified;
/// - truncated blocks missing the `=======` or `>>>>>>>` line.
///
/// The separator is recognised only when a line is exactly `=======`. A longer
/// run of `=` (for example a Markdown setext heading underline) is content.
///
/// Lines are split with `str::lines` and re-joined with `\n`, so `\r\n` endings
/// are normalised to `\n`. A trailing newline is emitted only if the input had
/// one.
///
/// Returns (resolved_content, has_remaining_conflicts).
fn resolve_append_conflicts(merged: &str) -> (String, bool) {
    const OURS_MARKER: &str = "<<<<<<< ";
    const BASE_MARKER: &str = "||||||| ";
    const SEPARATOR: &str = "=======";
    const THEIRS_MARKER: &str = ">>>>>>> ";

    /// Append `line` plus a newline to `out`.
    fn push_line(out: &mut String, line: &str) {
        out.push_str(line);
        out.push('\n');
    }

    let lines: Vec<&str> = merged.lines().collect();
    let len = lines.len();
    let mut result = String::with_capacity(merged.len() + 1);
    let mut has_remaining = false;
    let mut i = 0;

    while i < len {
        if !lines[i].starts_with(OURS_MARKER) {
            push_line(&mut result, lines[i]);
            i += 1;
            continue;
        }

        // Parse conflict block
        let block_start = i;
        i += 1; // skip <<<<<<< marker

        // "ours" section: up to the base marker or the separator
        let ours_start = i;
        while i < len && !lines[i].starts_with(BASE_MARKER) && lines[i] != SEPARATOR {
            i += 1;
        }
        let ours = &lines[ours_start..i];

        // "original" section (diff3); `None` when the marker is absent
        let mut base: Option<&[&str]> = None;
        if i < len && lines[i].starts_with(BASE_MARKER) {
            i += 1; // skip ||||||| marker
            let base_start = i;
            while i < len && lines[i] != SEPARATOR {
                i += 1;
            }
            base = Some(&lines[base_start..i]);
        }

        // ======= separator
        let has_separator = i < len && lines[i] == SEPARATOR;
        if has_separator {
            i += 1;
        }

        // "theirs" section: up to the closing marker
        let theirs_start = i;
        while i < len && !lines[i].starts_with(THEIRS_MARKER) {
            i += 1;
        }
        let theirs = &lines[theirs_start..i];

        // The scan above stops early only on a >>>>>>> line.
        let has_end = i < len;
        if has_end {
            i += 1; // skip >>>>>>> marker
        }

        // Append-only requires a complete block whose base section exists and
        // holds nothing but blank lines.
        let base_is_blank = match base {
            Some(section) => section.iter().all(|l| l.trim().is_empty()),
            None => false,
        };
        let is_append_only = has_separator && has_end && base_is_blank;

        if is_append_only {
            // Auto-resolve: ours (agent) first, then theirs (user)
            for line in ours.iter().chain(theirs) {
                push_line(&mut result, line);
            }
        } else {
            // Unresolved — copy the block through untouched, markers included
            has_remaining = true;
            for line in &lines[block_start..i] {
                push_line(&mut result, line);
            }
        }
    }

    // Every line was pushed with a trailing '\n'; drop the last one if the input
    // did not end with a newline.
    if !merged.ends_with('\n') && result.ends_with('\n') {
        result.pop();
    }

    (result, has_remaining)
}

/// Unit tests for the merge entry points and the diff3 conflict post-processor.
///
/// The `merge_contents_*` tests spawn `git`, so they need a `git` binary on
/// `PATH`. The `crdt_*` tests go through `crate::crdt`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Both sides append at the same point with an empty original section:
    /// markers are removed and agent content precedes user content.
    #[test]
    fn resolve_append_only_conflict() {
        let merged = "\
Before conflict
<<<<<<< agent-response
Agent added this line.
||||||| original
=======
User added this line.
>>>>>>> your-edits
After conflict
";
        let (resolved, has_remaining) = resolve_append_conflicts(merged);
        assert!(!has_remaining);
        assert!(resolved.contains("Agent added this line."));
        assert!(resolved.contains("User added this line."));
        assert!(!resolved.contains("<<<<<<<"));
        assert!(!resolved.contains(">>>>>>>"));
        // Agent content comes before user content
        let agent_pos = resolved.find("Agent added this line.").unwrap();
        let user_pos = resolved.find("User added this line.").unwrap();
        assert!(agent_pos < user_pos);
        // Surrounding lines are untouched and no marker line leaks through
        assert_eq!(
            resolved,
            "Before conflict\nAgent added this line.\nUser added this line.\nAfter conflict\n"
        );
    }

    /// Both sides modify the same original line: the block is kept as-is.
    #[test]
    fn preserve_true_conflict() {
        let merged = "\
<<<<<<< agent-response
Agent changed this.
||||||| original
Original line that both sides modified.
=======
User changed this differently.
>>>>>>> your-edits
";
        let (resolved, has_remaining) = resolve_append_conflicts(merged);
        assert!(has_remaining);
        assert!(resolved.contains("<<<<<<<"));
        assert!(resolved.contains(">>>>>>>"));
        assert!(resolved.contains("Original line that both sides modified."));
        // The whole block round-trips unchanged
        assert_eq!(resolved, merged);
    }

    /// One append-only block plus one true conflict: only the former is resolved.
    #[test]
    fn mixed_append_and_true_conflicts() {
        let merged = "\
Clean line.
<<<<<<< agent-response
Agent appended here.
||||||| original
=======
User appended here.
>>>>>>> your-edits
Middle line.
<<<<<<< agent-response
Agent rewrote this.
||||||| original
Was originally this.
=======
User rewrote this differently.
>>>>>>> your-edits
End line.
";
        let (resolved, has_remaining) = resolve_append_conflicts(merged);
        assert!(has_remaining);
        // Append-only conflict was resolved
        assert!(resolved.contains("Agent appended here."));
        assert!(resolved.contains("User appended here."));
        // True conflict kept markers
        assert!(resolved.contains("<<<<<<<"));
        assert!(resolved.contains("Was originally this."));
        // Exactly one conflict block is left, i.e. the append-only one really
        // lost its markers
        assert_eq!(
            resolved.matches("<<<<<<< ").count(),
            1,
            "expected exactly one remaining conflict block in:\n{}",
            resolved
        );
        assert_eq!(resolved.matches(">>>>>>> ").count(), 1);
    }

    /// Input without conflict markers comes back unchanged.
    #[test]
    fn no_conflicts_passthrough() {
        let merged = "Line one.\nLine two.\nLine three.\n";
        let (resolved, has_remaining) = resolve_append_conflicts(merged);
        assert!(!has_remaining);
        assert_eq!(resolved, merged);
    }

    /// Multi-line append-only block: every line survives, agent lines first.
    #[test]
    fn multiline_append_conflict() {
        let merged = "\
<<<<<<< agent-response
Agent line 1.
Agent line 2.
Agent line 3.
||||||| original
=======
User line 1.
User line 2.
>>>>>>> your-edits
";
        let (resolved, has_remaining) = resolve_append_conflicts(merged);
        assert!(!has_remaining);
        assert!(resolved.contains("Agent line 1.\nAgent line 2.\nAgent line 3.\n"));
        assert!(resolved.contains("User line 1.\nUser line 2.\n"));
        // Agent before user
        assert!(resolved.find("Agent line 1.").unwrap() < resolved.find("User line 1.").unwrap());
    }

    /// Ours adds a line and theirs equals base: clean merge, no markers.
    #[test]
    fn merge_contents_clean() {
        let base = "Line 1\nLine 2\n";
        let ours = "Line 1\nLine 2\nAgent added\n";
        let theirs = "Line 1\nLine 2\n";
        let result = merge_contents(base, ours, theirs).unwrap();
        assert!(result.contains("Agent added"));
        assert!(!result.contains("<<<<<<<"));
    }

    /// Agent and user append different content: both are kept, base is intact.
    #[test]
    fn crdt_merge_agent_and_user_append() {
        let base = "# Doc\n\nBase content.\n";
        let ours = "# Doc\n\nBase content.\n\nAgent response.\n";
        let theirs = "# Doc\n\nBase content.\n\nUser addition.\n";

        let base_doc = crate::crdt::CrdtDoc::from_text(base);
        let base_state = base_doc.encode_state();

        let (merged, _state) = merge_contents_crdt(Some(&base_state), ours, theirs).unwrap();
        assert!(merged.contains("Agent response."));
        assert!(merged.contains("User addition."));
        assert!(merged.contains("Base content."));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// Both sides insert at the same position: both insertions are kept.
    #[test]
    fn crdt_merge_concurrent_same_line() {
        let base = "Line 1\nLine 3\n";
        let ours = "Line 1\nAgent\nLine 3\n";
        let theirs = "Line 1\nUser\nLine 3\n";

        let base_doc = crate::crdt::CrdtDoc::from_text(base);
        let base_state = base_doc.encode_state();

        let (merged, _state) = merge_contents_crdt(Some(&base_state), ours, theirs).unwrap();
        // Both preserved, deterministic ordering, no conflict
        assert!(merged.contains("Agent"));
        assert!(merged.contains("User"));
        assert!(merged.contains("Line 1"));
        assert!(merged.contains("Line 3"));
        assert!(!merged.contains("<<<<<<<"));
    }

    /// `None` base state: the merge still succeeds and yields a non-empty state.
    #[test]
    fn crdt_merge_no_base_state_bootstrap() {
        let ours = "Agent content.\n";
        let theirs = "User content.\n";

        let (merged, state) = merge_contents_crdt(None, ours, theirs).unwrap();
        assert!(merged.contains("Agent content."));
        assert!(merged.contains("User content."));
        assert!(!state.is_empty());
    }

    /// Only the agent appended (theirs equals base): merged equals ours exactly.
    #[test]
    fn crdt_merge_one_side_unchanged() {
        let base = "Original.\n";
        let base_doc = crate::crdt::CrdtDoc::from_text(base);
        let base_state = base_doc.encode_state();

        let ours = "Original.\nAgent added.\n";
        let (merged, _) = merge_contents_crdt(Some(&base_state), ours, base).unwrap();
        assert_eq!(merged, ours);
    }

    /// Both sides add different lines after the same base line: the diff3
    /// append-only resolution keeps both and leaves no markers.
    #[test]
    fn merge_contents_both_append() {
        let base = "Line 1\n";
        let ours = "Line 1\nAgent response\n";
        let theirs = "Line 1\nUser edit\n";
        let result = merge_contents(base, ours, theirs).unwrap();
        // Both should be present, no conflict markers
        assert!(result.contains("Agent response"));
        assert!(result.contains("User edit"));
        assert!(!result.contains("<<<<<<<"));
    }

    /// Regression test: CRDT state must include user edits from the merge.
    ///
    /// Bug: After a merge cycle where the user edited concurrently, the CRDT
    /// state was rebuilt from `content_ours` (agent-only) instead of the merged
    /// state. On the next cycle, the merge saw user edits as new insertions
    /// relative to the stale base, producing duplicate text.
    ///
    /// This test simulates two consecutive merge cycles:
    /// 1. Agent writes response while user edits concurrently → merge
    /// 2. Agent writes another response using the CRDT state from cycle 1
    ///
    /// With the bug, cycle 2 would duplicate the user's edit from cycle 1.
    #[test]
    fn crdt_state_includes_user_edits_no_duplicates() {
        // --- Cycle 1: Initial state, agent responds, user edits concurrently ---
        let initial = "Why were the videos not public?\n";
        let initial_doc = crate::crdt::CrdtDoc::from_text(initial);
        let initial_state = initial_doc.encode_state();

        // Agent appends a response
        let ours_cycle1 = "Why were the videos not public?\nAlways publish public videos.\n";
        // User also edits concurrently (adds a line)
        let theirs_cycle1 = "Why were the videos not public?\nuser-edit-abc\n";

        let (merged1, state1) = merge_contents_crdt(
            Some(&initial_state), ours_cycle1, theirs_cycle1
        ).unwrap();

        // Both edits present after cycle 1
        assert!(merged1.contains("Always publish public videos."), "missing agent response");
        assert!(merged1.contains("user-edit-abc"), "missing user edit");

        // --- Cycle 2: Agent writes another response, no concurrent user edits ---
        // The agent's new content_ours includes the full merged result + new text
        let ours_cycle2 = format!("{}...unless explicitly set to private.\n", merged1);
        // No user edits this time — theirs is the same as what was written to disk
        let theirs_cycle2 = merged1.clone();

        let (merged2, _state2) = merge_contents_crdt(
            Some(&state1), &ours_cycle2, &theirs_cycle2
        ).unwrap();

        // The user's edit should appear exactly ONCE, not duplicated
        let edit_count = merged2.matches("user-edit-abc").count();
        assert_eq!(
            edit_count, 1,
            "User edit duplicated! Appeared {} times in:\n{}",
            edit_count, merged2
        );

        // Agent's content from both cycles should be present
        assert!(merged2.contains("Always publish public videos."));
        assert!(merged2.contains("...unless explicitly set to private."));
    }

    /// Regression test: Multiple flush cycles with concurrent user edits.
    ///
    /// Simulates the streaming checkpoint pattern where the agent flushes
    /// partial responses multiple times while the user keeps editing.
    #[test]
    fn crdt_multi_flush_no_duplicates() {
        let base = "# Doc\n\nQuestion here.\n";
        let base_doc = crate::crdt::CrdtDoc::from_text(base);
        let state0 = base_doc.encode_state();

        // Flush 1: Agent starts responding, user adds a note
        let ours1 = "# Doc\n\nQuestion here.\n\n### Re: Answer\n\nFirst paragraph.\n";
        let theirs1 = "# Doc\n\nQuestion here.\n\n> user note\n";
        let (merged1, state1) = merge_contents_crdt(Some(&state0), ours1, theirs1).unwrap();
        assert!(merged1.contains("First paragraph."));
        assert!(merged1.contains("> user note"));

        // Flush 2: Agent continues, user adds another note
        let ours2 = format!("{}\nSecond paragraph.\n", merged1);
        let theirs2 = format!("{}\n> another note\n", merged1);
        let (merged2, _state2) = merge_contents_crdt(Some(&state1), &ours2, &theirs2).unwrap();

        // Each piece of content appears exactly once
        assert_eq!(merged2.matches("First paragraph.").count(), 1,
            "First paragraph duplicated in:\n{}", merged2);
        assert_eq!(merged2.matches("> user note").count(), 1,
            "User note duplicated in:\n{}", merged2);
        assert!(merged2.contains("Second paragraph."));
        assert!(merged2.contains("> another note"));
    }
}