Skip to main content

semantic_rag_merge/
lib.rs

1//! Semantic-RAG-Merge: Semantic Merging with RAG and LLM Arbiter
2//!
3//! This crate provides the semantic version control features for AIVCS,
4//! allowing intelligent merging of divergent agent states and memory.
5//!
6//! ## Layer 3 - VCS Logic
7//!
8//! Focus: Semantic conflict resolution and memory synthesis.
9
10use anyhow::Result;
11use oxidized_state::{CommitId, MemoryRecord, SurrealHandle};
12use serde::{Deserialize, Serialize};
13
14/// Difference between two memory vector stores
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct VectorStoreDelta {
17    /// Memories only in commit A
18    pub only_in_a: Vec<MemoryRecord>,
19    /// Memories only in commit B
20    pub only_in_b: Vec<MemoryRecord>,
21    /// Memories that are identical in both A and B
22    pub identical: Vec<MemoryRecord>,
23    /// Memories that differ between A and B (same key, different content)
24    pub conflicts: Vec<MemoryConflict>,
25}
26
27/// A conflict between two memory records
28#[derive(Debug, Clone, Serialize, Deserialize)]
29pub struct MemoryConflict {
30    /// Memory key
31    pub key: String,
32    /// Memory from commit A
33    pub memory_a: MemoryRecord,
34    /// Memory from commit B
35    pub memory_b: MemoryRecord,
36}
37
38/// Result of automatic conflict resolution
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct AutoResolvedValue {
41    /// The resolved value
42    pub value: String,
43    /// Which branch the resolution favored (if any)
44    pub favored_branch: Option<String>,
45    /// Reasoning for the resolution
46    pub reasoning: String,
47    /// Confidence score (0.0 - 1.0)
48    pub confidence: f32,
49}
50
51/// Result of a semantic merge operation
52#[derive(Debug, Clone, Serialize, Deserialize)]
53pub struct MergeResult {
54    /// The new merge commit ID
55    pub merge_commit_id: CommitId,
56    /// Number of automatic resolutions
57    pub auto_resolved: usize,
58    /// Any conflicts that couldn't be auto-resolved
59    pub manual_conflicts: Vec<MemoryConflict>,
60    /// Summary of the merge
61    pub summary: String,
62}
63
64/// Diff memory vectors between two commits
65///
66/// # TDD: test_memory_diff_shows_only_new_vectors
67pub async fn diff_memory_vectors(
68    handle: &SurrealHandle,
69    commit_a: &str,
70    commit_b: &str,
71) -> Result<VectorStoreDelta> {
72    let memories_a = handle.get_memories(commit_a).await?;
73    let memories_b = handle.get_memories(commit_b).await?;
74
75    let keys_a: std::collections::HashSet<_> = memories_a.iter().map(|m| &m.key).collect();
76    let keys_b: std::collections::HashSet<_> = memories_b.iter().map(|m| &m.key).collect();
77
78    let only_in_a: Vec<_> = memories_a
79        .iter()
80        .filter(|m| !keys_b.contains(&m.key))
81        .cloned()
82        .collect();
83
84    let only_in_b: Vec<_> = memories_b
85        .iter()
86        .filter(|m| !keys_a.contains(&m.key))
87        .cloned()
88        .collect();
89
90    // Find conflicts (same key, different content) and identical memories
91    let mut conflicts = Vec::new();
92    let mut identical = Vec::new();
93    for mem_a in &memories_a {
94        if let Some(mem_b) = memories_b.iter().find(|m| m.key == mem_a.key) {
95            if mem_a.content != mem_b.content {
96                conflicts.push(MemoryConflict {
97                    key: mem_a.key.clone(),
98                    memory_a: mem_a.clone(),
99                    memory_b: mem_b.clone(),
100                });
101            } else {
102                identical.push(mem_a.clone());
103            }
104        }
105    }
106
107    Ok(VectorStoreDelta {
108        only_in_a,
109        only_in_b,
110        identical,
111        conflicts,
112    })
113}
114
115/// Resolve a state conflict using LLM Arbiter
116///
117/// # TDD: test_arbiter_resolves_value_conflict_based_on_CoT
118pub async fn resolve_conflict_state(
119    _trace_a: &[serde_json::Value],
120    _trace_b: &[serde_json::Value],
121    conflict: &MemoryConflict,
122) -> Result<AutoResolvedValue> {
123    // TODO: Implement LLM-based conflict resolution
124    // For now, use a simple heuristic: prefer the longer content
125
126    let (value, favored, reasoning) =
127        if conflict.memory_a.content.len() >= conflict.memory_b.content.len() {
128            (
129                conflict.memory_a.content.clone(),
130                Some("A".to_string()),
131                "Chose branch A: more detailed content".to_string(),
132            )
133        } else {
134            (
135                conflict.memory_b.content.clone(),
136                Some("B".to_string()),
137                "Chose branch B: more detailed content".to_string(),
138            )
139        };
140
141    Ok(AutoResolvedValue {
142        value,
143        favored_branch: favored,
144        reasoning,
145        confidence: 0.6, // Low confidence for heuristic resolution
146    })
147}
148
149/// Synthesize two memory stores into one
150///
151/// # TDD: test_merge_synthesizes_two_memories_into_one_new_commit
152pub async fn synthesize_memory(
153    handle: &SurrealHandle,
154    commit_a: &str,
155    commit_b: &str,
156    new_commit_id: &str,
157) -> Result<Vec<MemoryRecord>> {
158    let delta = diff_memory_vectors(handle, commit_a, commit_b).await?;
159
160    let mut merged_memories = Vec::new();
161
162    // Include all memories unique to A
163    for mut mem in delta.only_in_a {
164        mem.commit_id = new_commit_id.to_string();
165        mem.id = None;
166        merged_memories.push(mem);
167    }
168
169    // Include all memories unique to B
170    for mut mem in delta.only_in_b {
171        mem.commit_id = new_commit_id.to_string();
172        mem.id = None;
173        merged_memories.push(mem);
174    }
175
176    // Include identical memories
177    for mut mem in delta.identical {
178        mem.commit_id = new_commit_id.to_string();
179        mem.id = None;
180        merged_memories.push(mem);
181    }
182
183    // Resolve conflicts
184    for conflict in delta.conflicts {
185        let resolved = resolve_conflict_state(&[], &[], &conflict).await?;
186        let merged_mem = MemoryRecord::new(new_commit_id, &conflict.key, &resolved.value)
187            .with_metadata(serde_json::json!({
188                "merged_from": [commit_a, commit_b],
189                "resolution": resolved.reasoning,
190                "confidence": resolved.confidence,
191            }));
192        merged_memories.push(merged_mem);
193    }
194
195    Ok(merged_memories)
196}
197
198/// Perform a semantic merge of two branches
199pub async fn semantic_merge(
200    handle: &SurrealHandle,
201    commit_a: &str,
202    commit_b: &str,
203    message: &str,
204    author: &str,
205) -> Result<MergeResult> {
206    // Create the merge commit ID
207    let state_data = format!("merge:{}:{}", commit_a, commit_b);
208    let merge_commit_id = CommitId::from_state(state_data.as_bytes());
209
210    // Synthesize memories
211    let merged_memories =
212        synthesize_memory(handle, commit_a, commit_b, &merge_commit_id.hash).await?;
213
214    // Save merged memories
215    for mem in &merged_memories {
216        handle.save_memory(mem).await?;
217    }
218
219    // Create merge commit record
220    let commit = oxidized_state::CommitRecord::new(
221        merge_commit_id.clone(),
222        vec![commit_a.to_string(), commit_b.to_string()],
223        message,
224        author,
225    );
226    handle.save_commit(&commit).await?;
227
228    // Save graph edges for both parents
229    handle
230        .save_commit_graph_edge(&merge_commit_id.hash, commit_a)
231        .await?;
232    handle
233        .save_commit_graph_edge(&merge_commit_id.hash, commit_b)
234        .await?;
235
236    // Get delta for summary
237    let delta = diff_memory_vectors(handle, commit_a, commit_b).await?;
238
239    Ok(MergeResult {
240        merge_commit_id,
241        auto_resolved: delta.conflicts.len(),
242        manual_conflicts: vec![], // All resolved automatically for now
243        summary: format!(
244            "Merged {} memories from A, {} from B, resolved {} conflicts",
245            delta.only_in_a.len(),
246            delta.only_in_b.len(),
247            delta.conflicts.len()
248        ),
249    })
250}
251
#[cfg(test)]
mod tests {
    //! Integration-style tests; each one sets up its own database handle via
    //! `SurrealHandle::setup_db`.

    use super::*;

    /// Keys unique to one side land in the matching `only_in_*` bucket and a
    /// key with equal content on both sides is reported as identical.
    #[tokio::test]
    async fn test_memory_diff_shows_only_new_vectors() {
        let handle = SurrealHandle::setup_db().await.unwrap();

        // Create memories for commit A
        let mem_a1 = MemoryRecord::new("commit-a", "shared-key", "shared content");
        let mem_a2 = MemoryRecord::new("commit-a", "only-a-key", "only in A");
        handle.save_memory(&mem_a1).await.unwrap();
        handle.save_memory(&mem_a2).await.unwrap();

        // Create memories for commit B
        let mem_b1 = MemoryRecord::new("commit-b", "shared-key", "shared content");
        let mem_b2 = MemoryRecord::new("commit-b", "only-b-key", "only in B");
        handle.save_memory(&mem_b1).await.unwrap();
        handle.save_memory(&mem_b2).await.unwrap();

        let delta = diff_memory_vectors(&handle, "commit-a", "commit-b")
            .await
            .unwrap();

        assert_eq!(delta.only_in_a.len(), 1);
        assert_eq!(delta.only_in_a[0].key, "only-a-key");

        assert_eq!(delta.only_in_b.len(), 1);
        assert_eq!(delta.only_in_b[0].key, "only-b-key");

        // Same key + same content is "identical", never a conflict.
        assert_eq!(delta.identical.len(), 1);
        assert_eq!(delta.identical[0].key, "shared-key");
        assert_eq!(delta.conflicts.len(), 0);
    }

    /// A key with different content on each side is the only thing reported,
    /// and the conflict carries both versions.
    #[tokio::test]
    async fn test_memory_diff_detects_conflicts() {
        let handle = SurrealHandle::setup_db().await.unwrap();

        let mem_a = MemoryRecord::new("commit-a", "conflict-key", "content version A");
        let mem_b = MemoryRecord::new("commit-b", "conflict-key", "content version B");
        handle.save_memory(&mem_a).await.unwrap();
        handle.save_memory(&mem_b).await.unwrap();

        let delta = diff_memory_vectors(&handle, "commit-a", "commit-b")
            .await
            .unwrap();

        assert_eq!(delta.conflicts.len(), 1);
        assert_eq!(delta.conflicts[0].key, "conflict-key");
        assert_eq!(delta.conflicts[0].memory_a.content, "content version A");
        assert_eq!(delta.conflicts[0].memory_b.content, "content version B");

        assert!(delta.only_in_a.is_empty());
        assert!(delta.only_in_b.is_empty());
        assert!(delta.identical.is_empty());
    }

    /// The placeholder arbiter picks the longer content and says which
    /// branch it came from.
    #[tokio::test]
    async fn test_arbiter_resolves_value_conflict_based_on_cot() {
        let conflict = MemoryConflict {
            key: "test-key".to_string(),
            memory_a: MemoryRecord::new("a", "test-key", "short"),
            memory_b: MemoryRecord::new("b", "test-key", "longer content here"),
        };

        let resolved = resolve_conflict_state(&[], &[], &conflict).await.unwrap();

        assert_eq!(resolved.value, "longer content here");
        assert_eq!(resolved.favored_branch.as_deref(), Some("B"));
        assert!(!resolved.reasoning.is_empty());
        assert!(resolved.confidence > 0.0 && resolved.confidence <= 1.0);
    }

    /// End-to-end merge: unique memories from both parents survive, the
    /// conflicting key is resolved, and the report matches what happened.
    #[tokio::test]
    async fn test_merge_synthesizes_two_memories_into_one_new_commit() {
        let handle = SurrealHandle::setup_db().await.unwrap();

        // Create commit IDs
        let commit_id_a = oxidized_state::CommitId::from_state(b"branch-a");
        let commit_id_b = oxidized_state::CommitId::from_state(b"branch-b");

        // Create commits with divergent memories
        let commit_a = oxidized_state::CommitRecord::new(
            commit_id_a.clone(),
            vec![],
            "Branch A commit",
            "agent-a",
        );
        handle.save_commit(&commit_a).await.unwrap();

        let commit_b = oxidized_state::CommitRecord::new(
            commit_id_b.clone(),
            vec![],
            "Branch B commit",
            "agent-b",
        );
        handle.save_commit(&commit_b).await.unwrap();

        // Add unique memories to each branch
        let mem_a_only =
            MemoryRecord::new(&commit_id_a.hash, "learned-from-a", "Strategy A knowledge");
        let mem_b_only =
            MemoryRecord::new(&commit_id_b.hash, "learned-from-b", "Strategy B knowledge");
        let mem_conflict_a = MemoryRecord::new(&commit_id_a.hash, "shared-key", "short");
        let mem_conflict_b = MemoryRecord::new(
            &commit_id_b.hash,
            "shared-key",
            "longer and more detailed content",
        );

        handle.save_memory(&mem_a_only).await.unwrap();
        handle.save_memory(&mem_b_only).await.unwrap();
        handle.save_memory(&mem_conflict_a).await.unwrap();
        handle.save_memory(&mem_conflict_b).await.unwrap();

        // Perform semantic merge
        let result = semantic_merge(
            &handle,
            &commit_id_a.hash,
            &commit_id_b.hash,
            "Merge A and B",
            "agent-git",
        )
        .await
        .unwrap();

        // Verify merge commit was created
        assert!(!result.merge_commit_id.hash.is_empty());

        // Verify all memories were synthesized
        let merged_memories = handle
            .get_memories(&result.merge_commit_id.hash)
            .await
            .unwrap();

        // Should have 3 memories: 2 unique + 1 resolved conflict
        assert_eq!(merged_memories.len(), 3, "Expected 3 merged memories");

        // Verify unique memories were preserved
        let keys: Vec<_> = merged_memories.iter().map(|m| m.key.as_str()).collect();
        assert!(
            keys.contains(&"learned-from-a"),
            "Missing memory from branch A"
        );
        assert!(
            keys.contains(&"learned-from-b"),
            "Missing memory from branch B"
        );
        assert!(keys.contains(&"shared-key"), "Missing resolved conflict");

        // Verify conflict was resolved (longer content wins with the heuristic)
        let resolved = merged_memories
            .iter()
            .find(|m| m.key == "shared-key")
            .unwrap();
        assert_eq!(
            resolved.content, "longer and more detailed content",
            "Conflict resolution should favor more detailed content"
        );

        // Verify the report: one conflict, resolved automatically, and a
        // summary whose counts match the fixture (1 unique per side).
        assert_eq!(result.auto_resolved, 1);
        assert!(result.manual_conflicts.is_empty());
        assert_eq!(
            result.summary,
            "Merged 1 memories from A, 1 from B, resolved 1 conflicts"
        );
    }
}