Skip to main content

void_core/ops/
merge_base.rs

1//! Merge base (LCA) computation for void repositories.
2//!
3//! This module implements finding the lowest common ancestor (LCA) of two commits,
4//! which is used as the merge base for three-way merges.
5
6use std::collections::{HashMap, HashSet, VecDeque};
7
8use crate::crypto::KeyVault;
9use crate::metadata::{parse_commit, Commit};
10use crate::store::{FsStore, ObjectStoreExt};
11use crate::cid::{ToVoidCid, VoidCid};
12use crate::{cid, Result};
13use void_crypto::EncryptedCommit;
14
15/// Loads and decrypts a commit from the store.
16///
17/// Decrypts the commit envelope and parses the CBOR payload.
18fn load_commit(store: &FsStore, vault: &KeyVault, commit_cid: &VoidCid) -> Result<Commit> {
19    let cid_str = commit_cid.to_string();
20    let blob: EncryptedCommit = store.get_blob(commit_cid).map_err(|e| {
21        crate::VoidError::NotFound(format!("commit {} not found: {}", cid_str, e))
22    })?;
23    let (decrypted, _reader) =
24        vault.open_commit(&blob)
25            .map_err(|e| {
26                crate::VoidError::Serialization(format!(
27                    "failed to decrypt commit {} (encrypted_size={} bytes): {}",
28                    cid_str,
29                    blob.as_bytes().len(),
30                    e
31                ))
32            })?;
33    // Parse commit
34    parse_commit(&decrypted).map_err(|e| {
35        crate::VoidError::Serialization(format!(
36            "failed to parse commit {} (decrypted_size={} bytes): {}",
37            cid_str,
38            decrypted.len(),
39            e
40        ))
41    })
42}
43
44/// Finds the merge base (lowest common ancestor) of two commits.
45///
46/// Algorithm: BFS from both commits simultaneously. The first commit visited
47/// by both traversals is the LCA. If multiple LCAs exist at the same depth,
48/// picks the one with the earliest timestamp.
49///
50/// # Arguments
51/// * `store` - The object store containing commit objects
52/// * `vault` - The KeyVault for decrypting commits
53/// * `commit_a` - First commit CID
54/// * `commit_b` - Second commit CID
55///
56/// # Returns
57/// * `Ok(Some(cid))` - The merge base CID
58/// * `Ok(None)` - No common ancestor (unrelated histories)
59/// * `Err(_)` - Error loading commits
60pub fn find_merge_base(
61    store: &FsStore,
62    vault: &KeyVault,
63    commit_a: &VoidCid,
64    commit_b: &VoidCid,
65) -> Result<Option<VoidCid>> {
66    // If commits are the same, the merge base is that commit
67    if commit_a == commit_b {
68        return Ok(Some(*commit_a));
69    }
70
71    // Track which commits each side has visited
72    // Value is the timestamp of the commit (for tie-breaking)
73    let mut visited_a: HashMap<String, u64> = HashMap::new();
74    let mut visited_b: HashMap<String, u64> = HashMap::new();
75
76    // BFS queues for each side
77    let mut queue_a: VecDeque<VoidCid> = VecDeque::new();
78    let mut queue_b: VecDeque<VoidCid> = VecDeque::new();
79
80    queue_a.push_back(*commit_a);
81    queue_b.push_back(*commit_b);
82
83    // Track candidates found at the current depth
84    // (cid_string, timestamp)
85    let mut candidates: Vec<(String, u64)> = Vec::new();
86
87    // BFS level by level from both sides
88    while !queue_a.is_empty() || !queue_b.is_empty() {
89        // Process one level from side A
90        let level_size_a = queue_a.len();
91        for _ in 0..level_size_a {
92            if let Some(current) = queue_a.pop_front() {
93                let current_str = current.to_string();
94
95                // Skip if already visited by this side
96                if visited_a.contains_key(&current_str) {
97                    continue;
98                }
99
100                let commit = load_commit(store, vault, &current)?;
101                visited_a.insert(current_str.clone(), commit.timestamp);
102
103                // Check if this commit was visited by side B
104                if visited_b.contains_key(&current_str) {
105                    candidates.push((current_str.clone(), commit.timestamp));
106                }
107
108                // Add parents to queue
109                for parent in &commit.parents {
110                    if !parent.as_bytes().is_empty() {
111                        let parent_cid = parent.to_void_cid()?;
112                        let parent_str = parent_cid.to_string();
113                        if !visited_a.contains_key(&parent_str) {
114                            queue_a.push_back(parent_cid);
115                        }
116                    }
117                }
118            }
119        }
120
121        // Process one level from side B
122        let level_size_b = queue_b.len();
123        for _ in 0..level_size_b {
124            if let Some(current) = queue_b.pop_front() {
125                let current_str = current.to_string();
126
127                // Skip if already visited by this side
128                if visited_b.contains_key(&current_str) {
129                    continue;
130                }
131
132                let commit = load_commit(store, vault, &current)?;
133                visited_b.insert(current_str.clone(), commit.timestamp);
134
135                // Check if this commit was visited by side A
136                if visited_a.contains_key(&current_str) {
137                    candidates.push((current_str.clone(), commit.timestamp));
138                }
139
140                // Add parents to queue
141                for parent in &commit.parents {
142                    if !parent.as_bytes().is_empty() {
143                        let parent_cid = parent.to_void_cid()?;
144                        let parent_str = parent_cid.to_string();
145                        if !visited_b.contains_key(&parent_str) {
146                            queue_b.push_back(parent_cid);
147                        }
148                    }
149                }
150            }
151        }
152
153        // If we found candidates at this depth, pick the one with earliest timestamp
154        if !candidates.is_empty() {
155            // Sort by timestamp (earliest first), deduplicate by CID
156            let mut seen: HashSet<String> = HashSet::new();
157            candidates.retain(|(cid_str, _)| seen.insert(cid_str.clone()));
158            candidates.sort_by_key(|(_, ts)| *ts);
159
160            let (best_cid_str, _) = &candidates[0];
161            return Ok(Some(cid::parse(best_cid_str)?));
162        }
163    }
164
165    // No common ancestor found
166    Ok(None)
167}
168
169#[cfg(test)]
170mod tests {
171    use super::*;
172    use crate::crypto::{self, encrypt_with_envelope, generate_key_nonce, KeyVault};
173    use crate::metadata::Commit;
174    use crate::store::ObjectStoreExt;
175    use camino::Utf8PathBuf;
176    use tempfile::TempDir;
177    use void_crypto::{CommitCid, EncryptedCommit, MetadataCid};
178
179    fn temp_store() -> (FsStore, TempDir) {
180        let dir = TempDir::new().unwrap();
181        let store = FsStore::new(Utf8PathBuf::try_from(dir.path().to_path_buf()).unwrap()).unwrap();
182        (store, dir)
183    }
184
185    fn store_commit(store: &FsStore, key: &[u8; 32], commit: &Commit) -> VoidCid {
186        let bytes = crate::support::cbor_to_vec(commit).unwrap();
187        let nonce = generate_key_nonce();
188        let encrypted = encrypt_with_envelope(key, &nonce, &bytes, crypto::AAD_COMMIT).unwrap();
189        store.put_blob(&EncryptedCommit::from_bytes(encrypted)).unwrap()
190    }
191
192    #[test]
193    fn same_commit_returns_itself() {
194        let (store, _dir) = temp_store();
195        let key = [0x42u8; 32];
196        let vault = KeyVault::new(key).expect("key derivation should not fail");
197
198        let commit = Commit::new(None, MetadataCid::from_bytes(vec![1, 2, 3]), 1000, "initial".into());
199        let cid = store_commit(&store, &key, &commit);
200
201        let result = find_merge_base(&store, &vault, &cid, &cid).unwrap();
202        assert_eq!(result, Some(cid));
203    }
204
205    #[test]
206    fn linear_history_base_is_older_commit() {
207        // History: A <- B <- C
208        // merge_base(A, C) = A
209        // merge_base(B, C) = B
210        let (store, _dir) = temp_store();
211        let key = [0x42u8; 32];
212        let vault = KeyVault::new(key).expect("key derivation should not fail");
213
214        // A: initial commit
215        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
216        let cid_a = store_commit(&store, &key, &commit_a);
217
218        // B: child of A
219        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
220        let cid_b = store_commit(&store, &key, &commit_b);
221
222        // C: child of B
223        let commit_c = Commit::new(Some(CommitCid::from_bytes(cid_b.to_bytes())), MetadataCid::from_bytes(vec![3]), 3000, "C".into());
224        let cid_c = store_commit(&store, &key, &commit_c);
225
226        // merge_base(A, C) = A
227        let result = find_merge_base(&store, &vault, &cid_a, &cid_c).unwrap();
228        assert_eq!(result, Some(cid_a));
229
230        // merge_base(B, C) = B
231        let result = find_merge_base(&store, &vault, &cid_b, &cid_c).unwrap();
232        assert_eq!(result, Some(cid_b));
233
234        // merge_base(C, A) = A (order shouldn't matter)
235        let result = find_merge_base(&store, &vault, &cid_c, &cid_a).unwrap();
236        assert_eq!(result, Some(cid_a));
237    }
238
239    #[test]
240    fn diverged_branches_single_merge_base() {
241        // History:
242        //       B
243        //      /
244        // A --+
245        //      \
246        //       C
247        // merge_base(B, C) = A
248        let (store, _dir) = temp_store();
249        let key = [0x42u8; 32];
250        let vault = KeyVault::new(key).expect("key derivation should not fail");
251
252        // A: initial commit
253        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
254        let cid_a = store_commit(&store, &key, &commit_a);
255
256        // B: child of A
257        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
258        let cid_b = store_commit(&store, &key, &commit_b);
259
260        // C: also child of A (diverged branch)
261        let commit_c = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
262        let cid_c = store_commit(&store, &key, &commit_c);
263
264        // merge_base(B, C) = A
265        let result = find_merge_base(&store, &vault, &cid_b, &cid_c).unwrap();
266        assert_eq!(result, Some(cid_a));
267
268        // merge_base(C, B) = A (order shouldn't matter)
269        let result = find_merge_base(&store, &vault, &cid_c, &cid_b).unwrap();
270        assert_eq!(result, Some(cid_a));
271    }
272
273    #[test]
274    fn deeper_divergence() {
275        // History:
276        //       B -- D
277        //      /
278        // A --+
279        //      \
280        //       C -- E
281        // merge_base(D, E) = A
282        let (store, _dir) = temp_store();
283        let key = [0x42u8; 32];
284        let vault = KeyVault::new(key).expect("key derivation should not fail");
285
286        // A: initial commit
287        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
288        let cid_a = store_commit(&store, &key, &commit_a);
289
290        // B: child of A
291        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
292        let cid_b = store_commit(&store, &key, &commit_b);
293
294        // C: also child of A (diverged branch)
295        let commit_c = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
296        let cid_c = store_commit(&store, &key, &commit_c);
297
298        // D: child of B
299        let commit_d = Commit::new(Some(CommitCid::from_bytes(cid_b.to_bytes())), MetadataCid::from_bytes(vec![4]), 3000, "D".into());
300        let cid_d = store_commit(&store, &key, &commit_d);
301
302        // E: child of C
303        let commit_e = Commit::new(Some(CommitCid::from_bytes(cid_c.to_bytes())), MetadataCid::from_bytes(vec![5]), 3500, "E".into());
304        let cid_e = store_commit(&store, &key, &commit_e);
305
306        // merge_base(D, E) = A
307        let result = find_merge_base(&store, &vault, &cid_d, &cid_e).unwrap();
308        assert_eq!(result, Some(cid_a));
309    }
310
311    #[test]
312    fn unrelated_histories_returns_none() {
313        // Two completely separate commit chains with no common ancestor
314        let (store, _dir) = temp_store();
315        let key = [0x42u8; 32];
316        let vault = KeyVault::new(key).expect("key derivation should not fail");
317
318        // Chain 1: A <- B
319        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
320        let cid_a = store_commit(&store, &key, &commit_a);
321
322        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
323        let cid_b = store_commit(&store, &key, &commit_b);
324
325        // Chain 2: X <- Y (completely unrelated)
326        let commit_x = Commit::new(None, MetadataCid::from_bytes(vec![10]), 1500, "X".into());
327        let cid_x = store_commit(&store, &key, &commit_x);
328
329        let commit_y = Commit::new(Some(CommitCid::from_bytes(cid_x.to_bytes())), MetadataCid::from_bytes(vec![11]), 2500, "Y".into());
330        let cid_y = store_commit(&store, &key, &commit_y);
331
332        // merge_base(B, Y) = None
333        let result = find_merge_base(&store, &vault, &cid_b, &cid_y).unwrap();
334        assert_eq!(result, None);
335    }
336
337    #[test]
338    fn multiple_lcas_picks_earliest_timestamp() {
339        // Diamond pattern with merge:
340        //       B
341        //      / \
342        // A --+   +-- D (merge of B and C)
343        //      \ /
344        //       C
345        //
346        // Then two branches from D:
347        //       E
348        //      /
349        // D --+
350        //      \
351        //       F
352        //
353        // merge_base(E, F) = D
354        let (store, _dir) = temp_store();
355        let key = [0x42u8; 32];
356        let vault = KeyVault::new(key).expect("key derivation should not fail");
357
358        // A: initial commit
359        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
360        let cid_a = store_commit(&store, &key, &commit_a);
361
362        // B: child of A
363        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
364        let cid_b = store_commit(&store, &key, &commit_b);
365
366        // C: also child of A
367        let commit_c = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
368        let cid_c = store_commit(&store, &key, &commit_c);
369
370        // D: merge of B and C
371        let commit_d = Commit::new_merge(
372            vec![CommitCid::from_bytes(cid_b.to_bytes()), CommitCid::from_bytes(cid_c.to_bytes())],
373            MetadataCid::from_bytes(vec![4]),
374            3000,
375            "D (merge)".into(),
376        );
377        let cid_d = store_commit(&store, &key, &commit_d);
378
379        // E: child of D
380        let commit_e = Commit::new(Some(CommitCid::from_bytes(cid_d.to_bytes())), MetadataCid::from_bytes(vec![5]), 4000, "E".into());
381        let cid_e = store_commit(&store, &key, &commit_e);
382
383        // F: also child of D
384        let commit_f = Commit::new(Some(CommitCid::from_bytes(cid_d.to_bytes())), MetadataCid::from_bytes(vec![6]), 4500, "F".into());
385        let cid_f = store_commit(&store, &key, &commit_f);
386
387        // merge_base(E, F) = D
388        let result = find_merge_base(&store, &vault, &cid_e, &cid_f).unwrap();
389        assert_eq!(result, Some(cid_d));
390    }
391
392    #[test]
393    fn criss_cross_merge_picks_earliest() {
394        // Criss-cross pattern:
395        //
396        //     B ----+
397        //    / \     \
398        //   A   X     Y
399        //    \ /     /
400        //     C ----+
401        //
402        // Where X merges B+C and Y merges C+B
403        // Then branches from X and Y:
404        //   X -- P
405        //   Y -- Q
406        //
407        // merge_base(P, Q) should find a common ancestor
408        // In this case, both B and C are valid LCAs at the same depth
409        // We pick the one with earliest timestamp (B at 2000 vs C at 2500)
410        let (store, _dir) = temp_store();
411        let key = [0x42u8; 32];
412        let vault = KeyVault::new(key).expect("key derivation should not fail");
413
414        // A: initial commit
415        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
416        let cid_a = store_commit(&store, &key, &commit_a);
417
418        // B: child of A (timestamp 2000)
419        let commit_b = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
420        let cid_b = store_commit(&store, &key, &commit_b);
421
422        // C: also child of A (timestamp 2500)
423        let commit_c = Commit::new(Some(CommitCid::from_bytes(cid_a.to_bytes())), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
424        let cid_c = store_commit(&store, &key, &commit_c);
425
426        // X: merge of B and C
427        let commit_x = Commit::new_merge(
428            vec![CommitCid::from_bytes(cid_b.to_bytes()), CommitCid::from_bytes(cid_c.to_bytes())],
429            MetadataCid::from_bytes(vec![4]),
430            3000,
431            "X (merge B+C)".into(),
432        );
433        let cid_x = store_commit(&store, &key, &commit_x);
434
435        // Y: merge of C and B (different order)
436        let commit_y = Commit::new_merge(
437            vec![CommitCid::from_bytes(cid_c.to_bytes()), CommitCid::from_bytes(cid_b.to_bytes())],
438            MetadataCid::from_bytes(vec![5]),
439            3500,
440            "Y (merge C+B)".into(),
441        );
442        let cid_y = store_commit(&store, &key, &commit_y);
443
444        // P: child of X
445        let commit_p = Commit::new(Some(CommitCid::from_bytes(cid_x.to_bytes())), MetadataCid::from_bytes(vec![6]), 4000, "P".into());
446        let cid_p = store_commit(&store, &key, &commit_p);
447
448        // Q: child of Y
449        let commit_q = Commit::new(Some(CommitCid::from_bytes(cid_y.to_bytes())), MetadataCid::from_bytes(vec![7]), 4500, "Q".into());
450        let cid_q = store_commit(&store, &key, &commit_q);
451
452        // merge_base(P, Q) - should find B or C
453        // Both are valid LCAs; we pick the one with earliest timestamp (B at 2000)
454        let result = find_merge_base(&store, &vault, &cid_p, &cid_q).unwrap();
455        assert!(result.is_some());
456        let result_cid = result.unwrap();
457        // Either B or C is acceptable as they're both valid LCAs
458        // Our algorithm picks earliest timestamp, so B
459        assert_eq!(result_cid, cid_b);
460    }
461}