Skip to main content

void_core/ops/
traversal.rs

1//! Commit graph traversal utilities.
2//!
3//! Provides a reusable BFS-based commit walker that correctly handles
4//! merge commits with multiple parents. This module exists to prevent
5//! the common bug of only following the first parent.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! use void_core::ops::traversal::{CommitWalker, WalkOptions};
11//!
12//! let walker = CommitWalker::new(&store, &key, WalkOptions::from_head(&void_dir)?)?;
13//!
14//! for result in walker.take(100) {
15//!     let (cid, commit, reader) = result?;
16//!     println!("Commit: {} - {}", cid, commit.message);
17//! }
18//! ```
19
20use std::collections::{HashSet, VecDeque};
21use std::fs;
22
23use camino::Utf8Path;
24
25use crate::crypto::reader::CommitReader;
26use crate::crypto::KeyVault;
27use crate::metadata::Commit;
28use crate::store::{FsStore, ObjectStoreExt};
29use crate::cid::ToVoidCid;
30use crate::{cid as void_cid, refs, Result, VoidCid, VoidError};
31
32// ============================================================================
33// Walk Order
34// ============================================================================
35
36/// Walk order for commit traversal.
37///
38/// Controls the direction of traversal through the commit graph.
39#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
40pub enum WalkOrder {
41    /// BFS from heads toward roots (newest first) - default.
42    ///
43    /// This is the natural order for `git log` style output, showing
44    /// recent commits first.
45    #[default]
46    HeadToRoot,
47
48    /// Topological order from roots toward heads (oldest first).
49    ///
50    /// Useful for migrations that need to process commits in dependency
51    /// order, ensuring parents are processed before children.
52    RootToHead,
53}
54
55/// Options for commit traversal.
56#[derive(Debug, Clone)]
57pub struct WalkOptions {
58    /// Starting commit CIDs for the walk.
59    pub starting_points: Vec<VoidCid>,
60    /// Maximum number of commits to visit (None = unlimited).
61    pub max_commits: Option<usize>,
62    /// Walk order (default: HeadToRoot).
63    pub order: WalkOrder,
64}
65
66impl WalkOptions {
67    /// Create walk options starting from HEAD.
68    pub fn from_head(void_dir: &Utf8Path) -> Result<Self> {
69        let mut starting_points = Vec::new();
70
71        if let Some(head_commit_cid) = refs::resolve_head(void_dir)? {
72            let head_cid = void_cid::from_bytes(head_commit_cid.as_bytes())?;
73            starting_points.push(head_cid);
74        }
75
76        Ok(Self {
77            starting_points,
78            max_commits: None,
79            order: WalkOrder::default(),
80        })
81    }
82
83    /// Create walk options starting from HEAD and all branch heads.
84    ///
85    /// This is useful for operations that need complete coverage of all
86    /// commits in the repository, including those on feature branches.
87    pub fn from_all_refs(void_dir: &Utf8Path) -> Result<Self> {
88        let mut starting_points = Vec::new();
89
90        // Add HEAD
91        if let Some(head_commit_cid) = refs::resolve_head(void_dir)? {
92            if let Ok(head_cid) = void_cid::from_bytes(head_commit_cid.as_bytes()) {
93                starting_points.push(head_cid);
94            }
95        }
96
97        // Add all branch heads
98        let refs_dir = void_dir.join("refs/heads");
99        if refs_dir.exists() {
100            if let Ok(entries) = fs::read_dir(&refs_dir) {
101                for entry in entries.flatten() {
102                    if let Ok(content) = fs::read_to_string(entry.path()) {
103                        if let Ok(branch_cid) = void_cid::parse(content.trim()) {
104                            // Avoid duplicates (HEAD might point to same commit as a branch)
105                            if !starting_points.iter().any(|c| c == &branch_cid) {
106                                starting_points.push(branch_cid);
107                            }
108                        }
109                    }
110                }
111            }
112        }
113
114        Ok(Self {
115            starting_points,
116            max_commits: None,
117            order: WalkOrder::default(),
118        })
119    }
120
121    /// Create walk options starting from a specific commit.
122    pub fn from_commit(commit_cid: VoidCid) -> Self {
123        Self {
124            starting_points: vec![commit_cid],
125            max_commits: None,
126            order: WalkOrder::default(),
127        }
128    }
129
130    /// Create walk options starting from multiple commits.
131    pub fn from_commits(commit_cids: Vec<VoidCid>) -> Self {
132        Self {
133            starting_points: commit_cids,
134            max_commits: None,
135            order: WalkOrder::default(),
136        }
137    }
138
139    /// Set maximum number of commits to visit.
140    pub fn with_max_commits(mut self, max: usize) -> Self {
141        self.max_commits = Some(max);
142        self
143    }
144
145    /// Set the walk order.
146    ///
147    /// Default is `HeadToRoot` (BFS from newest to oldest).
148    /// Use `RootToHead` for topological traversal from oldest to newest.
149    pub fn with_order(mut self, order: WalkOrder) -> Self {
150        self.order = order;
151        self
152    }
153}
154
155/// A visited commit with its parsed data.
156pub struct WalkedCommit {
157    /// The commit CID as a string.
158    pub cid_str: String,
159    /// The parsed CID.
160    pub cid: VoidCid,
161    /// The decrypted and parsed commit.
162    pub commit: Commit,
163    /// Reader for decrypting child objects (metadata, shards).
164    pub reader: CommitReader,
165}
166
167/// BFS-based commit graph walker.
168///
169/// Correctly handles merge commits by visiting all parents, not just the first.
170/// Uses a visited set to handle diamond patterns where multiple paths converge.
171///
172/// # Design Notes
173///
174/// This walker uses BFS (breadth-first search) rather than DFS because:
175/// 1. BFS visits commits in roughly chronological order (newer first)
176/// 2. BFS is more intuitive for git-like history visualization
177/// 3. BFS with a max_commits limit gives you the N most recent commits
178pub struct CommitWalker<'a> {
179    store: &'a FsStore,
180    vault: &'a KeyVault,
181    queue: VecDeque<VoidCid>,
182    visited: HashSet<String>,
183    commits_returned: usize,
184    max_commits: Option<usize>,
185}
186
187impl<'a> CommitWalker<'a> {
188    /// Create a new commit walker.
189    pub fn new(store: &'a FsStore, vault: &'a KeyVault, options: WalkOptions) -> Self {
190        let mut queue = VecDeque::new();
191        for cid in options.starting_points {
192            queue.push_back(cid);
193        }
194
195        Self {
196            store,
197            vault,
198            queue,
199            visited: HashSet::default(),
200            commits_returned: 0,
201            max_commits: options.max_commits,
202        }
203    }
204
205    /// Check if the walker has reached its limit.
206    fn at_limit(&self) -> bool {
207        self.max_commits
208            .map(|max| self.commits_returned >= max)
209            .unwrap_or(false)
210    }
211}
212
213impl Iterator for CommitWalker<'_> {
214    type Item = Result<WalkedCommit>;
215
216    fn next(&mut self) -> Option<Self::Item> {
217        // Stop if we've hit the limit
218        if self.at_limit() {
219            return None;
220        }
221
222        while let Some(commit_cid) = self.queue.pop_front() {
223            let cid_str = commit_cid.to_string();
224
225            // Skip if already visited (handles diamond merges)
226            if self.visited.contains(&cid_str) {
227                continue;
228            }
229            self.visited.insert(cid_str.clone());
230
231            // Load and decrypt the commit
232            let encrypted: void_crypto::EncryptedCommit = match self.store.get_blob(&commit_cid) {
233                Ok(data) => data,
234                Err(e) => {
235                    return Some(Err(VoidError::NotFound(format!(
236                        "commit {} not found: {}",
237                        cid_str, e
238                    ))))
239                }
240            };
241
242            let (commit_bytes, reader) = match CommitReader::open_with_vault(self.vault, &encrypted) {
243                Ok(r) => r,
244                Err(e) => {
245                    return Some(Err(VoidError::Decryption(format!(
246                        "failed to decrypt commit {}: {}",
247                        cid_str, e
248                    ))))
249                }
250            };
251
252            let commit = match commit_bytes.parse() {
253                Ok(c) => c,
254                Err(e) => {
255                    return Some(Err(VoidError::Serialization(format!(
256                        "failed to parse commit {}: {}",
257                        cid_str, e
258                    ))))
259                }
260            };
261
262            // Queue ALL parents for BFS traversal (the key fix!)
263            for parent_bytes in &commit.parents {
264                if !parent_bytes.as_bytes().is_empty() {
265                    if let Ok(parent_cid) = parent_bytes.to_void_cid() {
266                        let parent_str = parent_cid.to_string();
267                        if !self.visited.contains(&parent_str) {
268                            self.queue.push_back(parent_cid);
269                        }
270                    }
271                }
272            }
273
274            self.commits_returned += 1;
275
276            return Some(Ok(WalkedCommit {
277                cid_str,
278                cid: commit_cid,
279                commit,
280                reader,
281            }));
282        }
283
284        None
285    }
286}
287
288/// Convenience function to walk commits from HEAD.
289///
290/// # Example
291///
292/// ```rust,ignore
293/// for result in walk_from_head(&store, &key, &void_dir, Some(100))? {
294///     let walked = result?;
295///     println!("{}: {}", walked.cid_str, walked.commit.message);
296/// }
297/// ```
298pub fn walk_from_head<'a>(
299    store: &'a FsStore,
300    vault: &'a KeyVault,
301    void_dir: &Utf8Path,
302    max_commits: Option<usize>,
303) -> Result<CommitWalker<'a>> {
304    let mut options = WalkOptions::from_head(void_dir)?;
305    if let Some(max) = max_commits {
306        options = options.with_max_commits(max);
307    }
308    Ok(CommitWalker::new(store, vault, options))
309}
310
311/// Convenience function to walk all commits from all refs.
312///
313/// This includes commits on all branches, not just HEAD.
314pub fn walk_all_refs<'a>(
315    store: &'a FsStore,
316    vault: &'a KeyVault,
317    void_dir: &Utf8Path,
318    max_commits: Option<usize>,
319) -> Result<CommitWalker<'a>> {
320    let mut options = WalkOptions::from_all_refs(void_dir)?;
321    if let Some(max) = max_commits {
322        options = options.with_max_commits(max);
323    }
324    Ok(CommitWalker::new(store, vault, options))
325}
326
327/// Walk commits in topological order (root-to-head / oldest first).
328///
329/// This is useful for migrations that need to process commits in dependency
330/// order, ensuring each commit's parents have already been processed.
331///
332/// # Implementation
333///
334/// This function performs a full BFS traversal to collect all reachable commits,
335/// then reverses the order. For very large repositories, this may use significant
336/// memory.
337///
338/// # Example
339///
340/// ```rust,ignore
341/// // Process commits from oldest to newest
342/// for walked in walk_topological(&store, &key, &void_dir, None)? {
343///     let walked = walked?;
344///     // Parent commits have already been yielded
345///     migrate_commit(&walked.commit)?;
346/// }
347/// ```
348pub fn walk_topological(
349    store: &FsStore,
350    vault: &KeyVault,
351    void_dir: &Utf8Path,
352    max_commits: Option<usize>,
353) -> Result<impl Iterator<Item = Result<WalkedCommit>>> {
354    let options = WalkOptions::from_all_refs(void_dir)?
355        .with_order(WalkOrder::RootToHead);
356
357    let walker = CommitWalker::new(store, vault, options);
358
359    // Collect all commits via BFS (head-to-root order)
360    let mut commits: Vec<Result<WalkedCommit>> = walker.collect();
361
362    // Reverse to get root-to-head order
363    commits.reverse();
364
365    // Apply max_commits limit if specified
366    if let Some(max) = max_commits {
367        commits.truncate(max);
368    }
369
370    Ok(commits.into_iter())
371}
372
373#[cfg(test)]
374mod tests {
375    use super::*;
376    use crate::crypto::{encrypt_with_envelope, generate_key_nonce, KeyVault};
377    use crate::metadata::Commit;
378    use camino::Utf8PathBuf;
379    use tempfile::TempDir;
380    use void_crypto::{CommitCid, EncryptedCommit, MetadataCid};
381
382    fn temp_store() -> (FsStore, TempDir) {
383        let dir = TempDir::new().unwrap();
384        let store = FsStore::new(Utf8PathBuf::try_from(dir.path().to_path_buf()).unwrap()).unwrap();
385        (store, dir)
386    }
387
388    fn store_commit(store: &FsStore, key: &[u8; 32], commit: &Commit) -> VoidCid {
389        let bytes = crate::support::cbor_to_vec(commit).unwrap();
390        let nonce = generate_key_nonce();
391        let encrypted = encrypt_with_envelope(key, &nonce, &bytes, crate::crypto::AAD_COMMIT).unwrap();
392        store.put_blob(&EncryptedCommit::from_bytes(encrypted)).unwrap()
393    }
394
395    #[test]
396    fn walks_linear_history() {
397        let (store, _dir) = temp_store();
398        let key = [0x42u8; 32];
399        let vault = KeyVault::new(key).expect("key derivation should not fail");
400
401        // Create linear history: A <- B <- C
402        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
403        let cid_a = store_commit(&store, &key, &commit_a);
404
405        let commit_b = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
406        let cid_b = store_commit(&store, &key, &commit_b);
407
408        let commit_c = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_b))), MetadataCid::from_bytes(vec![3]), 3000, "C".into());
409        let cid_c = store_commit(&store, &key, &commit_c);
410
411        // Walk from C
412        let options = WalkOptions::from_commit(cid_c);
413        let walker = CommitWalker::new(&store, &vault, options);
414
415        let commits: Vec<_> = walker.map(|r| r.unwrap().commit.message).collect();
416        assert_eq!(commits, vec!["C", "B", "A"]);
417    }
418
419    #[test]
420    fn walks_all_parents_of_merge() {
421        let (store, _dir) = temp_store();
422        let key = [0x42u8; 32];
423        let vault = KeyVault::new(key).expect("key derivation should not fail");
424
425        // Create diamond:
426        //       B
427        //      / \
428        // A --+   +-- D
429        //      \ /
430        //       C
431        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
432        let cid_a = store_commit(&store, &key, &commit_a);
433
434        let commit_b = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
435        let cid_b = store_commit(&store, &key, &commit_b);
436
437        let commit_c = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
438        let cid_c = store_commit(&store, &key, &commit_c);
439
440        // D is a merge of B and C
441        let commit_d = Commit::new_merge(
442            vec![CommitCid::from_bytes(void_cid::to_bytes(&cid_b)), CommitCid::from_bytes(void_cid::to_bytes(&cid_c))],
443            MetadataCid::from_bytes(vec![4]),
444            3000,
445            "D".into(),
446        );
447        let cid_d = store_commit(&store, &key, &commit_d);
448
449        // Walk from D - should visit all 4 commits
450        let options = WalkOptions::from_commit(cid_d);
451        let walker = CommitWalker::new(&store, &vault, options);
452
453        let commits: Vec<_> = walker.map(|r| r.unwrap().commit.message).collect();
454        assert_eq!(commits.len(), 4);
455        assert!(commits.contains(&"A".to_string()));
456        assert!(commits.contains(&"B".to_string()));
457        assert!(commits.contains(&"C".to_string()));
458        assert!(commits.contains(&"D".to_string()));
459    }
460
461    #[test]
462    fn respects_max_commits() {
463        let (store, _dir) = temp_store();
464        let key = [0x42u8; 32];
465        let vault = KeyVault::new(key).expect("key derivation should not fail");
466
467        // Create 5-commit linear history
468        let mut last_cid: Option<VoidCid> = None;
469        for i in 0..5 {
470            let parent = last_cid.as_ref().map(|c| CommitCid::from_bytes(void_cid::to_bytes(c)));
471            let commit = Commit::new(parent, MetadataCid::from_bytes(vec![i]), (i as u64) * 1000, format!("Commit {}", i));
472            last_cid = Some(store_commit(&store, &key, &commit));
473        }
474
475        // Walk with max 3 commits
476        let options = WalkOptions::from_commit(last_cid.unwrap()).with_max_commits(3);
477        let walker = CommitWalker::new(&store, &vault, options);
478
479        let commits: Vec<_> = walker.collect();
480        assert_eq!(commits.len(), 3);
481    }
482
483    #[test]
484    fn handles_diamond_without_duplicate_visits() {
485        let (store, _dir) = temp_store();
486        let key = [0x42u8; 32];
487        let vault = KeyVault::new(key).expect("key derivation should not fail");
488
489        // Same diamond as above
490        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
491        let cid_a = store_commit(&store, &key, &commit_a);
492
493        let commit_b = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
494        let cid_b = store_commit(&store, &key, &commit_b);
495
496        let commit_c = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![3]), 2500, "C".into());
497        let cid_c = store_commit(&store, &key, &commit_c);
498
499        let commit_d = Commit::new_merge(
500            vec![CommitCid::from_bytes(void_cid::to_bytes(&cid_b)), CommitCid::from_bytes(void_cid::to_bytes(&cid_c))],
501            MetadataCid::from_bytes(vec![4]),
502            3000,
503            "D".into(),
504        );
505        let cid_d = store_commit(&store, &key, &commit_d);
506
507        let options = WalkOptions::from_commit(cid_d);
508        let walker = CommitWalker::new(&store, &vault, options);
509
510        let commits: Vec<_> = walker.map(|r| r.unwrap().commit.message).collect();
511
512        // A should only appear once despite being reachable via both B and C
513        let a_count = commits.iter().filter(|m| *m == "A").count();
514        assert_eq!(a_count, 1);
515    }
516
517    #[test]
518    fn walk_order_enum_default() {
519        assert_eq!(WalkOrder::default(), WalkOrder::HeadToRoot);
520    }
521
522    #[test]
523    fn with_order_sets_order() {
524        let options = WalkOptions::from_commit(VoidCid::from(cid::Cid::default()))
525            .with_order(WalkOrder::RootToHead);
526        assert_eq!(options.order, WalkOrder::RootToHead);
527    }
528
529    #[test]
530    fn walks_linear_history_root_to_head() {
531        let (store, _dir) = temp_store();
532        let key = [0x42u8; 32];
533        let vault = KeyVault::new(key).expect("key derivation should not fail");
534
535        // Create linear history: A <- B <- C
536        let commit_a = Commit::new(None, MetadataCid::from_bytes(vec![1]), 1000, "A".into());
537        let cid_a = store_commit(&store, &key, &commit_a);
538
539        let commit_b = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_a))), MetadataCid::from_bytes(vec![2]), 2000, "B".into());
540        let cid_b = store_commit(&store, &key, &commit_b);
541
542        let commit_c = Commit::new(Some(CommitCid::from_bytes(void_cid::to_bytes(&cid_b))), MetadataCid::from_bytes(vec![3]), 3000, "C".into());
543        let cid_c = store_commit(&store, &key, &commit_c);
544
545        // Walk from C with RootToHead order - should be reversed (oldest first)
546        let options = WalkOptions::from_commit(cid_c).with_order(WalkOrder::RootToHead);
547        let walker = CommitWalker::new(&store, &vault, options);
548
549        // Note: The walker itself doesn't change behavior for RootToHead -
550        // that's handled by walk_topological which collects and reverses.
551        // This test verifies the option is stored correctly.
552        let commits: Vec<_> = walker.map(|r| r.unwrap().commit.message).collect();
553        // BFS order is still C, B, A (the actual reversal happens in walk_topological)
554        assert_eq!(commits, vec!["C", "B", "A"]);
555    }
556}