gix_fsck/
lib.rs

1//! A library for performing object database integrity and connectivity checks
2#![deny(rust_2018_idioms, unsafe_code, missing_docs)]
3
4use std::collections::VecDeque;
5
6use gix_hash::ObjectId;
7use gix_hashtable::HashSet;
8use gix_object::{tree::EntryKind, Exists, FindExt, Kind};
9
10/// Perform a connectivity check.
11pub struct Connectivity<T, F>
12where
13    T: FindExt + Exists,
14    F: FnMut(&ObjectId, Kind),
15{
16    /// ODB handle to use for the check
17    db: T,
18    /// Closure to invoke when a missing object is encountered
19    missing_cb: F,
20    /// Set of Object IDs already (or about to be) scanned during the check
21    seen: HashSet,
22    /// A buffer to keep a single object at a time.
23    buf: Vec<u8>,
24}
25
26impl<T, F> Connectivity<T, F>
27where
28    T: FindExt + Exists,
29    F: FnMut(&ObjectId, Kind),
30{
31    /// Instantiate a connectivity check.
32    pub fn new(db: T, missing_cb: F) -> Connectivity<T, F> {
33        Connectivity {
34            db,
35            missing_cb,
36            seen: HashSet::default(),
37            buf: Default::default(),
38        }
39    }
40
41    /// Run the connectivity check on the provided commit `oid`.
42    ///
43    /// ### Algorithm
44    ///
45    /// Walk the trees and blobs referenced by the commit and verify they exist in the ODB.
46    /// Any objects previously encountered by this instance will be skipped silently.
47    /// Any referenced blobs that are not present in the ODB will result in a call to the  `missing_cb`.
48    /// Missing commits or trees will cause an error to be returned.
49    ///     - TODO: consider how to handle a missing commit (invoke `missing_cb`, or possibly return a Result?)
50    pub fn check_commit(&mut self, oid: &ObjectId) -> Result<(), gix_object::find::existing_object::Error> {
51        // Attempt to insert the commit ID in the set, and if already present, return immediately
52        if !self.seen.insert(*oid) {
53            return Ok(());
54        }
55        // Obtain the commit's tree ID
56        let tree_id = {
57            let commit = self.db.find_commit(oid, &mut self.buf)?;
58            commit.tree()
59        };
60
61        let mut tree_ids = VecDeque::from_iter(Some(tree_id));
62        while let Some(tree_id) = tree_ids.pop_front() {
63            if self.seen.insert(tree_id) {
64                self.check_tree(&tree_id, &mut tree_ids);
65            }
66        }
67
68        Ok(())
69    }
70
71    /// Blobs are checked right away, trees are stored in `tree_ids` for the parent to iterate them, and only
72    /// if they have not been `seen` yet.
73    fn check_tree(&mut self, oid: &ObjectId, tree_ids: &mut VecDeque<ObjectId>) {
74        let Ok(tree) = self.db.find_tree(oid, &mut self.buf) else {
75            (self.missing_cb)(oid, Kind::Tree);
76            return;
77        };
78
79        for entry_ref in tree.entries.iter() {
80            match entry_ref.mode.kind() {
81                EntryKind::Tree => {
82                    let tree_id = entry_ref.oid.to_owned();
83                    tree_ids.push_back(tree_id);
84                }
85                EntryKind::Blob | EntryKind::BlobExecutable | EntryKind::Link => {
86                    let blob_id = entry_ref.oid.to_owned();
87                    if self.seen.insert(blob_id) {
88                        check_blob(&self.db, &blob_id, &mut self.missing_cb);
89                    }
90                }
91                EntryKind::Commit => {
92                    // Skip submodules as they wouldn't be in this repository!
93                }
94            }
95        }
96    }
97}
98
99fn check_blob<F>(db: impl Exists, oid: &ObjectId, mut missing_cb: F)
100where
101    F: FnMut(&ObjectId, Kind),
102{
103    if !db.exists(oid) {
104        missing_cb(oid, Kind::Blob);
105    }
106}