Skip to main content

git_forensic/
unreachable.rs

//! Reachability analysis: objects present in the store but reachable from no
//! ref. Such an object is residue of deleted or rewritten history — it remains
//! resurrectable from the object store until garbage-collected. Commits are the
//! most telling (a whole dropped line of development); blobs and trees are
//! lower-signal. An examiner follows these leads; they are never a verdict.
6
7use std::collections::HashSet;
8
9use forensicnomicon::report::{Category, Evidence, Observation, Severity};
10use git_core::{GitHash, GitRepo, ObjectKind, Result};
11
12/// An object present in the store yet reachable from no ref.
13#[derive(Debug, Clone, PartialEq, Eq)]
14pub struct UnreachableObject {
15    /// The unreachable object's hash.
16    pub object: GitHash,
17    /// Its kind as a lowercase git type (`commit`, `tree`, `blob`, `tag`).
18    pub kind: String,
19}
20
21impl Observation for UnreachableObject {
22    fn severity(&self) -> Option<Severity> {
23        // A resurrectable dropped commit (deleted/rewritten history) is the
24        // sharper lead; a loose blob/tree is lower signal.
25        if self.kind == "commit" {
26            Some(Severity::Medium)
27        } else {
28            Some(Severity::Low)
29        }
30    }
31
32    fn code(&self) -> &'static str {
33        "GIT-UNREACHABLE-OBJECT"
34    }
35
36    fn category(&self) -> Category {
37        // Residue of deleted/rewritten history.
38        Category::Residue
39    }
40
41    fn note(&self) -> String {
42        format!(
43            "{} object reachable from no ref; consistent with deleted or rewritten \
44             history (the object remains resurrectable from the store until gc)",
45            self.kind
46        )
47    }
48
49    fn evidence(&self) -> Vec<Evidence> {
50        vec![
51            Evidence {
52                field: "object".into(),
53                value: self.object.to_hex(),
54                location: None,
55            },
56            Evidence {
57                field: "kind".into(),
58                value: self.kind.clone(),
59                location: None,
60            },
61        ]
62    }
63}
64
65fn kind_str(kind: ObjectKind) -> &'static str {
66    match kind {
67        ObjectKind::Commit => "commit",
68        ObjectKind::Tree => "tree",
69        ObjectKind::Blob => "blob",
70        ObjectKind::Tag => "tag",
71    }
72}
73
74/// Compute the set of objects reachable from every ref tip.
75///
76/// Walks commit → parents + tree, tree → entries (subtrees + blobs). A tag is
77/// opaque (git-core does not parse tag targets), so a tag tip contributes only
78/// itself; this is sound for the common branch/HEAD tips used in practice.
79///
80/// Objects that fail to read or parse are recorded as reached-but-not-expanded
81/// rather than aborting the walk — robustness on a damaged store beats a hard
82/// failure.
83fn reachable_set(repo: &GitRepo) -> HashSet<GitHash> {
84    let mut reached = HashSet::new();
85    let mut stack: Vec<GitHash> = repo.all_refs().into_iter().map(|(_, h)| h).collect();
86
87    while let Some(hash) = stack.pop() {
88        if !reached.insert(hash) {
89            continue;
90        }
91        let Ok(obj) = repo.read_object(&hash) else {
92            continue;
93        };
94        match obj.kind {
95            ObjectKind::Commit => {
96                if let Ok(commit) = repo.read_commit(&hash) {
97                    stack.push(commit.tree);
98                    stack.extend(commit.parents);
99                }
100            }
101            ObjectKind::Tree => {
102                if let Ok(tree) = repo.read_tree(&hash) {
103                    stack.extend(tree.entries.into_iter().map(|e| e.hash));
104                }
105            }
106            // Blobs are leaves; tags are opaque here.
107            ObjectKind::Blob | ObjectKind::Tag => {}
108        }
109    }
110    reached
111}
112
113/// Audit `repo` for objects reachable from no ref (`all_objects − reachable`).
114///
115/// # Errors
116/// Propagates a [`git_core`] error from object enumeration.
117pub fn audit_unreachable(repo: &GitRepo) -> Result<Vec<UnreachableObject>> {
118    let reached = reachable_set(repo);
119    let mut out = Vec::new();
120    for hash in repo.all_objects()? {
121        if reached.contains(&hash) {
122            continue;
123        }
124        // Determine the kind for grading; an unreadable object is reported as
125        // "unknown" rather than dropped, so its presence is still surfaced.
126        let kind = repo
127            .read_object(&hash)
128            .map_or("unknown", |o| kind_str(o.kind))
129            .to_string();
130        out.push(UnreachableObject { object: hash, kind });
131    }
132    Ok(out)
133}
134
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a finding of the given kind from a hex-encoded hash.
    fn finding(hex: &str, kind: &str) -> UnreachableObject {
        UnreachableObject {
            object: GitHash::from_hex(hex).unwrap(),
            kind: kind.to_owned(),
        }
    }

    #[test]
    fn note_and_grading_depend_on_kind() {
        let commit = finding("0123456789abcdef0123456789abcdef01234567", "commit");
        let blob = finding("89abcdef0123456789abcdef0123456789abcdef", "blob");

        // Grading: a commit outranks a blob.
        assert_eq!(commit.severity(), Some(Severity::Medium));
        assert_eq!(blob.severity(), Some(Severity::Low));

        // Code and category are fixed for this finding type.
        assert_eq!(commit.code(), "GIT-UNREACHABLE-OBJECT");
        assert_eq!(commit.category(), Category::Residue);

        // The note names the object's kind.
        assert!(commit.note().contains("commit"));
    }
}