gix_commitgraph/file/
verify.rs1use std::{
3 cmp::{max, min},
4 collections::HashMap,
5 path::Path,
6};
7
8use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};
9
10#[derive(thiserror::Error, Debug)]
12#[allow(missing_docs)]
13pub enum Error<E: std::error::Error + 'static> {
14 #[error(transparent)]
15 Commit(#[from] file::commit::Error),
16 #[error("commit at file position {pos} has invalid ID {id}")]
17 CommitId {
18 id: gix_hash::ObjectId,
19 pos: file::Position,
20 },
21 #[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")]
22 CommitsOutOfOrder {
23 id: gix_hash::ObjectId,
24 pos: file::Position,
25 predecessor_id: gix_hash::ObjectId,
26 },
27 #[error("commit-graph filename should be {0}")]
28 Filename(String),
29 #[error("commit {id} has invalid generation {generation}")]
30 Generation { generation: u32, id: gix_hash::ObjectId },
31 #[error("checksum mismatch: expected {expected}, got {actual}")]
32 Mismatch {
33 actual: gix_hash::ObjectId,
34 expected: gix_hash::ObjectId,
35 },
36 #[error("{0}")]
37 Processor(#[source] E),
38 #[error("commit {id} has invalid root tree ID {root_tree_id}")]
39 RootTreeId {
40 id: gix_hash::ObjectId,
41 root_tree_id: gix_hash::ObjectId,
42 },
43}
44
45#[derive(Clone, Debug, Eq, PartialEq)]
47#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
48pub struct Outcome {
49 pub max_generation: u32,
51 pub min_generation: u32,
53 pub max_parents: u32,
55 pub num_commits: u32,
57 pub parent_counts: HashMap<u32, u32>,
59}
60
61impl File {
63 pub fn checksum(&self) -> &gix_hash::oid {
65 gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
66 }
67
68 pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
72 where
73 E: std::error::Error + 'static,
74 Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
75 {
76 self.verify_checksum()
77 .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
78 verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
79
80 let null_id = self.object_hash().null_ref();
81
82 let mut stats = Outcome {
83 max_generation: 0,
84 max_parents: 0,
85 min_generation: GENERATION_NUMBER_INFINITY,
86 num_commits: self.num_commits(),
87 parent_counts: HashMap::new(),
88 };
89
90 let mut prev_id: &gix_hash::oid = null_id;
92 for commit in self.iter_commits() {
93 if commit.id() <= prev_id {
94 if commit.id() == null_id {
95 return Err(Error::CommitId {
96 pos: commit.position(),
97 id: commit.id().into(),
98 });
99 }
100 return Err(Error::CommitsOutOfOrder {
101 pos: commit.position(),
102 id: commit.id().into(),
103 predecessor_id: prev_id.into(),
104 });
105 }
106 if commit.root_tree_id() == null_id {
107 return Err(Error::RootTreeId {
108 id: commit.id().into(),
109 root_tree_id: commit.root_tree_id().into(),
110 });
111 }
112 if commit.generation() > GENERATION_NUMBER_MAX {
113 return Err(Error::Generation {
114 generation: commit.generation(),
115 id: commit.id().into(),
116 });
117 }
118
119 processor(&commit).map_err(Error::Processor)?;
120
121 stats.max_generation = max(stats.max_generation, commit.generation());
122 stats.min_generation = min(stats.min_generation, commit.generation());
123 let parent_count = commit
124 .iter_parents()
125 .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
126 .map_err(Error::Commit)?;
127 *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
128 prev_id = commit.id();
129 }
130
131 if stats.min_generation == GENERATION_NUMBER_INFINITY {
132 stats.min_generation = 0;
133 }
134
135 Ok(stats)
136 }
137
138 pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, (gix_hash::ObjectId, gix_hash::ObjectId)> {
143 let data_len_without_trailer = self.data.len() - self.hash_len;
148 let mut hasher = gix_features::hash::hasher(self.object_hash());
149 hasher.update(&self.data[..data_len_without_trailer]);
150 let actual = gix_hash::ObjectId::from_bytes_or_panic(hasher.digest().as_ref());
151
152 let expected = self.checksum();
153 if actual == expected {
154 Ok(actual)
155 } else {
156 Err((actual, expected.into()))
157 }
158 }
159}
160
161fn verify_split_chain_filename_hash(path: &Path, expected: &gix_hash::oid) -> Result<(), String> {
164 path.file_name()
165 .and_then(std::ffi::OsStr::to_str)
166 .and_then(|filename| filename.strip_suffix(".graph"))
167 .and_then(|stem| stem.strip_prefix("graph-"))
168 .map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
169 Ok(actual) if actual == expected => Ok(()),
170 _ => Err(format!("graph-{}.graph", expected.to_hex())),
171 })
172}