Struct git_commitgraph::file::File
source · pub struct File { /* private fields */ }
Expand description
A single commit-graph file.

All operations on a File are local to that graph file. Since a commit graph can span multiple files, all interesting graph operations belong on Graph.
Implementations§
source§impl File
impl File
Access
sourcepub fn base_graph_count(&self) -> u8
pub fn base_graph_count(&self) -> u8
The number of base graphs that this file depends on.
Examples found in repository?
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
pub fn verify_integrity<E>(
&self,
mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
{
if self.files.len() > 256 {
// A file in a split chain can only have up to 255 base files.
return Err(Error::TooManyFiles(self.files.len()));
}
let mut stats = Outcome {
longest_path_length: None,
num_commits: 0,
parent_counts: BTreeMap::new(),
};
let mut max_generation = 0u32;
// TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
// a separate loop, e.g. self.iter_sorted_ids().
let mut file_start_pos = graph::Position(0);
for (file_index, file) in self.files.iter().enumerate() {
if usize::from(file.base_graph_count()) != file_index {
return Err(Error::BaseGraphCount {
actual: file.base_graph_count(),
expected: file_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
for (base_graph_index, (expected, actual)) in self.files[..file_index]
.iter()
.map(|base_file| base_file.checksum())
.zip(file.iter_base_graph_ids())
.enumerate()
{
if actual != expected {
return Err(Error::BaseGraphId {
actual: actual.into(),
expected: expected.into(),
index: base_graph_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
}
let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
let file_stats = file
.traverse(|commit| {
let mut max_parent_generation = 0u32;
for parent_pos in commit.iter_parents() {
let parent_pos = parent_pos.map_err(Error::Commit)?;
if parent_pos >= next_file_start_pos {
return Err(Error::ParentOutOfRange {
parent_pos,
id: commit.id().into(),
max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
});
}
let parent = self.commit_at(parent_pos);
max_parent_generation = max(max_parent_generation, parent.generation());
}
// If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
// generation should be GENERATION_NUMBER_MAX too.
let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
if commit.generation() != expected_generation {
return Err(Error::Generation {
actual: commit.generation(),
expected: expected_generation,
id: commit.id().into(),
});
}
processor(commit).map_err(Error::Processor)?;
Ok(())
})
.map_err(|err| Error::File {
err: match err {
file::verify::Error::Processor(e) => return e,
file::verify::Error::RootTreeId { id, root_tree_id } => {
file::verify::Error::RootTreeId { id, root_tree_id }
}
file::verify::Error::Mismatch { actual, expected } => {
file::verify::Error::Mismatch { actual, expected }
}
file::verify::Error::Generation { generation, id } => {
file::verify::Error::Generation { generation, id }
}
file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
} => file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
},
},
path: file.path().to_owned(),
})?;
max_generation = max(max_generation, file_stats.max_generation);
stats.num_commits += file_stats.num_commits;
for (key, value) in file_stats.parent_counts.into_iter() {
*stats.parent_counts.entry(key).or_insert(0) += value;
}
file_start_pos = next_file_start_pos;
}
stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
Some(max_generation.saturating_sub(1))
} else {
None
};
Ok(stats)
}
sourcepub fn commit_at(&self, pos: Position) -> Commit<'_>
pub fn commit_at(&self, pos: Position) -> Commit<'_>
Returns the commit data for the commit located at the given lexicographical position.

pos must be in the range 0..self.num_commits(), i.e. strictly less than self.num_commits().

Panics

Panics if pos is out of bounds.
Examples found in repository?
More examples
sourcepub fn object_hash(&self) -> Kind
pub fn object_hash(&self) -> Kind
The kind of hash used in this File.

Note that it always matches the hash used in the owning repository.
Examples found in repository?
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
pub fn new(files: Vec<File>) -> Result<Self, Error> {
let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
if num_commits > u64::from(MAX_COMMITS) {
return Err(Error::TooManyCommits(num_commits));
}
for window in files.windows(2) {
let f1 = &window[0];
let f2 = &window[1];
if f1.object_hash() != f2.object_hash() {
return Err(Error::HashVersionMismatch {
path1: f1.path().to_owned(),
hash1: f1.object_hash(),
path2: f2.path().to_owned(),
hash2: f2.object_hash(),
});
}
}
Ok(Self { files })
}
More examples
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
{
self.verify_checksum()
.map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
let null_id = self.object_hash().null_ref();
let mut stats = Outcome {
max_generation: 0,
max_parents: 0,
min_generation: GENERATION_NUMBER_INFINITY,
num_commits: self.num_commits(),
parent_counts: HashMap::new(),
};
// TODO: Verify self.fan values as we go.
let mut prev_id: &git_hash::oid = null_id;
for commit in self.iter_commits() {
if commit.id() <= prev_id {
if commit.id() == null_id {
return Err(Error::CommitId {
pos: commit.position(),
id: commit.id().into(),
});
}
return Err(Error::CommitsOutOfOrder {
pos: commit.position(),
id: commit.id().into(),
predecessor_id: prev_id.into(),
});
}
if commit.root_tree_id() == null_id {
return Err(Error::RootTreeId {
id: commit.id().into(),
root_tree_id: commit.root_tree_id().into(),
});
}
if commit.generation() > GENERATION_NUMBER_MAX {
return Err(Error::Generation {
generation: commit.generation(),
id: commit.id().into(),
});
}
processor(&commit).map_err(Error::Processor)?;
stats.max_generation = max(stats.max_generation, commit.generation());
stats.min_generation = min(stats.min_generation, commit.generation());
let parent_count = commit
.iter_parents()
.try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
.map_err(Error::Commit)?;
*stats.parent_counts.entry(parent_count).or_insert(0) += 1;
prev_id = commit.id();
}
if stats.min_generation == GENERATION_NUMBER_INFINITY {
stats.min_generation = 0;
}
Ok(stats)
}
/// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
/// checksum itself.
///
/// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch.
pub fn verify_checksum(&self) -> Result<git_hash::ObjectId, (git_hash::ObjectId, git_hash::ObjectId)> {
// Even though we could use git_features::hash::bytes_of_file(…), this would require using our own
// Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value
// as these files are usually small enough to process them in less than a second, even for the large ones.
// But it's possible, once a progress instance is passed.
let data_len_without_trailer = self.data.len() - self.hash_len;
let mut hasher = git_features::hash::hasher(self.object_hash());
hasher.update(&self.data[..data_len_without_trailer]);
let actual = git_hash::ObjectId::from(hasher.digest().as_ref());
let expected = self.checksum();
if actual == expected {
Ok(actual)
} else {
Err((actual, expected.into()))
}
}
sourcepub fn id_at(&self, pos: Position) -> &oid
pub fn id_at(&self, pos: Position) -> &oid
Returns an object id at the given index in our list of (sorted) hashes. The position must be in the range 0..self.num_commits(), i.e. strictly less than self.num_commits().
Examples found in repository?
More examples
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
pub fn iter_ids(&self) -> impl Iterator<Item = &git_hash::oid> {
(0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
}
/// Translate the given object hash to its position within this file, if present.
// copied from git-odb/src/pack/index/ext
pub fn lookup(&self, id: impl AsRef<git_hash::oid>) -> Option<file::Position> {
let id = id.as_ref();
let first_byte = usize::from(id.first_byte());
let mut upper_bound = self.fan[first_byte];
let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
while lower_bound < upper_bound {
let mid = (lower_bound + upper_bound) / 2;
let mid_sha = self.id_at(file::Position(mid));
use std::cmp::Ordering::*;
match id.cmp(mid_sha) {
Less => upper_bound = mid,
Equal => return Some(file::Position(mid)),
Greater => lower_bound = mid + 1,
}
}
None
}
sourcepub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &oid>
pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &oid>
Return an iterator over all object hashes stored in the base graph.
Examples found in repository?
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
pub fn verify_integrity<E>(
&self,
mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
{
if self.files.len() > 256 {
// A file in a split chain can only have up to 255 base files.
return Err(Error::TooManyFiles(self.files.len()));
}
let mut stats = Outcome {
longest_path_length: None,
num_commits: 0,
parent_counts: BTreeMap::new(),
};
let mut max_generation = 0u32;
// TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
// a separate loop, e.g. self.iter_sorted_ids().
let mut file_start_pos = graph::Position(0);
for (file_index, file) in self.files.iter().enumerate() {
if usize::from(file.base_graph_count()) != file_index {
return Err(Error::BaseGraphCount {
actual: file.base_graph_count(),
expected: file_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
for (base_graph_index, (expected, actual)) in self.files[..file_index]
.iter()
.map(|base_file| base_file.checksum())
.zip(file.iter_base_graph_ids())
.enumerate()
{
if actual != expected {
return Err(Error::BaseGraphId {
actual: actual.into(),
expected: expected.into(),
index: base_graph_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
}
let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
let file_stats = file
.traverse(|commit| {
let mut max_parent_generation = 0u32;
for parent_pos in commit.iter_parents() {
let parent_pos = parent_pos.map_err(Error::Commit)?;
if parent_pos >= next_file_start_pos {
return Err(Error::ParentOutOfRange {
parent_pos,
id: commit.id().into(),
max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
});
}
let parent = self.commit_at(parent_pos);
max_parent_generation = max(max_parent_generation, parent.generation());
}
// If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
// generation should be GENERATION_NUMBER_MAX too.
let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
if commit.generation() != expected_generation {
return Err(Error::Generation {
actual: commit.generation(),
expected: expected_generation,
id: commit.id().into(),
});
}
processor(commit).map_err(Error::Processor)?;
Ok(())
})
.map_err(|err| Error::File {
err: match err {
file::verify::Error::Processor(e) => return e,
file::verify::Error::RootTreeId { id, root_tree_id } => {
file::verify::Error::RootTreeId { id, root_tree_id }
}
file::verify::Error::Mismatch { actual, expected } => {
file::verify::Error::Mismatch { actual, expected }
}
file::verify::Error::Generation { generation, id } => {
file::verify::Error::Generation { generation, id }
}
file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
} => file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
},
},
path: file.path().to_owned(),
})?;
max_generation = max(max_generation, file_stats.max_generation);
stats.num_commits += file_stats.num_commits;
for (key, value) in file_stats.parent_counts.into_iter() {
*stats.parent_counts.entry(key).or_insert(0) += value;
}
file_start_pos = next_file_start_pos;
}
stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
Some(max_generation.saturating_sub(1))
} else {
None
};
Ok(stats)
}
sourcepub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>>
pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>>
Return an iterator over all commits in this file.
Examples found in repository?
More examples
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
{
self.verify_checksum()
.map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
let null_id = self.object_hash().null_ref();
let mut stats = Outcome {
max_generation: 0,
max_parents: 0,
min_generation: GENERATION_NUMBER_INFINITY,
num_commits: self.num_commits(),
parent_counts: HashMap::new(),
};
// TODO: Verify self.fan values as we go.
let mut prev_id: &git_hash::oid = null_id;
for commit in self.iter_commits() {
if commit.id() <= prev_id {
if commit.id() == null_id {
return Err(Error::CommitId {
pos: commit.position(),
id: commit.id().into(),
});
}
return Err(Error::CommitsOutOfOrder {
pos: commit.position(),
id: commit.id().into(),
predecessor_id: prev_id.into(),
});
}
if commit.root_tree_id() == null_id {
return Err(Error::RootTreeId {
id: commit.id().into(),
root_tree_id: commit.root_tree_id().into(),
});
}
if commit.generation() > GENERATION_NUMBER_MAX {
return Err(Error::Generation {
generation: commit.generation(),
id: commit.id().into(),
});
}
processor(&commit).map_err(Error::Processor)?;
stats.max_generation = max(stats.max_generation, commit.generation());
stats.min_generation = min(stats.min_generation, commit.generation());
let parent_count = commit
.iter_parents()
.try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
.map_err(Error::Commit)?;
*stats.parent_counts.entry(parent_count).or_insert(0) += 1;
prev_id = commit.id();
}
if stats.min_generation == GENERATION_NUMBER_INFINITY {
stats.min_generation = 0;
}
Ok(stats)
}
sourcepub fn iter_ids(&self) -> impl Iterator<Item = &oid>
pub fn iter_ids(&self) -> impl Iterator<Item = &oid>
Return an iterator over all object hashes stored in this file.
sourcepub fn lookup(&self, id: impl AsRef<oid>) -> Option<Position>
pub fn lookup(&self, id: impl AsRef<oid>) -> Option<Position>
Translate the given object hash to its position within this file, if present.
Examples found in repository?
55 56 57 58 59 60 61 62 63 64 65 66 67 68
fn lookup_by_id(&self, id: &git_hash::oid) -> Option<LookupByIdResult<'_>> {
let mut current_file_start = 0;
for file in &self.files {
if let Some(lex_pos) = file.lookup(id) {
return Some(LookupByIdResult {
file,
file_pos: lex_pos,
graph_pos: graph::Position(current_file_start + lex_pos.0),
});
}
current_file_start += file.num_commits();
}
None
}
sourcepub fn num_commits(&self) -> u32
pub fn num_commits(&self) -> u32
Returns the number of commits in this graph file.

The maximum valid file::Position that can be used with this file is one less than num_commits().
Examples found in repository?
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
pub fn num_commits(&self) -> u32 {
self.files.iter().map(|f| f.num_commits()).sum()
}
}
/// Access fundamentals
impl Graph {
fn lookup_by_id(&self, id: &git_hash::oid) -> Option<LookupByIdResult<'_>> {
let mut current_file_start = 0;
for file in &self.files {
if let Some(lex_pos) = file.lookup(id) {
return Some(LookupByIdResult {
file,
file_pos: lex_pos,
graph_pos: graph::Position(current_file_start + lex_pos.0),
});
}
current_file_start += file.num_commits();
}
None
}
fn lookup_by_pos(&self, pos: graph::Position) -> LookupByPositionResult<'_> {
let mut remaining = pos.0;
for (file_index, file) in self.files.iter().enumerate() {
match remaining.checked_sub(file.num_commits()) {
Some(v) => remaining = v,
None => {
return LookupByPositionResult {
file,
_file_index: file_index,
pos: file::Position(remaining),
}
}
}
}
panic!("graph position too large: {}", pos.0);
}
More examples
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
pub fn id_at(&self, pos: file::Position) -> &git_hash::oid {
assert!(
pos.0 < self.num_commits(),
"expected lexigraphical position less than {}, got {}",
self.num_commits(),
pos.0
);
let pos: usize = pos
.0
.try_into()
.expect("an architecture able to hold 32 bits of integer");
let start = self.oid_lookup_offset + (pos * self.hash_len);
git_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len])
}
/// Return an iterator over all object hashes stored in the base graph.
pub fn iter_base_graph_ids(&self) -> impl Iterator<Item = &git_hash::oid> {
let start = self.base_graphs_list_offset.unwrap_or(0);
let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)];
base_graphs_list
.chunks(self.hash_len)
.map(git_hash::oid::from_bytes_unchecked)
}
/// return an iterator over all commits in this file.
pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
(0..self.num_commits()).map(move |i| self.commit_at(file::Position(i)))
}
/// Return an iterator over all object hashes stored in this file.
pub fn iter_ids(&self) -> impl Iterator<Item = &git_hash::oid> {
(0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
}
/// Translate the given object hash to its position within this file, if present.
// copied from git-odb/src/pack/index/ext
pub fn lookup(&self, id: impl AsRef<git_hash::oid>) -> Option<file::Position> {
let id = id.as_ref();
let first_byte = usize::from(id.first_byte());
let mut upper_bound = self.fan[first_byte];
let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
while lower_bound < upper_bound {
let mid = (lower_bound + upper_bound) / 2;
let mid_sha = self.id_at(file::Position(mid));
use std::cmp::Ordering::*;
match id.cmp(mid_sha) {
Less => upper_bound = mid,
Equal => return Some(file::Position(mid)),
Greater => lower_bound = mid + 1,
}
}
None
}
/// Returns the number of commits in this graph file.
///
/// The maximum valid `file::Position` that can be used with this file is one less than
/// `num_commits()`.
pub fn num_commits(&self) -> u32 {
self.fan[255]
}
/// Returns the path to this file.
pub fn path(&self) -> &Path {
&self.path
}
}
impl File {
/// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk.
pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] {
assert!(
pos.0 < self.num_commits(),
"expected lexigraphical position less than {}, got {}",
self.num_commits(),
pos.0
);
let pos: usize = pos
.0
.try_into()
.expect("an architecture able to hold 32 bits of integer");
let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH;
let start = self.commit_data_offset + (pos * entry_size);
&self.data[start..][..entry_size]
}
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
pub fn new(files: Vec<File>) -> Result<Self, Error> {
let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
if num_commits > u64::from(MAX_COMMITS) {
return Err(Error::TooManyCommits(num_commits));
}
for window in files.windows(2) {
let f1 = &window[0];
let f2 = &window[1];
if f1.object_hash() != f2.object_hash() {
return Err(Error::HashVersionMismatch {
path1: f1.path().to_owned(),
hash1: f1.object_hash(),
path2: f2.path().to_owned(),
hash2: f2.object_hash(),
});
}
}
Ok(Self { files })
}
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
{
self.verify_checksum()
.map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
let null_id = self.object_hash().null_ref();
let mut stats = Outcome {
max_generation: 0,
max_parents: 0,
min_generation: GENERATION_NUMBER_INFINITY,
num_commits: self.num_commits(),
parent_counts: HashMap::new(),
};
// TODO: Verify self.fan values as we go.
let mut prev_id: &git_hash::oid = null_id;
for commit in self.iter_commits() {
if commit.id() <= prev_id {
if commit.id() == null_id {
return Err(Error::CommitId {
pos: commit.position(),
id: commit.id().into(),
});
}
return Err(Error::CommitsOutOfOrder {
pos: commit.position(),
id: commit.id().into(),
predecessor_id: prev_id.into(),
});
}
if commit.root_tree_id() == null_id {
return Err(Error::RootTreeId {
id: commit.id().into(),
root_tree_id: commit.root_tree_id().into(),
});
}
if commit.generation() > GENERATION_NUMBER_MAX {
return Err(Error::Generation {
generation: commit.generation(),
id: commit.id().into(),
});
}
processor(&commit).map_err(Error::Processor)?;
stats.max_generation = max(stats.max_generation, commit.generation());
stats.min_generation = min(stats.min_generation, commit.generation());
let parent_count = commit
.iter_parents()
.try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
.map_err(Error::Commit)?;
*stats.parent_counts.entry(parent_count).or_insert(0) += 1;
prev_id = commit.id();
}
if stats.min_generation == GENERATION_NUMBER_INFINITY {
stats.min_generation = 0;
}
Ok(stats)
}
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
pub fn verify_integrity<E>(
&self,
mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
E: std::error::Error + 'static,
{
if self.files.len() > 256 {
// A file in a split chain can only have up to 255 base files.
return Err(Error::TooManyFiles(self.files.len()));
}
let mut stats = Outcome {
longest_path_length: None,
num_commits: 0,
parent_counts: BTreeMap::new(),
};
let mut max_generation = 0u32;
// TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
// a separate loop, e.g. self.iter_sorted_ids().
let mut file_start_pos = graph::Position(0);
for (file_index, file) in self.files.iter().enumerate() {
if usize::from(file.base_graph_count()) != file_index {
return Err(Error::BaseGraphCount {
actual: file.base_graph_count(),
expected: file_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
for (base_graph_index, (expected, actual)) in self.files[..file_index]
.iter()
.map(|base_file| base_file.checksum())
.zip(file.iter_base_graph_ids())
.enumerate()
{
if actual != expected {
return Err(Error::BaseGraphId {
actual: actual.into(),
expected: expected.into(),
index: base_graph_index
.try_into()
.expect("files.len() check to protect against this"),
path: file.path().to_owned(),
});
}
}
let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
let file_stats = file
.traverse(|commit| {
let mut max_parent_generation = 0u32;
for parent_pos in commit.iter_parents() {
let parent_pos = parent_pos.map_err(Error::Commit)?;
if parent_pos >= next_file_start_pos {
return Err(Error::ParentOutOfRange {
parent_pos,
id: commit.id().into(),
max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
});
}
let parent = self.commit_at(parent_pos);
max_parent_generation = max(max_parent_generation, parent.generation());
}
// If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
// generation should be GENERATION_NUMBER_MAX too.
let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
if commit.generation() != expected_generation {
return Err(Error::Generation {
actual: commit.generation(),
expected: expected_generation,
id: commit.id().into(),
});
}
processor(commit).map_err(Error::Processor)?;
Ok(())
})
.map_err(|err| Error::File {
err: match err {
file::verify::Error::Processor(e) => return e,
file::verify::Error::RootTreeId { id, root_tree_id } => {
file::verify::Error::RootTreeId { id, root_tree_id }
}
file::verify::Error::Mismatch { actual, expected } => {
file::verify::Error::Mismatch { actual, expected }
}
file::verify::Error::Generation { generation, id } => {
file::verify::Error::Generation { generation, id }
}
file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
} => file::verify::Error::CommitsOutOfOrder {
id,
pos,
predecessor_id,
},
},
path: file.path().to_owned(),
})?;
max_generation = max(max_generation, file_stats.max_generation);
stats.num_commits += file_stats.num_commits;
for (key, value) in file_stats.parent_counts.into_iter() {
*stats.parent_counts.entry(key).or_insert(0) += value;
}
file_start_pos = next_file_start_pos;
}
stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
Some(max_generation.saturating_sub(1))
} else {
None
};
Ok(stats)
}
sourcepub fn path(&self) -> &Path
pub fn path(&self) -> &Path
Returns the path to this file.
Examples found in repository?
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
pub fn new(files: Vec<File>) -> Result<Self, Error> {
let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum();
if num_commits > u64::from(MAX_COMMITS) {
return Err(Error::TooManyCommits(num_commits));
}
for window in files.windows(2) {
let f1 = &window[0];
let f2 = &window[1];
if f1.object_hash() != f2.object_hash() {
return Err(Error::HashVersionMismatch {
path1: f1.path().to_owned(),
hash1: f1.object_hash(),
path2: f2.path().to_owned(),
hash2: f2.object_hash(),
});
}
}
Ok(Self { files })
}
More examples
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Verify the integrity of the whole graph, which may span multiple files, by checking
/// cross-file invariants and delegating per-file checks to `File::traverse()`.
///
/// `processor` is called once for every commit; if it returns an error, verification stops
/// and that error is surfaced via `Error::Processor`.
///
/// Checks performed here:
/// - no more than 256 files in the chain,
/// - each file's base-graph count equals the number of files preceding it,
/// - each file's recorded base-graph IDs match the checksums of the preceding files,
/// - every parent position stays within the commits seen so far (this file plus its bases),
/// - every commit's generation is exactly `min(max parent generation + 1, GENERATION_NUMBER_MAX)`.
pub fn verify_integrity<E>(
    &self,
    mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
    E: std::error::Error + 'static,
{
    if self.files.len() > 256 {
        // A file in a split chain can only have up to 255 base files.
        return Err(Error::TooManyFiles(self.files.len()));
    }
    // Aggregated over all files; `longest_path_length` is filled in at the very end.
    let mut stats = Outcome {
        longest_path_length: None,
        num_commits: 0,
        parent_counts: BTreeMap::new(),
    };
    let mut max_generation = 0u32;
    // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
    // a separate loop, e.g. self.iter_sorted_ids().
    // Files in a chain share one contiguous position space; this is the first
    // position covered by the current file.
    let mut file_start_pos = graph::Position(0);
    for (file_index, file) in self.files.iter().enumerate() {
        // A file must declare exactly as many base graphs as there are files before it.
        if usize::from(file.base_graph_count()) != file_index {
            return Err(Error::BaseGraphCount {
                actual: file.base_graph_count(),
                expected: file_index
                    .try_into()
                    .expect("files.len() check to protect against this"),
                path: file.path().to_owned(),
            });
        }
        // The base-graph IDs stored in this file must match the checksums of the
        // preceding files, in order.
        for (base_graph_index, (expected, actual)) in self.files[..file_index]
            .iter()
            .map(|base_file| base_file.checksum())
            .zip(file.iter_base_graph_ids())
            .enumerate()
        {
            if actual != expected {
                return Err(Error::BaseGraphId {
                    actual: actual.into(),
                    expected: expected.into(),
                    index: base_graph_index
                        .try_into()
                        .expect("files.len() check to protect against this"),
                    path: file.path().to_owned(),
                });
            }
        }
        // Exclusive upper bound for parent positions referenced from this file.
        let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
        let file_stats = file
            .traverse(|commit| {
                let mut max_parent_generation = 0u32;
                for parent_pos in commit.iter_parents() {
                    let parent_pos = parent_pos.map_err(Error::Commit)?;
                    // Parents may live in this file or any base file, but never beyond.
                    if parent_pos >= next_file_start_pos {
                        return Err(Error::ParentOutOfRange {
                            parent_pos,
                            id: commit.id().into(),
                            max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
                        });
                    }
                    let parent = self.commit_at(parent_pos);
                    max_parent_generation = max(max_parent_generation, parent.generation());
                }
                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                // generation should be GENERATION_NUMBER_MAX too.
                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                if commit.generation() != expected_generation {
                    return Err(Error::Generation {
                        actual: commit.generation(),
                        expected: expected_generation,
                        id: commit.id().into(),
                    });
                }
                processor(commit).map_err(Error::Processor)?;
                Ok(())
            })
            .map_err(|err| Error::File {
                err: match err {
                    // `return` exits the `map_err` closure, yielding the processor's
                    // original graph-level error instead of wrapping it in `Error::File`.
                    file::verify::Error::Processor(e) => return e,
                    // The remaining arms rebuild each variant unchanged. This cannot be
                    // `other => other` because the generic processor-error parameter of the
                    // source and target `file::verify::Error` types differs, so each
                    // variant must be reconstructed to convert the type.
                    file::verify::Error::RootTreeId { id, root_tree_id } => {
                        file::verify::Error::RootTreeId { id, root_tree_id }
                    }
                    file::verify::Error::Mismatch { actual, expected } => {
                        file::verify::Error::Mismatch { actual, expected }
                    }
                    file::verify::Error::Generation { generation, id } => {
                        file::verify::Error::Generation { generation, id }
                    }
                    file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
                    file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
                    file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
                    file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    } => file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    },
                },
                path: file.path().to_owned(),
            })?;
        // Fold this file's statistics into the graph-wide totals.
        max_generation = max(max_generation, file_stats.max_generation);
        stats.num_commits += file_stats.num_commits;
        for (key, value) in file_stats.parent_counts.into_iter() {
            *stats.parent_counts.entry(key).or_insert(0) += value;
        }
        file_start_pos = next_file_start_pos;
    }
    // Per the `expected_generation` check above, generation = longest path to a root + 1
    // (roots have generation 1), so the longest path has `max_generation - 1` edges.
    // If the cap was hit, the true length cannot be derived, hence `None`.
    stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
        Some(max_generation.saturating_sub(1))
    } else {
        None
    };
    Ok(stats)
}
source§impl File
impl File
sourcepub fn at(path: impl AsRef<Path>) -> Result<File, Error>
pub fn at(path: impl AsRef<Path>) -> Result<File, Error>
Try to parse the commit graph file at `path`.
Examples found in repository?
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
pub fn from_commit_graphs_dir(path: impl AsRef<Path>) -> Result<Self, Error> {
let commit_graphs_dir = path.as_ref();
let chain_file_path = commit_graphs_dir.join("commit-graph-chain");
let chain_file = std::fs::File::open(&chain_file_path).map_err(|e| Error::Io {
err: e,
path: chain_file_path.clone(),
})?;
let mut files = Vec::new();
for line in BufReader::new(chain_file).lines() {
let hash = line.map_err(|e| Error::Io {
err: e,
path: chain_file_path.clone(),
})?;
let graph_file_path = commit_graphs_dir.join(format!("graph-{}.graph", hash));
files.push(File::at(&graph_file_path).map_err(|e| Error::File {
err: e,
path: graph_file_path.clone(),
})?);
}
Self::new(files)
}
/// Instantiate a commit graph from a `.git/objects/info/commit-graph` or
/// `.git/objects/info/commit-graphs/graph-*.graph` file.
pub fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
let path = path.as_ref();
let file = File::at(path).map_err(|e| Error::File {
err: e,
path: path.to_owned(),
})?;
Self::new(vec![file])
}
source§impl File
impl File
Verification
sourcepub fn checksum(&self) -> &oid
pub fn checksum(&self) -> &oid
Returns the trailing checksum over the entire content of this file.
Examples found in repository?
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
/// Traverse all commits stored in this file and call `processor(commit)` on each.
///
/// While iterating, this verifies:
/// - the trailing checksum matches the file's content (`verify_checksum()`),
/// - a split-chain file's name matches its checksum,
/// - commit IDs are non-null and strictly ascending,
/// - root tree IDs are non-null,
/// - generation numbers do not exceed `GENERATION_NUMBER_MAX`.
///
/// If `processor` fails, iteration stops and the call returns `Error::Processor`.
/// On success, returns per-file statistics in [`Outcome`].
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
where
    E: std::error::Error + 'static,
    Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
{
    self.verify_checksum()
        .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
    verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
    let null_id = self.object_hash().null_ref();
    let mut stats = Outcome {
        max_generation: 0,
        max_parents: 0,
        min_generation: GENERATION_NUMBER_INFINITY,
        num_commits: self.num_commits(),
        parent_counts: HashMap::new(),
    };
    // TODO: Verify self.fan values as we go.
    let mut prev_id: &git_hash::oid = null_id;
    for commit in self.iter_commits() {
        // IDs must be strictly ascending; a null ID is reported specially.
        if commit.id() <= prev_id {
            if commit.id() == null_id {
                return Err(Error::CommitId {
                    pos: commit.position(),
                    id: commit.id().into(),
                });
            }
            return Err(Error::CommitsOutOfOrder {
                pos: commit.position(),
                id: commit.id().into(),
                predecessor_id: prev_id.into(),
            });
        }
        if commit.root_tree_id() == null_id {
            return Err(Error::RootTreeId {
                id: commit.id().into(),
                root_tree_id: commit.root_tree_id().into(),
            });
        }
        if commit.generation() > GENERATION_NUMBER_MAX {
            return Err(Error::Generation {
                generation: commit.generation(),
                id: commit.id().into(),
            });
        }
        processor(&commit).map_err(Error::Processor)?;
        stats.max_generation = max(stats.max_generation, commit.generation());
        stats.min_generation = min(stats.min_generation, commit.generation());
        // Count parents by walking the parent iterator, propagating decode errors.
        let parent_count = commit
            .iter_parents()
            .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
            .map_err(Error::Commit)?;
        // Fix: `max_parents` was initialized above but never updated, so it always read 0.
        stats.max_parents = max(stats.max_parents, parent_count);
        *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
        prev_id = commit.id();
    }
    // An empty file leaves `min_generation` at the sentinel; normalize it to 0.
    if stats.min_generation == GENERATION_NUMBER_INFINITY {
        stats.min_generation = 0;
    }
    Ok(stats)
}
/// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file,
/// excluding the trailing checksum itself.
///
/// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch.
pub fn verify_checksum(&self) -> Result<git_hash::ObjectId, (git_hash::ObjectId, git_hash::ObjectId)> {
    // We hash the buffer directly rather than via git_features::hash::bytes_of_file(…):
    // the latter would force a richer error type (io::Error + mismatch) for little gain,
    // since these files are usually small enough to process in under a second.
    // Progress reporting could change that trade-off once a progress instance is passed.
    let trailer_start = self.data.len() - self.hash_len;
    let mut hasher = git_features::hash::hasher(self.object_hash());
    hasher.update(&self.data[..trailer_start]);

    let actual = git_hash::ObjectId::from(hasher.digest().as_ref());
    let expected = self.checksum();
    if actual != expected {
        Err((actual, expected.into()))
    } else {
        Ok(actual)
    }
}
More examples
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Verify the integrity of the whole graph, which may span multiple files, by checking
/// cross-file invariants and delegating per-file checks to `File::traverse()`.
///
/// `processor` is called once for every commit; if it returns an error, verification stops
/// and that error is surfaced via `Error::Processor`.
///
/// Checks performed here:
/// - no more than 256 files in the chain,
/// - each file's base-graph count equals the number of files preceding it,
/// - each file's recorded base-graph IDs match the checksums of the preceding files,
/// - every parent position stays within the commits seen so far (this file plus its bases),
/// - every commit's generation is exactly `min(max parent generation + 1, GENERATION_NUMBER_MAX)`.
pub fn verify_integrity<E>(
    &self,
    mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
    E: std::error::Error + 'static,
{
    if self.files.len() > 256 {
        // A file in a split chain can only have up to 255 base files.
        return Err(Error::TooManyFiles(self.files.len()));
    }
    // Aggregated over all files; `longest_path_length` is filled in at the very end.
    let mut stats = Outcome {
        longest_path_length: None,
        num_commits: 0,
        parent_counts: BTreeMap::new(),
    };
    let mut max_generation = 0u32;
    // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
    // a separate loop, e.g. self.iter_sorted_ids().
    // Files in a chain share one contiguous position space; this is the first
    // position covered by the current file.
    let mut file_start_pos = graph::Position(0);
    for (file_index, file) in self.files.iter().enumerate() {
        // A file must declare exactly as many base graphs as there are files before it.
        if usize::from(file.base_graph_count()) != file_index {
            return Err(Error::BaseGraphCount {
                actual: file.base_graph_count(),
                expected: file_index
                    .try_into()
                    .expect("files.len() check to protect against this"),
                path: file.path().to_owned(),
            });
        }
        // The base-graph IDs stored in this file must match the checksums of the
        // preceding files, in order.
        for (base_graph_index, (expected, actual)) in self.files[..file_index]
            .iter()
            .map(|base_file| base_file.checksum())
            .zip(file.iter_base_graph_ids())
            .enumerate()
        {
            if actual != expected {
                return Err(Error::BaseGraphId {
                    actual: actual.into(),
                    expected: expected.into(),
                    index: base_graph_index
                        .try_into()
                        .expect("files.len() check to protect against this"),
                    path: file.path().to_owned(),
                });
            }
        }
        // Exclusive upper bound for parent positions referenced from this file.
        let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
        let file_stats = file
            .traverse(|commit| {
                let mut max_parent_generation = 0u32;
                for parent_pos in commit.iter_parents() {
                    let parent_pos = parent_pos.map_err(Error::Commit)?;
                    // Parents may live in this file or any base file, but never beyond.
                    if parent_pos >= next_file_start_pos {
                        return Err(Error::ParentOutOfRange {
                            parent_pos,
                            id: commit.id().into(),
                            max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
                        });
                    }
                    let parent = self.commit_at(parent_pos);
                    max_parent_generation = max(max_parent_generation, parent.generation());
                }
                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                // generation should be GENERATION_NUMBER_MAX too.
                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                if commit.generation() != expected_generation {
                    return Err(Error::Generation {
                        actual: commit.generation(),
                        expected: expected_generation,
                        id: commit.id().into(),
                    });
                }
                processor(commit).map_err(Error::Processor)?;
                Ok(())
            })
            .map_err(|err| Error::File {
                err: match err {
                    // `return` exits the `map_err` closure, yielding the processor's
                    // original graph-level error instead of wrapping it in `Error::File`.
                    file::verify::Error::Processor(e) => return e,
                    // The remaining arms rebuild each variant unchanged. This cannot be
                    // `other => other` because the generic processor-error parameter of the
                    // source and target `file::verify::Error` types differs, so each
                    // variant must be reconstructed to convert the type.
                    file::verify::Error::RootTreeId { id, root_tree_id } => {
                        file::verify::Error::RootTreeId { id, root_tree_id }
                    }
                    file::verify::Error::Mismatch { actual, expected } => {
                        file::verify::Error::Mismatch { actual, expected }
                    }
                    file::verify::Error::Generation { generation, id } => {
                        file::verify::Error::Generation { generation, id }
                    }
                    file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
                    file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
                    file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
                    file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    } => file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    },
                },
                path: file.path().to_owned(),
            })?;
        // Fold this file's statistics into the graph-wide totals.
        max_generation = max(max_generation, file_stats.max_generation);
        stats.num_commits += file_stats.num_commits;
        for (key, value) in file_stats.parent_counts.into_iter() {
            *stats.parent_counts.entry(key).or_insert(0) += value;
        }
        file_start_pos = next_file_start_pos;
    }
    // Per the `expected_generation` check above, generation = longest path to a root + 1
    // (roots have generation 1), so the longest path has `max_generation - 1` edges.
    // If the cap was hit, the true length cannot be derived, hence `None`.
    stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
        Some(max_generation.saturating_sub(1))
    } else {
        None
    };
    Ok(stats)
}
sourcepub fn traverse<'a, E, Processor>(
&'a self,
processor: Processor
) -> Result<Outcome, Error<E>>where
E: Error + 'static,
Processor: FnMut(&Commit<'a>) -> Result<(), E>,
pub fn traverse<'a, E, Processor>(
&'a self,
processor: Processor
) -> Result<Outcome, Error<E>>where
E: Error + 'static,
Processor: FnMut(&Commit<'a>) -> Result<(), E>,
Traverse all commits stored in this file and call `processor(commit) -> Result<(), Error>`
on each of them.
If the `processor`
fails, iteration is stopped and the entire call results in the respective error.
Examples found in repository?
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204
/// Verify the integrity of the whole graph, which may span multiple files, by checking
/// cross-file invariants and delegating per-file checks to `File::traverse()`.
///
/// `processor` is called once for every commit; if it returns an error, verification stops
/// and that error is surfaced via `Error::Processor`.
///
/// Checks performed here:
/// - no more than 256 files in the chain,
/// - each file's base-graph count equals the number of files preceding it,
/// - each file's recorded base-graph IDs match the checksums of the preceding files,
/// - every parent position stays within the commits seen so far (this file plus its bases),
/// - every commit's generation is exactly `min(max parent generation + 1, GENERATION_NUMBER_MAX)`.
pub fn verify_integrity<E>(
    &self,
    mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
) -> Result<Outcome, Error<E>>
where
    E: std::error::Error + 'static,
{
    if self.files.len() > 256 {
        // A file in a split chain can only have up to 255 base files.
        return Err(Error::TooManyFiles(self.files.len()));
    }
    // Aggregated over all files; `longest_path_length` is filled in at the very end.
    let mut stats = Outcome {
        longest_path_length: None,
        num_commits: 0,
        parent_counts: BTreeMap::new(),
    };
    let mut max_generation = 0u32;
    // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
    // a separate loop, e.g. self.iter_sorted_ids().
    // Files in a chain share one contiguous position space; this is the first
    // position covered by the current file.
    let mut file_start_pos = graph::Position(0);
    for (file_index, file) in self.files.iter().enumerate() {
        // A file must declare exactly as many base graphs as there are files before it.
        if usize::from(file.base_graph_count()) != file_index {
            return Err(Error::BaseGraphCount {
                actual: file.base_graph_count(),
                expected: file_index
                    .try_into()
                    .expect("files.len() check to protect against this"),
                path: file.path().to_owned(),
            });
        }
        // The base-graph IDs stored in this file must match the checksums of the
        // preceding files, in order.
        for (base_graph_index, (expected, actual)) in self.files[..file_index]
            .iter()
            .map(|base_file| base_file.checksum())
            .zip(file.iter_base_graph_ids())
            .enumerate()
        {
            if actual != expected {
                return Err(Error::BaseGraphId {
                    actual: actual.into(),
                    expected: expected.into(),
                    index: base_graph_index
                        .try_into()
                        .expect("files.len() check to protect against this"),
                    path: file.path().to_owned(),
                });
            }
        }
        // Exclusive upper bound for parent positions referenced from this file.
        let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
        let file_stats = file
            .traverse(|commit| {
                let mut max_parent_generation = 0u32;
                for parent_pos in commit.iter_parents() {
                    let parent_pos = parent_pos.map_err(Error::Commit)?;
                    // Parents may live in this file or any base file, but never beyond.
                    if parent_pos >= next_file_start_pos {
                        return Err(Error::ParentOutOfRange {
                            parent_pos,
                            id: commit.id().into(),
                            max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
                        });
                    }
                    let parent = self.commit_at(parent_pos);
                    max_parent_generation = max(max_parent_generation, parent.generation());
                }
                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                // generation should be GENERATION_NUMBER_MAX too.
                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                if commit.generation() != expected_generation {
                    return Err(Error::Generation {
                        actual: commit.generation(),
                        expected: expected_generation,
                        id: commit.id().into(),
                    });
                }
                processor(commit).map_err(Error::Processor)?;
                Ok(())
            })
            .map_err(|err| Error::File {
                err: match err {
                    // `return` exits the `map_err` closure, yielding the processor's
                    // original graph-level error instead of wrapping it in `Error::File`.
                    file::verify::Error::Processor(e) => return e,
                    // The remaining arms rebuild each variant unchanged. This cannot be
                    // `other => other` because the generic processor-error parameter of the
                    // source and target `file::verify::Error` types differs, so each
                    // variant must be reconstructed to convert the type.
                    file::verify::Error::RootTreeId { id, root_tree_id } => {
                        file::verify::Error::RootTreeId { id, root_tree_id }
                    }
                    file::verify::Error::Mismatch { actual, expected } => {
                        file::verify::Error::Mismatch { actual, expected }
                    }
                    file::verify::Error::Generation { generation, id } => {
                        file::verify::Error::Generation { generation, id }
                    }
                    file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
                    file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
                    file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
                    file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    } => file::verify::Error::CommitsOutOfOrder {
                        id,
                        pos,
                        predecessor_id,
                    },
                },
                path: file.path().to_owned(),
            })?;
        // Fold this file's statistics into the graph-wide totals.
        max_generation = max(max_generation, file_stats.max_generation);
        stats.num_commits += file_stats.num_commits;
        for (key, value) in file_stats.parent_counts.into_iter() {
            *stats.parent_counts.entry(key).or_insert(0) += value;
        }
        file_start_pos = next_file_start_pos;
    }
    // Per the `expected_generation` check above, generation = longest path to a root + 1
    // (roots have generation 1), so the longest path has `max_generation - 1` edges.
    // If the cap was hit, the true length cannot be derived, hence `None`.
    stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
        Some(max_generation.saturating_sub(1))
    } else {
        None
    };
    Ok(stats)
}
sourcepub fn verify_checksum(&self) -> Result<ObjectId, (ObjectId, ObjectId)>
pub fn verify_checksum(&self) -> Result<ObjectId, (ObjectId, ObjectId)>
Assure the checksum
matches the actual checksum over all content of this file, excluding the trailing
checksum itself.
Return the actual checksum on success or (actual checksum, expected checksum)
if there is a mismatch.
Examples found in repository?
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
/// Traverse all commits stored in this file and call `processor(commit)` on each.
///
/// While iterating, this verifies:
/// - the trailing checksum matches the file's content (`verify_checksum()`),
/// - a split-chain file's name matches its checksum,
/// - commit IDs are non-null and strictly ascending,
/// - root tree IDs are non-null,
/// - generation numbers do not exceed `GENERATION_NUMBER_MAX`.
///
/// If `processor` fails, iteration stops and the call returns `Error::Processor`.
/// On success, returns per-file statistics in [`Outcome`].
pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Error<E>>
where
    E: std::error::Error + 'static,
    Processor: FnMut(&file::Commit<'a>) -> Result<(), E>,
{
    self.verify_checksum()
        .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?;
    verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?;
    let null_id = self.object_hash().null_ref();
    let mut stats = Outcome {
        max_generation: 0,
        max_parents: 0,
        min_generation: GENERATION_NUMBER_INFINITY,
        num_commits: self.num_commits(),
        parent_counts: HashMap::new(),
    };
    // TODO: Verify self.fan values as we go.
    let mut prev_id: &git_hash::oid = null_id;
    for commit in self.iter_commits() {
        // IDs must be strictly ascending; a null ID is reported specially.
        if commit.id() <= prev_id {
            if commit.id() == null_id {
                return Err(Error::CommitId {
                    pos: commit.position(),
                    id: commit.id().into(),
                });
            }
            return Err(Error::CommitsOutOfOrder {
                pos: commit.position(),
                id: commit.id().into(),
                predecessor_id: prev_id.into(),
            });
        }
        if commit.root_tree_id() == null_id {
            return Err(Error::RootTreeId {
                id: commit.id().into(),
                root_tree_id: commit.root_tree_id().into(),
            });
        }
        if commit.generation() > GENERATION_NUMBER_MAX {
            return Err(Error::Generation {
                generation: commit.generation(),
                id: commit.id().into(),
            });
        }
        processor(&commit).map_err(Error::Processor)?;
        stats.max_generation = max(stats.max_generation, commit.generation());
        stats.min_generation = min(stats.min_generation, commit.generation());
        // Count parents by walking the parent iterator, propagating decode errors.
        let parent_count = commit
            .iter_parents()
            .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))
            .map_err(Error::Commit)?;
        // Fix: `max_parents` was initialized above but never updated, so it always read 0.
        stats.max_parents = max(stats.max_parents, parent_count);
        *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
        prev_id = commit.id();
    }
    // An empty file leaves `min_generation` at the sentinel; normalize it to 0.
    if stats.min_generation == GENERATION_NUMBER_INFINITY {
        stats.min_generation = 0;
    }
    Ok(stats)
}