gix_commitgraph/file/
verify.rs

1//! Auxiliary types used in commit graph file verification methods.
2use std::{
3    cmp::{max, min},
4    collections::HashMap,
5    path::Path,
6};
7
8use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
9
10use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};
11
/// Statistics gathered by a successful [`File::traverse()`] run.
///
/// All values describe the commits of a single commit-graph file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// Highest generation number found on any [`file::Commit`].
    pub max_generation: u32,
    /// Lowest generation number found on any [`file::Commit`].
    pub min_generation: u32,
    /// Highest parent count found on any single [`file::Commit`].
    pub max_parents: u32,
    /// How many [`commits`][file::Commit] the file holds in total.
    pub num_commits: u32,
    /// Histogram of parent counts: key `N` maps to the number of commits that have exactly `N` parents.
    pub parent_counts: HashMap<u32, u32>,
}
27
28/// Verification
29impl File {
30    /// Returns the trailing checksum over the entire content of this file.
31    pub fn checksum(&self) -> &gix_hash::oid {
32        gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..])
33    }
34
35    /// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it.
36    ///
37    /// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error.
38    pub fn traverse<'a, Processor>(&'a self, mut processor: Processor) -> Result<Outcome, Exn<Message>>
39    where
40        Processor: FnMut(&file::Commit<'a>) -> Result<(), Exn>,
41    {
42        self.verify_checksum()?;
43        verify_split_chain_filename_hash(&self.path, self.checksum())?;
44
45        let null_id = self.object_hash().null_ref();
46
47        let mut stats = Outcome {
48            max_generation: 0,
49            max_parents: 0,
50            min_generation: GENERATION_NUMBER_INFINITY,
51            num_commits: self.num_commits(),
52            parent_counts: HashMap::new(),
53        };
54
55        // TODO: Verify self.fan values as we go.
56        let mut prev_id: &gix_hash::oid = null_id;
57        for commit in self.iter_commits() {
58            if commit.id() <= prev_id {
59                if commit.id() == null_id {
60                    return Err(message!(
61                        "commit at file position {} has invalid ID {}",
62                        commit.position(),
63                        commit.id()
64                    )
65                    .raise());
66                }
67                return Err(message!(
68                    "commit at file position {} with ID {} is out of order relative to its predecessor with ID {prev_id}",
69                    commit.position(),
70                    commit.id()
71                )
72                .raise());
73            }
74            if commit.root_tree_id() == null_id {
75                return Err(message!(
76                    "commit {} has invalid root tree ID {}",
77                    commit.id(),
78                    commit.root_tree_id()
79                )
80                .raise());
81            }
82            if commit.generation() > GENERATION_NUMBER_MAX {
83                return Err(message!("commit {} has invalid generation {}", commit.id(), commit.generation()).raise());
84            }
85
86            processor(&commit).or_raise(|| message!("processor failed on commit {}", commit.id()))?;
87
88            stats.max_generation = max(stats.max_generation, commit.generation());
89            stats.min_generation = min(stats.min_generation, commit.generation());
90            let parent_count = commit.iter_parents().try_fold(0u32, |acc, pos| pos.map(|_| acc + 1))?;
91            *stats.parent_counts.entry(parent_count).or_insert(0) += 1;
92            prev_id = commit.id();
93        }
94
95        if stats.min_generation == GENERATION_NUMBER_INFINITY {
96            stats.min_generation = 0;
97        }
98
99        Ok(stats)
100    }
101
102    /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing
103    /// checksum itself.
104    ///
105    /// Return the actual checksum on success or [`Exn<Message>`] if there is a mismatch.
106    pub fn verify_checksum(&self) -> Result<gix_hash::ObjectId, Exn<Message>> {
107        // Even though we could use gix_hash::bytes_of_file(…), this would require extending our
108        // Error type to support io::Error. As we only gain progress, there probably isn't much value
109        // as these files are usually small enough to process them in less than a second, even for the large ones.
110        // But it's possible, once a progress instance is passed.
111        let data_len_without_trailer = self.data.len() - self.hash_len;
112        let mut hasher = gix_hash::hasher(self.object_hash());
113        hasher.update(&self.data[..data_len_without_trailer]);
114        let actual = hasher
115            .try_finalize()
116            .map_err(|e| message!("failed to hash commit graph file: {e}").raise())?;
117        actual.verify(self.checksum()).map_err(|e| message!("{e}").raise())?;
118        Ok(actual)
119    }
120}
121
122/// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the
123/// expected hash.
124fn verify_split_chain_filename_hash(path: &Path, expected: &gix_hash::oid) -> Result<(), Exn<Message>> {
125    path.file_name()
126        .and_then(std::ffi::OsStr::to_str)
127        .and_then(|filename| filename.strip_suffix(".graph"))
128        .and_then(|stem| stem.strip_prefix("graph-"))
129        .map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) {
130            Ok(actual) if actual == expected => Ok(()),
131            _ => Err(message!("commit-graph filename should be graph-{}.graph", expected.to_hex()).raise()),
132        })
133}