git_commitgraph/graph/
verify.rs

1//! Auxiliary types used by graph verification methods.
2use std::{
3    cmp::{max, min},
4    collections::BTreeMap,
5    convert::TryInto,
6    path::PathBuf,
7};
8
9use crate::{
10    file::{self, commit},
11    graph, Graph, GENERATION_NUMBER_MAX,
12};
13
14/// The error used in [`verify_integrity()`][Graph::verify_integrity].
15#[derive(thiserror::Error, Debug)]
16#[allow(missing_docs)]
17pub enum Error<E: std::error::Error + 'static> {
18    #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
19    BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
20    #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
21    BaseGraphId {
22        actual: git_hash::ObjectId,
23        expected: git_hash::ObjectId,
24        index: u8,
25        path: PathBuf,
26    },
27    #[error(transparent)]
28    Commit(#[from] commit::Error),
29    #[error("{}: {err}", .path.display())]
30    File {
31        // Use zero-size error type. We will never return
32        // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
33        // file's processor, and we convert`file::verify::Error::Processor<graph::verify::Error>`
34        // variants into direct `graph::verify::Error` values.
35        err: file::verify::Error<std::convert::Infallible>,
36        path: PathBuf,
37    },
38    #[error("Commit {id}'s generation should be {expected} but is {actual}")]
39    Generation {
40        actual: u32,
41        expected: u32,
42        id: git_hash::ObjectId,
43    },
44    #[error(
45        "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
46    )]
47    ParentOutOfRange {
48        id: git_hash::ObjectId,
49        max_valid_pos: graph::Position,
50        parent_pos: graph::Position,
51    },
52    #[error("{0}")]
53    Processor(#[source] E),
54    #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
55    TooManyFiles(usize),
56}
57
58/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
59#[derive(Clone, Debug, Eq, PartialEq)]
60#[cfg_attr(feature = "serde1", derive(serde::Deserialize, serde::Serialize))]
61pub struct Outcome {
62    /// The length of the longest path between any two commits in this graph.
63    ///
64    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
65    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
66    /// If the longest path length is too large to fit in a [u32], then this will be [None].
67    pub longest_path_length: Option<u32>,
68    /// The total number of commits traversed.
69    pub num_commits: u32,
70    /// A mapping of `N -> number of commits with N parents`.
71    pub parent_counts: BTreeMap<u32, u32>,
72}
73
74impl Graph {
75    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
76    ///
77    /// When `processor` returns an error, the entire verification is stopped and the error returned.
78    pub fn verify_integrity<E>(
79        &self,
80        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
81    ) -> Result<Outcome, Error<E>>
82    where
83        E: std::error::Error + 'static,
84    {
85        if self.files.len() > 256 {
86            // A file in a split chain can only have up to 255 base files.
87            return Err(Error::TooManyFiles(self.files.len()));
88        }
89
90        let mut stats = Outcome {
91            longest_path_length: None,
92            num_commits: 0,
93            parent_counts: BTreeMap::new(),
94        };
95        let mut max_generation = 0u32;
96
97        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
98        //   a separate loop, e.g. self.iter_sorted_ids().
99
100        let mut file_start_pos = graph::Position(0);
101        for (file_index, file) in self.files.iter().enumerate() {
102            if usize::from(file.base_graph_count()) != file_index {
103                return Err(Error::BaseGraphCount {
104                    actual: file.base_graph_count(),
105                    expected: file_index
106                        .try_into()
107                        .expect("files.len() check to protect against this"),
108                    path: file.path().to_owned(),
109                });
110            }
111
112            for (base_graph_index, (expected, actual)) in self.files[..file_index]
113                .iter()
114                .map(|base_file| base_file.checksum())
115                .zip(file.iter_base_graph_ids())
116                .enumerate()
117            {
118                if actual != expected {
119                    return Err(Error::BaseGraphId {
120                        actual: actual.into(),
121                        expected: expected.into(),
122                        index: base_graph_index
123                            .try_into()
124                            .expect("files.len() check to protect against this"),
125                        path: file.path().to_owned(),
126                    });
127                }
128            }
129
130            let next_file_start_pos = graph::Position(file_start_pos.0 + file.num_commits());
131            let file_stats = file
132                .traverse(|commit| {
133                    let mut max_parent_generation = 0u32;
134                    for parent_pos in commit.iter_parents() {
135                        let parent_pos = parent_pos.map_err(Error::Commit)?;
136                        if parent_pos >= next_file_start_pos {
137                            return Err(Error::ParentOutOfRange {
138                                parent_pos,
139                                id: commit.id().into(),
140                                max_valid_pos: graph::Position(next_file_start_pos.0 - 1),
141                            });
142                        }
143                        let parent = self.commit_at(parent_pos);
144                        max_parent_generation = max(max_parent_generation, parent.generation());
145                    }
146
147                    // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
148                    // generation should be GENERATION_NUMBER_MAX too.
149                    let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
150                    if commit.generation() != expected_generation {
151                        return Err(Error::Generation {
152                            actual: commit.generation(),
153                            expected: expected_generation,
154                            id: commit.id().into(),
155                        });
156                    }
157
158                    processor(commit).map_err(Error::Processor)?;
159
160                    Ok(())
161                })
162                .map_err(|err| Error::File {
163                    err: match err {
164                        file::verify::Error::Processor(e) => return e,
165                        file::verify::Error::RootTreeId { id, root_tree_id } => {
166                            file::verify::Error::RootTreeId { id, root_tree_id }
167                        }
168                        file::verify::Error::Mismatch { actual, expected } => {
169                            file::verify::Error::Mismatch { actual, expected }
170                        }
171                        file::verify::Error::Generation { generation, id } => {
172                            file::verify::Error::Generation { generation, id }
173                        }
174                        file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
175                        file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
176                        file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
177                        file::verify::Error::CommitsOutOfOrder {
178                            id,
179                            pos,
180                            predecessor_id,
181                        } => file::verify::Error::CommitsOutOfOrder {
182                            id,
183                            pos,
184                            predecessor_id,
185                        },
186                    },
187                    path: file.path().to_owned(),
188                })?;
189
190            max_generation = max(max_generation, file_stats.max_generation);
191            stats.num_commits += file_stats.num_commits;
192            for (key, value) in file_stats.parent_counts.into_iter() {
193                *stats.parent_counts.entry(key).or_insert(0) += value;
194            }
195            file_start_pos = next_file_start_pos;
196        }
197
198        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
199            Some(max_generation.saturating_sub(1))
200        } else {
201            None
202        };
203        Ok(stats)
204    }
205}