gix_commitgraph/
verify.rs

1//! Auxiliary types used by graph verification methods.
2use std::{
3    cmp::{max, min},
4    collections::BTreeMap,
5    path::PathBuf,
6};
7
8use crate::{
9    file::{self, commit},
10    Graph, Position, GENERATION_NUMBER_MAX,
11};
12
13/// The error used in [`verify_integrity()`][Graph::verify_integrity].
14#[derive(thiserror::Error, Debug)]
15#[allow(missing_docs)]
16pub enum Error<E: std::error::Error + 'static> {
17    #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
18    BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
19    #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
20    BaseGraphId {
21        actual: gix_hash::ObjectId,
22        expected: gix_hash::ObjectId,
23        index: u8,
24        path: PathBuf,
25    },
26    #[error(transparent)]
27    Commit(#[from] commit::Error),
28    #[error("{}: {err}", .path.display())]
29    File {
30        // Use zero-size error type. We will never return
31        // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
32        // file's processor, and we convert`file::verify::Error::Processor<graph::verify::Error>`
33        // variants into direct `graph::verify::Error` values.
34        err: file::verify::Error<std::convert::Infallible>,
35        path: PathBuf,
36    },
37    #[error("Commit {id}'s generation should be {expected} but is {actual}")]
38    Generation {
39        actual: u32,
40        expected: u32,
41        id: gix_hash::ObjectId,
42    },
43    #[error(
44        "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
45    )]
46    ParentOutOfRange {
47        id: gix_hash::ObjectId,
48        max_valid_pos: Position,
49        parent_pos: Position,
50    },
51    #[error("{0}")]
52    Processor(#[source] E),
53    #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
54    TooManyFiles(usize),
55}
56
57/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
58#[derive(Clone, Debug, Eq, PartialEq)]
59#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
60pub struct Outcome {
61    /// The length of the longest path between any two commits in this graph.
62    ///
63    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
64    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
65    /// If the longest path length is too large to fit in a [u32], then this will be [None].
66    pub longest_path_length: Option<u32>,
67    /// The total number of commits traversed.
68    pub num_commits: u32,
69    /// A mapping of `N -> number of commits with N parents`.
70    pub parent_counts: BTreeMap<u32, u32>,
71}
72
73impl Graph {
74    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
75    ///
76    /// When `processor` returns an error, the entire verification is stopped and the error returned.
77    pub fn verify_integrity<E>(
78        &self,
79        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
80    ) -> Result<Outcome, Error<E>>
81    where
82        E: std::error::Error + 'static,
83    {
84        if self.files.len() > 256 {
85            // A file in a split chain can only have up to 255 base files.
86            return Err(Error::TooManyFiles(self.files.len()));
87        }
88
89        let mut stats = Outcome {
90            longest_path_length: None,
91            num_commits: 0,
92            parent_counts: BTreeMap::new(),
93        };
94        let mut max_generation = 0u32;
95
96        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
97        //   a separate loop, e.g. self.iter_sorted_ids().
98
99        let mut file_start_pos = Position(0);
100        for (file_index, file) in self.files.iter().enumerate() {
101            if usize::from(file.base_graph_count()) != file_index {
102                return Err(Error::BaseGraphCount {
103                    actual: file.base_graph_count(),
104                    expected: file_index
105                        .try_into()
106                        .expect("files.len() check to protect against this"),
107                    path: file.path().to_owned(),
108                });
109            }
110
111            for (base_graph_index, (expected, actual)) in self.files[..file_index]
112                .iter()
113                .map(crate::File::checksum)
114                .zip(file.iter_base_graph_ids())
115                .enumerate()
116            {
117                if actual != expected {
118                    return Err(Error::BaseGraphId {
119                        actual: actual.into(),
120                        expected: expected.into(),
121                        index: base_graph_index
122                            .try_into()
123                            .expect("files.len() check to protect against this"),
124                        path: file.path().to_owned(),
125                    });
126                }
127            }
128
129            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
130            let file_stats = file
131                .traverse(|commit| {
132                    let mut max_parent_generation = 0u32;
133                    for parent_pos in commit.iter_parents() {
134                        let parent_pos = parent_pos.map_err(Error::Commit)?;
135                        if parent_pos >= next_file_start_pos {
136                            return Err(Error::ParentOutOfRange {
137                                parent_pos,
138                                id: commit.id().into(),
139                                max_valid_pos: Position(next_file_start_pos.0 - 1),
140                            });
141                        }
142                        let parent = self.commit_at(parent_pos);
143                        max_parent_generation = max(max_parent_generation, parent.generation());
144                    }
145
146                    // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
147                    // generation should be GENERATION_NUMBER_MAX too.
148                    let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
149                    if commit.generation() != expected_generation {
150                        return Err(Error::Generation {
151                            actual: commit.generation(),
152                            expected: expected_generation,
153                            id: commit.id().into(),
154                        });
155                    }
156
157                    processor(commit).map_err(Error::Processor)?;
158
159                    Ok(())
160                })
161                .map_err(|err| Error::File {
162                    err: match err {
163                        file::verify::Error::Processor(e) => return e,
164                        file::verify::Error::RootTreeId { id, root_tree_id } => {
165                            file::verify::Error::RootTreeId { id, root_tree_id }
166                        }
167                        file::verify::Error::Checksum(err) => file::verify::Error::Checksum(err),
168                        file::verify::Error::Generation { generation, id } => {
169                            file::verify::Error::Generation { generation, id }
170                        }
171                        file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
172                        file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
173                        file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
174                        file::verify::Error::CommitsOutOfOrder {
175                            id,
176                            pos,
177                            predecessor_id,
178                        } => file::verify::Error::CommitsOutOfOrder {
179                            id,
180                            pos,
181                            predecessor_id,
182                        },
183                    },
184                    path: file.path().to_owned(),
185                })?;
186
187            max_generation = max(max_generation, file_stats.max_generation);
188            stats.num_commits += file_stats.num_commits;
189            for (key, value) in file_stats.parent_counts.into_iter() {
190                *stats.parent_counts.entry(key).or_insert(0) += value;
191            }
192            file_start_pos = next_file_start_pos;
193        }
194
195        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
196            Some(max_generation.saturating_sub(1))
197        } else {
198            None
199        };
200        Ok(stats)
201    }
202}