gix_commitgraph/
verify.rs

1//! Auxiliary types used by graph verification methods.
2use std::{
3    cmp::{max, min},
4    collections::BTreeMap,
5};
6
7use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
8
9use crate::{
10    file::{self},
11    Graph, Position, GENERATION_NUMBER_MAX,
12};
13
14/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
15#[derive(Clone, Debug, Eq, PartialEq)]
16#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
17pub struct Outcome {
18    /// The length of the longest path between any two commits in this graph.
19    ///
20    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
21    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
22    /// If the longest path length is too large to fit in a [u32], then this will be [None].
23    pub longest_path_length: Option<u32>,
24    /// The total number of commits traversed.
25    pub num_commits: u32,
26    /// A mapping of `N -> number of commits with N parents`.
27    pub parent_counts: BTreeMap<u32, u32>,
28}
29
30impl Graph {
31    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
32    ///
33    /// When `processor` returns an error, the entire verification is stopped and the error returned.
34    pub fn verify_integrity<E>(
35        &self,
36        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
37    ) -> Result<Outcome, Exn<Message>>
38    where
39        E: std::error::Error + Send + Sync + 'static,
40    {
41        if self.files.len() > 256 {
42            // A file in a split chain can only have up to 255 base files.
43            return Err(message!(
44                "Commit-graph should be composed of at most 256 files but actually contains {} files",
45                self.files.len()
46            )
47            .raise());
48        }
49
50        let mut stats = Outcome {
51            longest_path_length: None,
52            num_commits: 0,
53            parent_counts: BTreeMap::new(),
54        };
55        let mut max_generation = 0u32;
56
57        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
58        //   a separate loop, e.g. self.iter_sorted_ids().
59
60        let mut file_start_pos = Position(0);
61        for (file_index, file) in self.files.iter().enumerate() {
62            if usize::from(file.base_graph_count()) != file_index {
63                return Err(message!(
64                    "'{}' should have {} base graphs, but claims {} base graphs",
65                    file.path().display(),
66                    file_index,
67                    file.base_graph_count()
68                )
69                .raise());
70            }
71
72            for (base_graph_index, (expected, actual)) in self.files[..file_index]
73                .iter()
74                .map(crate::File::checksum)
75                .zip(file.iter_base_graph_ids())
76                .enumerate()
77            {
78                if actual != expected {
79                    return Err(message!(
80                        "'{}' base graph at index {} should have ID {} but is {}",
81                        file.path().display(),
82                        base_graph_index,
83                        expected,
84                        actual
85                    )
86                    .raise());
87                }
88            }
89
90            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
91            let file_stats = file.traverse(|commit| {
92                let mut max_parent_generation = 0u32;
93                for parent_pos in commit.iter_parents() {
94                    let parent_pos = parent_pos.map_err(|err| err.raise_erased())?;
95                    if parent_pos >= next_file_start_pos {
96                        return Err(message!(
97                            "Commit {} has parent position {parent_pos} that is out of range (should be in range 0-{})",
98                            commit.id(),
99                            Position(next_file_start_pos.0 - 1)
100                        )
101                        .raise_erased());
102                    }
103                    let parent = self.commit_at(parent_pos);
104                    max_parent_generation = max(max_parent_generation, parent.generation());
105                }
106
107                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
108                // generation should be GENERATION_NUMBER_MAX too.
109                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
110                if commit.generation() != expected_generation {
111                    return Err(message!(
112                        "Commit {}'s generation should be {expected_generation} but is {}",
113                        commit.id(),
114                        commit.generation()
115                    )
116                    .raise_erased());
117                }
118
119                processor(commit).or_raise_erased(|| message!("processor failed on commit {id}", id = commit.id()))?;
120
121                Ok(())
122            })?;
123
124            max_generation = max(max_generation, file_stats.max_generation);
125            stats.num_commits += file_stats.num_commits;
126            for (key, value) in file_stats.parent_counts.into_iter() {
127                *stats.parent_counts.entry(key).or_insert(0) += value;
128            }
129            file_start_pos = next_file_start_pos;
130        }
131
132        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
133            Some(max_generation.saturating_sub(1))
134        } else {
135            None
136        };
137        Ok(stats)
138    }
139}