Skip to main content

gix_commitgraph/
verify.rs

1//! Auxiliary types used by graph verification methods.
2use std::{
3    cmp::{max, min},
4    collections::BTreeMap,
5};
6
7use gix_error::{message, ErrorExt, Exn, Message, ResultExt};
8
9use crate::{
10    file::{self},
11    Graph, Position, GENERATION_NUMBER_MAX,
12};
13
14/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
15#[derive(Clone, Debug, Eq, PartialEq)]
16#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
17pub struct Outcome {
18    /// The length of the longest path between any two commits in this graph.
19    ///
20    /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
21    /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
22    /// If the longest path length is too large to fit in a [u32], then this will be [None].
23    pub longest_path_length: Option<u32>,
24    /// The total number of commits traversed.
25    pub num_commits: u32,
26    /// A mapping of `N -> number of commits with N parents`.
27    pub parent_counts: BTreeMap<u32, u32>,
28}
29
30impl Graph {
31    /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
32    ///
33    /// When `processor` returns an error, the entire verification is stopped and the error returned.
34    pub fn verify_integrity<E>(
35        &self,
36        mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
37    ) -> Result<Outcome, Exn<Message>>
38    where
39        E: std::error::Error + Send + Sync + 'static,
40    {
41        if self.files.len() > 256 {
42            // A file in a split chain can only have up to 255 base files.
43            return Err(message!(
44                "Commit-graph should be composed of at most 256 files but actually contains {} files",
45                self.files.len()
46            )
47            .raise());
48        }
49
50        let mut stats = Outcome {
51            longest_path_length: None,
52            num_commits: 0,
53            parent_counts: BTreeMap::new(),
54        };
55        let mut max_generation = 0u32;
56
57        // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
58        //   a separate loop, e.g. self.iter_sorted_ids().
59
60        let mut file_start_pos = Position(0);
61        for (file_index, file) in self.files.iter().enumerate() {
62            if usize::from(file.base_graph_count()) != file_index {
63                return Err(message!(
64                    "'{}' should have {} base graphs, but claims {} base graphs",
65                    file.path().display(),
66                    file_index,
67                    file.base_graph_count()
68                )
69                .raise());
70            }
71
72            for (base_graph_index, (expected, actual)) in self
73                .files
74                .iter()
75                .take(file_index)
76                .map(crate::File::checksum)
77                .zip(file.iter_base_graph_ids())
78                .enumerate()
79            {
80                if actual != expected {
81                    return Err(message!(
82                        "'{}' base graph at index {} should have ID {} but is {}",
83                        file.path().display(),
84                        base_graph_index,
85                        expected,
86                        actual
87                    )
88                    .raise());
89                }
90            }
91
92            let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
93            let file_stats = file.traverse(|commit| {
94                let mut max_parent_generation = 0u32;
95                for parent_pos in commit.iter_parents() {
96                    let parent_pos = parent_pos.map_err(|err| err.raise_erased())?;
97                    if parent_pos >= next_file_start_pos {
98                        return Err(message!(
99                            "Commit {} has parent position {parent_pos} that is out of range (should be in range 0-{})",
100                            commit.id(),
101                            Position(next_file_start_pos.0 - 1)
102                        )
103                        .raise_erased());
104                    }
105                    let parent = self.commit_at(parent_pos);
106                    max_parent_generation = max(max_parent_generation, parent.generation());
107                }
108
109                // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
110                // generation should be GENERATION_NUMBER_MAX too.
111                let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
112                if commit.generation() != expected_generation {
113                    return Err(message!(
114                        "Commit {}'s generation should be {expected_generation} but is {}",
115                        commit.id(),
116                        commit.generation()
117                    )
118                    .raise_erased());
119                }
120
121                processor(commit).or_raise_erased(|| message!("processor failed on commit {id}", id = commit.id()))?;
122
123                Ok(())
124            })?;
125
126            max_generation = max(max_generation, file_stats.max_generation);
127            stats.num_commits += file_stats.num_commits;
128            for (key, value) in file_stats.parent_counts.into_iter() {
129                *stats.parent_counts.entry(key).or_insert(0) += value;
130            }
131            file_start_pos = next_file_start_pos;
132        }
133
134        stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
135            Some(max_generation.saturating_sub(1))
136        } else {
137            None
138        };
139        Ok(stats)
140    }
141}