1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
use gix_hash::ObjectId;
use gix_odb::FindExt;
use crate::{ext::ObjectIdExt, revision, Repository};
/// The error returned by [`Platform::all()`] and [`Platform::selected()`].
///
/// Each variant transparently wraps the error of the operation that failed, hence they carry no message of their own.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// Forwarded from the underlying ancestor traversal, presumably when it is configured (e.g. when the sorting is applied).
#[error(transparent)]
AncestorIter(#[from] gix_traverse::commit::ancestors::Error),
/// Forwarded when the shallow commits of the repository could not be obtained (presumably produced by `Repository::shallow_commits()`).
#[error(transparent)]
ShallowCommits(#[from] crate::shallow::open::Error),
/// Forwarded when a boolean configuration value could not be interpreted (presumably the one deciding whether the
/// commit-graph may be used).
#[error(transparent)]
ConfigBoolean(#[from] crate::config::boolean::Error),
}
/// Information about a commit that we obtained naturally as part of the iteration.
///
/// It holds the same data as [`gix_traverse::commit::Info`], along with a reference to the repository
/// which allows to attach ids and to look up the commit object itself.
#[derive(Debug, Clone)]
pub struct Info<'repo> {
/// The detached id of the commit.
pub id: gix_hash::ObjectId,
/// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`][gix_traverse::commit::Parents::First] is enabled.
pub parent_ids: gix_traverse::commit::ParentIds,
/// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`][gix_traverse::commit::Sorting::BreadthFirst],
/// as only then the walk needs to obtain the commit-date.
pub commit_time: Option<gix_date::SecondsSinceUnixEpoch>,
/// The repository that ids are attached to and that objects are read from. It is removed by [`detach()`][Info::detach()].
repo: &'repo Repository,
}
/// Access
impl<'repo> Info<'repo> {
    /// Return the commit id of our [`id`][Info::id] field, attached to the repository we were created with.
    pub fn id(&self) -> crate::Id<'repo> {
        let (oid, repo) = (self.id, self.repo);
        oid.attach(repo)
    }

    /// Look up the commit itself in the object database and return it.
    ///
    /// This is costly compared to what is already available here, so only do it if more than
    /// the parent ids and the commit time are needed.
    ///
    /// # Errors
    ///
    /// Fails if the object lookup fails.
    pub fn object(&self) -> Result<crate::Commit<'repo>, crate::object::find::existing::Error> {
        let object = self.id().object()?;
        Ok(object.into_commit())
    }

    /// Return an iterator over the ids in our [`parent_ids`][Info::parent_ids] field, each attached to the repository.
    pub fn parent_ids(&self) -> impl Iterator<Item = crate::Id<'repo>> + '_ {
        let repo = self.repo;
        self.parent_ids.iter().map(move |parent| parent.attach(repo))
    }

    /// Return the time at which this commit was created.
    ///
    /// ### Panics
    ///
    /// If [`commit_time`][Info::commit_time] is `None`, which is the case if the iteration wasn't ordered by date.
    pub fn commit_time(&self) -> gix_date::SecondsSinceUnixEpoch {
        let time = self.commit_time;
        time.expect("traversal involving date caused it to be set")
    }
}
/// Initialization and detachment
impl<'repo> Info<'repo> {
    /// Attach the plain traversal `info` to `repo`, taking over all of its fields as they are.
    pub fn new(info: gix_traverse::commit::Info, repo: &'repo Repository) -> Self {
        let gix_traverse::commit::Info {
            id,
            parent_ids,
            commit_time,
        } = info;
        Self {
            id,
            parent_ids,
            commit_time,
            repo,
        }
    }

    /// Turn this instance back into the plain traversal information, dropping the repository reference.
    ///
    /// This is useful for sending instances across threads, for example.
    pub fn detach(self) -> gix_traverse::commit::Info {
        let Self {
            id,
            parent_ids,
            commit_time,
            repo: _,
        } = self;
        gix_traverse::commit::Info {
            id,
            parent_ids,
            commit_time,
        }
    }
}
/// A platform to traverse the revision graph by adding starting points as well as points which shouldn't be crossed,
/// returned by [`Repository::rev_walk()`].
///
/// **Note that we automatically leverage the commitgraph data structure**, but if you know that additional information like
/// author or commit messages will be required of *all* commits traversed here, it should be better to avoid trying to load it
/// by [turning commit-graph support off][Platform::use_commit_graph()]. This certainly is a micro-optimization though.
pub struct Platform<'repo> {
/// The repository providing objects, configuration and shallow-commit information for the traversal.
pub(crate) repo: &'repo Repository,
/// The ids of the commits at which the traversal starts.
pub(crate) tips: Vec<ObjectId>,
/// The order in which commits are yielded, see [`Platform::sorting()`].
pub(crate) sorting: gix_traverse::commit::Sorting,
/// Which parents of a commit to follow, see [`Platform::first_parent_only()`].
pub(crate) parents: gix_traverse::commit::Parents,
/// Whether the commit-graph of the repository may be loaded. If `None`, the repository configuration decides.
pub(crate) use_commit_graph: Option<bool>,
/// A commit-graph provided by the caller. If set, it is used in favor of the one of the repository.
pub(crate) commit_graph: Option<gix_commitgraph::Graph>,
}
impl<'repo> Platform<'repo> {
pub(crate) fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, repo: &'repo Repository) -> Self {
revision::walk::Platform {
repo,
tips: tips.into_iter().map(Into::into).collect(),
sorting: Default::default(),
parents: Default::default(),
use_commit_graph: None,
commit_graph: None,
}
}
}
/// Create-time builder methods
impl<'repo> Platform<'repo> {
    /// Yield commits in the order described by `sorting`. The default is to order topologically breadth-first.
    pub fn sorting(self, sorting: gix_traverse::commit::Sorting) -> Self {
        Self { sorting, ..self }
    }

    /// Follow only the first parent of each commit when traversing the commit graph.
    pub fn first_parent_only(self) -> Self {
        Self {
            parents: gix_traverse::commit::Parents::First,
            ..self
        }
    }

    /// Permit the use of the commitgraph, if present, with `toggle` being `true`, or forbid it with `false`.
    /// Pass `None` to let the configuration of `core.commitGraph` decide, which is the default.
    ///
    /// Errors when loading the graph lead to falling back to the object database, it's treated as optional cache.
    pub fn use_commit_graph(self, toggle: impl Into<Option<bool>>) -> Self {
        Self {
            use_commit_graph: toggle.into(),
            ..self
        }
    }

    /// Set or unset the commit-graph to use for the iteration. This is useful if the caller wants to check if a commit-graph
    /// exists and choose a different implementation depending on the outcome.
    ///
    /// It interacts with [`use_commit_graph`][Platform::use_commit_graph()] as one would expect, but it's worth noting that
    /// if `graph` is `None` while [`use_commit_graph`][Platform::use_commit_graph()] is `true`, a graph will still be used
    /// for iteration.
    /// To turn the commit-graph off, call [`use_commit_graph(false)`][Platform::use_commit_graph()] instead.
    pub fn with_commit_graph(self, graph: Option<gix_commitgraph::Graph>) -> Self {
        Self {
            commit_graph: graph,
            ..self
        }
    }
}
/// Produce the iterator
impl<'repo> Platform<'repo> {
    /// Return an iterator over the commits reachable as configured by the [Platform], pruned by `filter`.
    ///
    /// `filter` is called with the id of each commit, and returns `true` if the commit and its parents should be
    /// included in the traversal, or `false` if the traversal should exclude it and its ancestry entirely.
    ///
    /// Use [`all()`][Platform::all()] if no pruning of the graph is desired.
    ///
    /// # Errors
    ///
    /// Fails if the shallow commits of the repository can't be obtained, if the sorting can't be applied to the
    /// traversal, or if the configuration consulted for commit-graph usage can't be read.
    pub fn selected(
        self,
        mut filter: impl FnMut(&gix_hash::oid) -> bool + 'repo,
    ) -> Result<revision::Walk<'repo>, Error> {
        let Platform {
            repo,
            tips,
            sorting,
            parents,
            use_commit_graph,
            commit_graph,
        } = self;

        // Note that specific shallow handling for commit-graphs isn't needed as these contain
        // all information there is, and exclude shallow parents to be structurally consistent.
        let shallow_commits = repo.shallow_commits()?;
        // Sorted list of the parents of shallow commits we have seen; these mustn't be traversed.
        let mut grafted_parents_to_skip = Vec::new();
        let mut buf = Vec::new();

        let walk = gix_traverse::commit::Ancestors::filtered(
            tips,
            gix_traverse::commit::ancestors::State::default(),
            move |oid, buf| repo.objects.find_commit_iter(oid, buf),
            move |id| {
                // The caller's filter always has the first say.
                if !filter(id) {
                    return false;
                }
                let commits = match shallow_commits.as_ref() {
                    Some(commits) => commits,
                    // Without a shallow boundary there is nothing else to exclude.
                    None => return true,
                };

                let id = id.to_owned();
                if let Ok(idx) = grafted_parents_to_skip.binary_search(&id) {
                    // This is the parent of a shallow commit: forget about it and stop here.
                    grafted_parents_to_skip.remove(idx);
                    return false;
                }
                if commits.binary_search(&id).is_ok() {
                    // A shallow commit is included itself, but its parents are remembered to be skipped later.
                    // Failing to read the commit is ignored.
                    if let Ok(commit) = repo.objects.find_commit_iter(&id, &mut buf) {
                        grafted_parents_to_skip.extend(commit.parent_ids());
                        // Keep the list sorted for the binary search above.
                        grafted_parents_to_skip.sort();
                    }
                }
                true
            },
        )
        .sorting(sorting)?
        .parents(parents);

        // An explicit toggle wins over the configuration, which is consulted only if there is no toggle.
        let graph_allowed = match use_commit_graph {
            Some(toggle) => toggle,
            None => repo.config.may_use_commit_graph()?,
        };
        // The graph of the repository is an optional cache, hence failure to load it is ignored.
        let repo_graph = if graph_allowed {
            repo.commit_graph().ok()
        } else {
            None
        };

        Ok(revision::Walk {
            repo,
            // A graph provided by the caller is used in favor of the one of the repository.
            inner: Box::new(walk.commit_graph(commit_graph.or(repo_graph))),
        })
    }

    /// Return an iterator to traverse all commits reachable as configured by the [Platform], without any pruning.
    ///
    /// # Performance
    ///
    /// It's highly recommended to set an [`object cache`][Repository::object_cache_size()] on the parent repo
    /// to greatly speed up performance if the returned id is supposed to be looked up right after.
    pub fn all(self) -> Result<revision::Walk<'repo>, Error> {
        self.selected(|_id| true)
    }
}
pub(crate) mod iter {
    /// The iterator returned by [`crate::revision::walk::Platform::all()`] and
    /// [`crate::revision::walk::Platform::selected()`].
    pub struct Walk<'repo> {
        /// The repository that each yielded item is attached to.
        pub(crate) repo: &'repo crate::Repository,
        /// The underlying traversal which yields detached commit information.
        pub(crate) inner: Box<
            dyn Iterator<Item = Result<gix_traverse::commit::Info, gix_traverse::commit::ancestors::Error>> + 'repo,
        >,
    }

    impl<'repo> Iterator for Walk<'repo> {
        type Item = Result<super::Info<'repo>, gix_traverse::commit::ancestors::Error>;

        /// Advance the underlying traversal, attaching successfully obtained information to our repository
        /// and passing errors on as they are.
        fn next(&mut self) -> Option<Self::Item> {
            let repo = self.repo;
            match self.inner.next()? {
                Ok(info) => Some(Ok(super::Info::new(info, repo))),
                Err(err) => Some(Err(err)),
            }
        }
    }
}