git_revision/
describe.rs

1use std::{
2    borrow::Cow,
3    fmt::{Display, Formatter},
4};
5
6use bstr::BStr;
7use git_hashtable::HashMap;
8
9/// The positive result produced by [describe()][function::describe()].
10#[derive(Debug, Clone)]
11pub struct Outcome<'name> {
12    /// The name of the tag or branch that is closest to the commit `id`.
13    ///
14    /// If `None`, no name was found but it was requested to provide the `id` itself as fallback.
15    pub name: Option<Cow<'name, BStr>>,
16    /// The input commit object id that we describe.
17    pub id: git_hash::ObjectId,
18    /// The number of commits that are between the tag or branch with `name` and `id`.
19    /// These commits are all in the future of the named tag or branch.
20    pub depth: u32,
21    /// The mapping between object ids and their names initially provided by the describe call.
22    pub name_by_oid: HashMap<git_hash::ObjectId, Cow<'name, BStr>>,
23    /// The amount of commits we traversed.
24    pub commits_seen: u32,
25}
26
27impl<'a> Outcome<'a> {
28    /// Turn this outcome into a structure that can display itself in the typical `git describe` format.
29    pub fn into_format(self, hex_len: usize) -> Format<'a> {
30        Format {
31            name: self.name,
32            id: self.id,
33            hex_len,
34            depth: self.depth,
35            long: false,
36            dirty_suffix: None,
37        }
38    }
39}
40
41/// A structure implementing `Display`, producing a `git describe` like string.
42#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
43pub struct Format<'a> {
44    /// The name of the branch or tag to display, as is.
45    ///
46    /// If `None`, the `id` will be displayed as a fallback.
47    pub name: Option<Cow<'a, BStr>>,
48    /// The `id` of the commit to describe.
49    pub id: git_hash::ObjectId,
50    /// The amount of hex characters to use to display `id`.
51    pub hex_len: usize,
52    /// The amount of commits between `name` and `id`, where `id` is in the future of `name`.
53    pub depth: u32,
54    /// If true, the long form of the describe string will be produced even if `id` lies directly on `name`,
55    /// hence has a depth of 0.
56    pub long: bool,
57    /// If `Some(suffix)`, it will be appended to the describe string.
58    /// This should be set if the working tree was determined to be dirty.
59    pub dirty_suffix: Option<String>,
60}
61
62impl<'a> Format<'a> {
63    /// Return true if the `name` is directly associated with `id`, i.e. there are no commits between them.
64    pub fn is_exact_match(&self) -> bool {
65        self.depth == 0
66    }
67
68    /// Set this instance to print in long mode, that is if `depth` is 0, it will still print the whole
69    /// long form even though it's not quite necessary.
70    ///
71    /// Otherwise, it is allowed to shorten itself.
72    pub fn long(&mut self, long: bool) -> &mut Self {
73        self.long = long;
74        self
75    }
76}
77
78impl<'a> Display for Format<'a> {
79    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
80        if let Some(name) = self.name.as_deref() {
81            if !self.long && self.is_exact_match() {
82                name.fmt(f)?;
83            } else {
84                write!(f, "{}-{}-g{}", name, self.depth, self.id.to_hex_with_len(self.hex_len))?;
85            }
86        } else {
87            self.id.to_hex_with_len(self.hex_len).fmt(f)?;
88        }
89
90        if let Some(suffix) = &self.dirty_suffix {
91            write!(f, "-{suffix}")?;
92        }
93        Ok(())
94    }
95}
96
97type Flags = u32;
98const MAX_CANDIDATES: usize = std::mem::size_of::<Flags>() * 8;
99
100/// The options required to call [`describe()`][function::describe()].
101#[derive(Clone, Debug)]
102pub struct Options<'name> {
103    /// The candidate names from which to determine the `name` to use for the describe string,
104    /// as a mapping from a commit id and the name associated with it.
105    pub name_by_oid: HashMap<git_hash::ObjectId, Cow<'name, BStr>>,
106    /// The amount of names we will keep track of. Defaults to the maximum of 32.
107    ///
108    /// If the number is exceeded, it will be capped at 32 and defaults to 10.
109    pub max_candidates: usize,
110    /// If no candidate for naming, always show the abbreviated hash. Default: false.
111    pub fallback_to_oid: bool,
112    /// Only follow the first parent during graph traversal. Default: false.
113    ///
114    /// This may speed up the traversal at the cost of accuracy.
115    pub first_parent: bool,
116}
117
118impl<'name> Default for Options<'name> {
119    fn default() -> Self {
120        Options {
121            max_candidates: 10, // the same number as git uses, otherwise we perform worse by default on big repos
122            name_by_oid: Default::default(),
123            fallback_to_oid: false,
124            first_parent: false,
125        }
126    }
127}
128
129/// The error returned by the [`describe()`][function::describe()] function.
130#[derive(Debug, thiserror::Error)]
131#[allow(missing_docs)]
132pub enum Error<E>
133where
134    E: std::error::Error + Send + Sync + 'static,
135{
136    #[error("Commit {} could not be found during graph traversal", .oid.to_hex())]
137    Find {
138        #[source]
139        err: Option<E>,
140        oid: git_hash::ObjectId,
141    },
142    #[error("A commit could not be decoded during traversal")]
143    Decode(#[from] git_object::decode::Error),
144}
145
146pub(crate) mod function {
147    use std::{borrow::Cow, cmp::Ordering, collections::VecDeque, iter::FromIterator};
148
149    use bstr::BStr;
150    use git_hash::oid;
151    use git_hashtable::{hash_map, HashMap};
152    use git_object::CommitRefIter;
153
154    use super::{Error, Outcome};
155    use crate::describe::{Flags, Options, MAX_CANDIDATES};
156
157    /// Given a `commit` id, traverse the commit graph and collect candidate names from the `name_by_oid` mapping to produce
158    /// an `Outcome`, which converted [`into_format()`][Outcome::into_format()] will produce a typical `git describe` string.
159    ///
160    /// Note that the `name_by_oid` map is returned in the [`Outcome`], which can be forcefully returned even if there was no matching
161    /// candidate by setting `fallback_to_oid` to true.
162    pub fn describe<'name, Find, E>(
163        commit: &oid,
164        mut find: Find,
165        Options {
166            name_by_oid,
167            mut max_candidates,
168            fallback_to_oid,
169            first_parent,
170        }: Options<'name>,
171    ) -> Result<Option<Outcome<'name>>, Error<E>>
172    where
173        Find: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<Option<CommitRefIter<'b>>, E>,
174        E: std::error::Error + Send + Sync + 'static,
175    {
176        max_candidates = max_candidates.min(MAX_CANDIDATES);
177        if let Some(name) = name_by_oid.get(commit) {
178            return Ok(Some(Outcome {
179                name: name.clone().into(),
180                id: commit.to_owned(),
181                depth: 0,
182                name_by_oid,
183                commits_seen: 0,
184            }));
185        }
186
187        if max_candidates == 0 || name_by_oid.is_empty() {
188            return if fallback_to_oid {
189                Ok(Some(Outcome {
190                    id: commit.to_owned(),
191                    name: None,
192                    name_by_oid,
193                    depth: 0,
194                    commits_seen: 0,
195                }))
196            } else {
197                Ok(None)
198            };
199        }
200
201        let mut buf = Vec::new();
202        let mut parent_buf = Vec::new();
203
204        let mut queue = VecDeque::from_iter(Some((commit.to_owned(), u32::MAX)));
205        let mut candidates = Vec::new();
206        let mut commits_seen = 0;
207        let mut gave_up_on_commit = None;
208        let mut seen = HashMap::<git_hash::ObjectId, Flags>::default();
209        seen.insert(commit.to_owned(), 0u32);
210
211        while let Some((commit, _commit_time)) = queue.pop_front() {
212            commits_seen += 1;
213            if let Some(name) = name_by_oid.get(&commit) {
214                if candidates.len() < max_candidates {
215                    let identity_bit = 1 << candidates.len();
216                    candidates.push(Candidate {
217                        name: name.clone(),
218                        commits_in_its_future: commits_seen - 1,
219                        identity_bit,
220                        order: candidates.len(),
221                    });
222                    *seen.get_mut(&commit).expect("inserted") |= identity_bit;
223                } else {
224                    gave_up_on_commit = Some(commit);
225                    break;
226                }
227            }
228
229            let flags = seen[&commit];
230            for candidate in candidates
231                .iter_mut()
232                .filter(|c| (flags & c.identity_bit) != c.identity_bit)
233            {
234                candidate.commits_in_its_future += 1;
235            }
236
237            if queue.is_empty() && !candidates.is_empty() {
238                // single-trunk history that waits to be replenished.
239                // Abort early if the best-candidate is in the current commits past.
240                let mut shortest_depth = Flags::MAX;
241                let mut best_candidates_at_same_depth = 0_u32;
242                for candidate in &candidates {
243                    match candidate.commits_in_its_future.cmp(&shortest_depth) {
244                        Ordering::Less => {
245                            shortest_depth = candidate.commits_in_its_future;
246                            best_candidates_at_same_depth = candidate.identity_bit;
247                        }
248                        Ordering::Equal => {
249                            best_candidates_at_same_depth |= candidate.identity_bit;
250                        }
251                        Ordering::Greater => {}
252                    }
253                }
254
255                if (flags & best_candidates_at_same_depth) == best_candidates_at_same_depth {
256                    break;
257                }
258            }
259
260            parents_by_date_onto_queue_and_track_names(
261                &mut find,
262                &mut buf,
263                &mut parent_buf,
264                &mut queue,
265                &mut seen,
266                &commit,
267                flags,
268                first_parent,
269            )?;
270        }
271
272        if candidates.is_empty() {
273            return if fallback_to_oid {
274                Ok(Some(Outcome {
275                    id: commit.to_owned(),
276                    name: None,
277                    name_by_oid,
278                    depth: 0,
279                    commits_seen,
280                }))
281            } else {
282                Ok(None)
283            };
284        }
285
286        candidates.sort_by(|a, b| {
287            a.commits_in_its_future
288                .cmp(&b.commits_in_its_future)
289                .then_with(|| a.order.cmp(&b.order))
290        });
291
292        if let Some(commit_id) = gave_up_on_commit {
293            queue.push_front((commit_id, u32::MAX));
294            commits_seen -= 1;
295        }
296
297        commits_seen += finish_depth_computation(
298            queue,
299            find,
300            candidates.first_mut().expect("at least one candidate"),
301            seen,
302            buf,
303            parent_buf,
304            first_parent,
305        )?;
306
307        Ok(candidates.into_iter().next().map(|c| Outcome {
308            name: c.name.into(),
309            id: commit.to_owned(),
310            depth: c.commits_in_its_future,
311            name_by_oid,
312            commits_seen,
313        }))
314    }
315
316    #[allow(clippy::too_many_arguments)]
317    fn parents_by_date_onto_queue_and_track_names<Find, E>(
318        find: &mut Find,
319        buf: &mut Vec<u8>,
320        parent_buf: &mut Vec<u8>,
321        queue: &mut VecDeque<(git_hash::ObjectId, u32)>,
322        seen: &mut HashMap<git_hash::ObjectId, Flags>,
323        commit: &git_hash::oid,
324        commit_flags: Flags,
325        first_parent: bool,
326    ) -> Result<(), Error<E>>
327    where
328        Find: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<Option<CommitRefIter<'b>>, E>,
329        E: std::error::Error + Send + Sync + 'static,
330    {
331        let commit_iter = find(commit, buf)
332            .map_err(|err| Error::Find {
333                err: Some(err),
334                oid: commit.to_owned(),
335            })?
336            .ok_or_else(|| Error::Find {
337                err: None,
338                oid: commit.to_owned(),
339            })?;
340        for token in commit_iter {
341            match token {
342                Ok(git_object::commit::ref_iter::Token::Tree { .. }) => continue,
343                Ok(git_object::commit::ref_iter::Token::Parent { id: parent_id }) => match seen.entry(parent_id) {
344                    hash_map::Entry::Vacant(entry) => {
345                        let parent = match find(&parent_id, parent_buf).map_err(|err| Error::Find {
346                            err: Some(err),
347                            oid: commit.to_owned(),
348                        })? {
349                            Some(p) => p,
350                            None => continue, // skip missing objects, they don't exist.
351                        };
352
353                        let parent_commit_date = parent
354                            .committer()
355                            .map(|committer| committer.time.seconds_since_unix_epoch)
356                            .unwrap_or_default();
357
358                        entry.insert(commit_flags);
359                        match queue.binary_search_by(|c| c.1.cmp(&parent_commit_date).reverse()) {
360                            Ok(_) => queue.push_back((parent_id, parent_commit_date)),
361                            Err(pos) => queue.insert(pos, (parent_id, parent_commit_date)),
362                        };
363                    }
364                    hash_map::Entry::Occupied(mut entry) => {
365                        *entry.get_mut() |= commit_flags;
366                    }
367                },
368                Ok(_unused_token) => break,
369                Err(err) => return Err(err.into()),
370            }
371            if first_parent {
372                break;
373            }
374        }
375
376        Ok(())
377    }
378
379    #[allow(clippy::too_many_arguments)]
380    fn finish_depth_computation<'name, Find, E>(
381        mut queue: VecDeque<(git_hash::ObjectId, u32)>,
382        mut find: Find,
383        best_candidate: &mut Candidate<'name>,
384        mut seen: HashMap<git_hash::ObjectId, Flags>,
385        mut buf: Vec<u8>,
386        mut parent_buf: Vec<u8>,
387        first_parent: bool,
388    ) -> Result<u32, Error<E>>
389    where
390        Find: for<'b> FnMut(&oid, &'b mut Vec<u8>) -> Result<Option<CommitRefIter<'b>>, E>,
391        E: std::error::Error + Send + Sync + 'static,
392    {
393        let mut commits_seen = 0;
394        while let Some((commit, _commit_time)) = queue.pop_front() {
395            commits_seen += 1;
396            let flags = seen[&commit];
397            if (flags & best_candidate.identity_bit) == best_candidate.identity_bit {
398                if queue
399                    .iter()
400                    .all(|(id, _)| (seen[id] & best_candidate.identity_bit) == best_candidate.identity_bit)
401                {
402                    break;
403                }
404            } else {
405                best_candidate.commits_in_its_future += 1;
406            }
407
408            parents_by_date_onto_queue_and_track_names(
409                &mut find,
410                &mut buf,
411                &mut parent_buf,
412                &mut queue,
413                &mut seen,
414                &commit,
415                flags,
416                first_parent,
417            )?;
418        }
419        Ok(commits_seen)
420    }
421
422    #[derive(Debug)]
423    struct Candidate<'a> {
424        name: Cow<'a, BStr>,
425        commits_in_its_future: Flags,
426        /// A single bit identifying this candidate uniquely in a bitset
427        identity_bit: Flags,
428        /// The order at which we found the candidate, first one has order = 0
429        order: usize,
430    }
431}