Skip to main content

grit_lib/
name_rev.rs

1//! Name-rev: name commits relative to refs.
2//!
3//! This module implements the algorithm behind `git name-rev`: given a set of
4//! refs it walks backwards through the commit graph and assigns each reachable
5//! commit a human-readable name derived from the nearest ref tip.
6//!
7//! # Name format
8//!
9//! - Directly pointed to by a ref: `<refname>` (e.g. `tags/v1.0`, `main`)
10//! - Tag object pointing at the commit: `<refname>^0`
11//! - N first-parent hops from a tip: `<refname>~N`
12//! - Second or later parent of a merge: `<refname>^2`, `<refname>~3^2`, etc.
13
14use crate::error::{Error, Result};
15use crate::ident::committer_unix_seconds_for_ordering;
16use crate::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind};
17use crate::refs;
18use crate::repo::Repository;
19use std::collections::{HashMap, VecDeque};
20
21/// How many first-parent generations are preferred over a single merge hop.
22const MERGE_TRAVERSAL_WEIGHT: u32 = 65_535;
23
24/// Internal per-commit naming record.
25#[derive(Clone, Debug)]
26struct RevName {
27    /// The ref-derived name for the tip of this naming chain.
28    ///
29    /// This is stored verbatim (e.g. `"main"` or `"tags/v1.0^0"`); the final
30    /// `~N` generation suffix is appended only at display time.
31    tip_name: String,
32    /// Tagger / committer date of the originating ref tip (Unix timestamp).
33    taggerdate: i64,
34    /// Number of first-parent hops from the naming tip to this commit.
35    generation: u32,
36    /// Weighted hop count used for name-quality comparisons.
37    distance: u32,
38    /// Whether the originating ref is under `refs/tags/`.
39    from_tag: bool,
40}
41
42/// Options that control which refs participate in naming.
43#[derive(Debug, Default, Clone)]
44pub struct NameRevOptions {
45    /// When true, only `refs/tags/` refs are used.
46    pub tags_only: bool,
47    /// When true, tag ref names are shortened to bare names (strips `tags/`).
48    ///
49    /// In Git this is enabled by `--tags --name-only` together.  Pass this as
50    /// `tags_only && name_only` from the CLI layer.
51    pub shorten_tags: bool,
52    /// Glob patterns — only refs whose path (or any sub-path) matches at least
53    /// one pattern are used.  Empty means "all refs pass".
54    pub ref_filters: Vec<String>,
55    /// Glob patterns — refs whose path (or any sub-path) matches are excluded.
56    pub exclude_filters: Vec<String>,
57}
58
59/// Build the complete `ObjectId → display-name` mapping for all commits
60/// reachable from the applicable refs in `repo`.
61///
62/// The returned map contains every commit that could be named given the
63/// supplied `options`.  Commits not reachable from any passing ref are absent
64/// from the map.
65///
66/// # Errors
67///
68/// Returns [`Error::Io`] for filesystem problems and [`Error::CorruptObject`]
69/// for malformed objects.
70pub fn build_name_map(
71    repo: &Repository,
72    options: &NameRevOptions,
73) -> Result<HashMap<ObjectId, String>> {
74    let tips = collect_tips(repo, options)?;
75    let mut names: HashMap<ObjectId, RevName> = HashMap::new();
76
77    let mut commit_cache: HashMap<ObjectId, CommitData> = HashMap::new();
78
79    for tip in &tips {
80        let Some(commit_oid) = tip.commit_oid else {
81            continue;
82        };
83        if tip.deref {
84            names.insert(
85                tip.object_oid,
86                RevName {
87                    tip_name: tip.display_name.clone(),
88                    taggerdate: tip.taggerdate,
89                    generation: 0,
90                    distance: 0,
91                    from_tag: tip.from_tag,
92                },
93            );
94        }
95        name_from_tip(
96            repo,
97            &mut names,
98            &mut commit_cache,
99            commit_oid,
100            &tip.display_name,
101            tip.taggerdate,
102            tip.from_tag,
103            tip.deref,
104        )?;
105    }
106
107    Ok(names
108        .into_iter()
109        .map(|(oid, name)| (oid, format_name(&name)))
110        .collect())
111}
112
113/// Return the display name for a named commit.
114///
115/// - `generation == 0`: returns `tip_name` as-is.
116/// - `generation > 0`: returns `<tip_name>~<generation>` with any trailing
117///   `"^0"` stripped from `tip_name` first.
118fn format_name(name: &RevName) -> String {
119    if name.generation == 0 {
120        return name.tip_name.clone();
121    }
122    let base = name.tip_name.strip_suffix("^0").unwrap_or(&name.tip_name);
123    format!("{}~{}", base, name.generation)
124}
125
126/// Compute the effective comparison distance for a (distance, generation) pair.
127///
128/// First-parent chains add [`MERGE_TRAVERSAL_WEIGHT`] to make merge-traversal
129/// paths look "closer" (lower distance) than deep first-parent chains when
130/// generation > 0.
131fn effective_distance(distance: u32, generation: u32) -> u32 {
132    distance.saturating_add(if generation > 0 {
133        MERGE_TRAVERSAL_WEIGHT
134    } else {
135        0
136    })
137}
138
139/// Return `true` when the proposed name is strictly better than the existing one.
140///
141/// Preference order:
142/// 1. Tags over non-tags.
143/// 2. Smaller effective distance.
144/// 3. Older tagger/committer date (more stable refs first).
145fn is_better_name(
146    existing: &RevName,
147    taggerdate: i64,
148    generation: u32,
149    distance: u32,
150    from_tag: bool,
151) -> bool {
152    let existing_eff = effective_distance(existing.distance, existing.generation);
153    let new_eff = effective_distance(distance, generation);
154
155    // Tags beat non-tags.
156    if from_tag && existing.from_tag {
157        return existing_eff > new_eff;
158    }
159    if existing.from_tag != from_tag {
160        return from_tag;
161    }
162
163    // Both non-tags: prefer smaller effective distance.
164    if existing_eff != new_eff {
165        return existing_eff > new_eff;
166    }
167
168    // Tiebreak by older date (more stable / earlier tag).
169    if existing.taggerdate != taggerdate {
170        return existing.taggerdate > taggerdate;
171    }
172
173    false
174}
175
176/// Derive the `tip_name` for a non-first parent.
177///
178/// Uses the *current* commit's name (not the parent's) so that e.g. the second
179/// parent of `main~3` is named `main~3^2`.
180fn get_parent_name(current: &RevName, parent_number: u32) -> String {
181    let base = current
182        .tip_name
183        .strip_suffix("^0")
184        .unwrap_or(&current.tip_name);
185    if current.generation > 0 {
186        format!("{}~{}^{}", base, current.generation, parent_number)
187    } else {
188        format!("{}^{}", base, parent_number)
189    }
190}
191
192/// Walk backwards from `start_oid`, assigning names to all reachable ancestors.
193///
194/// Uses an explicit stack to avoid deep recursion on long histories.
195fn name_from_tip(
196    repo: &Repository,
197    names: &mut HashMap<ObjectId, RevName>,
198    commit_cache: &mut HashMap<ObjectId, CommitData>,
199    start_oid: ObjectId,
200    tip_name: &str,
201    taggerdate: i64,
202    from_tag: bool,
203    deref: bool,
204) -> Result<()> {
205    let actual_tip_name = if deref {
206        format!("{}^0", tip_name)
207    } else {
208        tip_name.to_owned()
209    };
210
211    // Seed the starting commit.
212    let should_start = match names.get(&start_oid) {
213        None => true,
214        Some(existing) => is_better_name(existing, taggerdate, 0, 0, from_tag),
215    };
216    if !should_start {
217        return Ok(());
218    }
219    names.insert(
220        start_oid,
221        RevName {
222            tip_name: actual_tip_name,
223            taggerdate,
224            generation: 0,
225            distance: 0,
226            from_tag,
227        },
228    );
229
230    // Stack-based DFS; first parent is processed first.
231    let mut stack: Vec<ObjectId> = vec![start_oid];
232
233    while let Some(oid) = stack.pop() {
234        let current = match names.get(&oid) {
235            Some(n) => n.clone(),
236            None => continue,
237        };
238
239        let commit = match load_commit_cached(repo, commit_cache, oid) {
240            Ok(c) => c,
241            Err(_) => continue,
242        };
243        // Clone parents out so we can release the borrow.
244        let parents = commit.parents.clone();
245
246        let mut to_push: Vec<ObjectId> = Vec::new();
247
248        for (idx, parent_oid) in parents.iter().enumerate() {
249            let parent_number = (idx + 1) as u32;
250
251            let (parent_gen, parent_dist) = if parent_number > 1 {
252                (
253                    0u32,
254                    current.distance.saturating_add(MERGE_TRAVERSAL_WEIGHT),
255                )
256            } else {
257                (
258                    current.generation.saturating_add(1),
259                    current.distance.saturating_add(1),
260                )
261            };
262
263            let should_update = match names.get(parent_oid) {
264                None => true,
265                Some(existing) => {
266                    is_better_name(existing, taggerdate, parent_gen, parent_dist, from_tag)
267                }
268            };
269
270            if should_update {
271                let parent_tip_name = if parent_number > 1 {
272                    get_parent_name(&current, parent_number)
273                } else {
274                    current.tip_name.clone()
275                };
276
277                names.insert(
278                    *parent_oid,
279                    RevName {
280                        tip_name: parent_tip_name,
281                        taggerdate,
282                        generation: parent_gen,
283                        distance: parent_dist,
284                        from_tag,
285                    },
286                );
287                to_push.push(*parent_oid);
288            }
289        }
290
291        for parent in to_push.into_iter().rev() {
292            stack.push(parent);
293        }
294    }
295
296    Ok(())
297}
298
299/// A ref tip to be used as a naming source.
300struct TipEntry {
301    /// The object ID stored directly in the ref.
302    object_oid: ObjectId,
303    /// Short display name for output (e.g. `"main"`, `"tags/v1.0"`).
304    display_name: String,
305    /// Peeled commit OID, if the ref ultimately resolves to a commit.
306    commit_oid: Option<ObjectId>,
307    /// Tagger date (for annotated tags) or committer date.
308    taggerdate: i64,
309    /// True when the ref is under `refs/tags/`.
310    from_tag: bool,
311    /// True when the object was a tag and we peeled it to reach the commit.
312    deref: bool,
313}
314
315/// Collect all ref tips from the repository, applying option filters.
316fn collect_tips(repo: &Repository, options: &NameRevOptions) -> Result<Vec<TipEntry>> {
317    let all_refs = refs::list_refs(&repo.git_dir, "refs/")?;
318    let mut tips: Vec<TipEntry> = Vec::new();
319
320    for (refname, oid) in all_refs {
321        if options.tags_only && !refname.starts_with("refs/tags/") {
322            continue;
323        }
324
325        // Exclude filters.
326        if options
327            .exclude_filters
328            .iter()
329            .any(|pat| subpath_matches(&refname, pat))
330        {
331            continue;
332        }
333
334        // Include filters (if specified, at least one must match).
335        let can_abbreviate = if !options.ref_filters.is_empty() {
336            let mut matched = false;
337            let mut subpath_match = false;
338            for pat in &options.ref_filters {
339                match subpath_match_kind(&refname, pat) {
340                    SubpathMatch::Full => matched = true,
341                    SubpathMatch::Sub => {
342                        matched = true;
343                        subpath_match = true;
344                    }
345                    SubpathMatch::None => {}
346                }
347            }
348            if !matched {
349                continue;
350            }
351            subpath_match
352        } else {
353            // No filter: can abbreviate if tags_only (and caller also requests name_only,
354            // but we handle that at the CLI layer).
355            false
356        };
357
358        let from_tag = refname.starts_with("refs/tags/");
359        let display_name = shorten_refname(&refname, can_abbreviate || options.shorten_tags);
360
361        // Peel the object: follow tag chains until we reach a commit.
362        let (commit_oid, taggerdate, deref) = peel_to_commit(repo, oid)?;
363
364        tips.push(TipEntry {
365            object_oid: oid,
366            display_name,
367            commit_oid,
368            taggerdate,
369            from_tag,
370            deref,
371        });
372    }
373
374    // Sort: tags first (from_tag=true sorts before false), then older taggerdate.
375    tips.sort_by(|a, b| {
376        let tag_cmp = b.from_tag.cmp(&a.from_tag); // true > false, so b first = tags first
377        if tag_cmp != std::cmp::Ordering::Equal {
378            return tag_cmp;
379        }
380        a.taggerdate.cmp(&b.taggerdate)
381    });
382
383    Ok(tips)
384}
385
386/// Peel an object ID through tag layers to find the underlying commit.
387///
388/// Returns `(commit_oid, taggerdate, was_dereffed)`.  `taggerdate` is the
389/// tagger timestamp from the first (outermost) tag encountered, or the
390/// committer timestamp if the object is a plain commit.  `was_dereffed` is
391/// true if at least one tag layer was peeled.
392fn peel_to_commit(repo: &Repository, mut oid: ObjectId) -> Result<(Option<ObjectId>, i64, bool)> {
393    let mut deref = false;
394    let mut taggerdate: Option<i64> = None;
395
396    loop {
397        let obj = match repo.odb.read(&oid) {
398            Ok(o) => o,
399            Err(_) => return Ok((None, taggerdate.unwrap_or(0), deref)),
400        };
401
402        match obj.kind {
403            ObjectKind::Commit => {
404                let ts = if let Ok(c) = parse_commit(&obj.data) {
405                    parse_signature_time(&c.committer)
406                } else {
407                    0
408                };
409                let date = taggerdate.unwrap_or(ts);
410                return Ok((Some(oid), date, deref));
411            }
412            ObjectKind::Tag => {
413                let tag = match parse_tag(&obj.data) {
414                    Ok(t) => t,
415                    Err(_) => return Ok((None, taggerdate.unwrap_or(0), deref)),
416                };
417                // Record the outermost tag's date.
418                if taggerdate.is_none() {
419                    taggerdate = Some(tag.tagger.as_deref().map(parse_signature_time).unwrap_or(0));
420                }
421                oid = tag.object;
422                deref = true;
423            }
424            _ => return Ok((None, taggerdate.unwrap_or(0), deref)),
425        }
426    }
427}
428
429/// Walk all commits reachable from every ref and return their OIDs.
430///
431/// Used by `--all` mode to enumerate commits that should be named and printed.
432///
433/// # Errors
434///
435/// Returns object or I/O errors on failure.
436pub fn all_reachable_commits(repo: &Repository) -> Result<Vec<ObjectId>> {
437    let all_refs = refs::list_refs(&repo.git_dir, "refs/")?;
438    let mut seen: std::collections::HashSet<ObjectId> = std::collections::HashSet::new();
439    let mut queue: VecDeque<ObjectId> = VecDeque::new();
440
441    for (_, oid) in all_refs {
442        // Peel to commit if needed.
443        let (commit_oid, _, _) = peel_to_commit(repo, oid)?;
444        if let Some(c) = commit_oid {
445            if seen.insert(c) {
446                queue.push_back(c);
447            }
448        }
449    }
450
451    while let Some(oid) = queue.pop_front() {
452        let commit = match load_commit(repo, oid) {
453            Ok(c) => c,
454            Err(_) => continue,
455        };
456        for parent in commit.parents {
457            if seen.insert(parent) {
458                queue.push_back(parent);
459            }
460        }
461    }
462
463    let mut result: Vec<ObjectId> = seen.into_iter().collect();
464    result.sort();
465    Ok(result)
466}
467
468/// Shorten a fully-qualified ref name to its display form.
469///
470/// - `can_abbreviate = true`: strip `refs/heads/`, `refs/tags/`, or `refs/`
471///   (mimics Git's unambiguous-ref shortening for `--tags --name-only`).
472/// - `can_abbreviate = false`: strip only `refs/heads/`; other prefixes lose
473///   only the leading `refs/` component, preserving `tags/`, `remotes/`, etc.
474fn shorten_refname(refname: &str, can_abbreviate: bool) -> String {
475    if can_abbreviate {
476        if let Some(rest) = refname.strip_prefix("refs/heads/") {
477            return rest.to_owned();
478        }
479        if let Some(rest) = refname.strip_prefix("refs/tags/") {
480            return rest.to_owned();
481        }
482        if let Some(rest) = refname.strip_prefix("refs/") {
483            return rest.to_owned();
484        }
485        return refname.to_owned();
486    }
487    // Default: strip refs/heads/ only; everything else keeps its sub-namespace.
488    if let Some(rest) = refname.strip_prefix("refs/heads/") {
489        return rest.to_owned();
490    }
491    if let Some(rest) = refname.strip_prefix("refs/") {
492        return rest.to_owned();
493    }
494    refname.to_owned()
495}
496
497/// Possible outcomes of matching a ref path against a glob pattern.
498#[derive(PartialEq, Eq)]
499enum SubpathMatch {
500    /// The pattern matched the full path starting at position 0.
501    Full,
502    /// The pattern matched a sub-path (after a `/`).
503    Sub,
504    /// No match.
505    None,
506}
507
508/// Test whether `pattern` matches `path` or any slash-separated suffix of it.
509///
510/// Returns the kind of match found (full, sub-path, or none), matching
511/// Git's `subpath_matches` semantics.
512fn subpath_match_kind(path: &str, pattern: &str) -> SubpathMatch {
513    // Try the full path first.
514    if glob_matches(pattern, path) {
515        return SubpathMatch::Full;
516    }
517    // Try every sub-path (after each '/').
518    let mut rest = path;
519    while let Some(pos) = rest.find('/') {
520        rest = &rest[pos + 1..];
521        if glob_matches(pattern, rest) {
522            return SubpathMatch::Sub;
523        }
524    }
525    SubpathMatch::None
526}
527
528/// Return `true` when the path (or any sub-path) matches the pattern.
529fn subpath_matches(path: &str, pattern: &str) -> bool {
530    subpath_match_kind(path, pattern) != SubpathMatch::None
531}
532
533/// Minimal glob matcher supporting `*` (any sequence) and `?` (any single char).
534///
535/// All other characters match literally.
536fn glob_matches(pattern: &str, text: &str) -> bool {
537    let pat: Vec<char> = pattern.chars().collect();
538    let txt: Vec<char> = text.chars().collect();
539    glob_match_inner(&pat, &txt)
540}
541
542fn glob_match_inner(pat: &[char], txt: &[char]) -> bool {
543    match (pat.first(), txt.first()) {
544        (None, None) => true,
545        (None, Some(_)) => false,
546        (Some('*'), _) => {
547            // '*' matches zero or more characters.
548            glob_match_inner(&pat[1..], txt)
549                || (!txt.is_empty() && glob_match_inner(pat, &txt[1..]))
550        }
551        (Some('?'), Some(_)) => glob_match_inner(&pat[1..], &txt[1..]),
552        (Some('?'), None) => false,
553        (Some(p), Some(t)) => p == t && glob_match_inner(&pat[1..], &txt[1..]),
554        (Some(_), None) => false,
555    }
556}
557
558/// Parse the Unix timestamp from a Git signature string.
559///
560/// A signature has the form `"Name <email> <timestamp> <tz>"`.  This returns
561/// the second-to-last whitespace-delimited token parsed as an `i64`, or `0`
562/// on parse failure.
563pub(crate) fn parse_signature_time(sig: &str) -> i64 {
564    committer_unix_seconds_for_ordering(sig)
565}
566
567/// Load and parse a commit object, using a cache to avoid re-reading.
568fn load_commit_cached<'c>(
569    repo: &Repository,
570    cache: &'c mut HashMap<ObjectId, CommitData>,
571    oid: ObjectId,
572) -> Result<&'c CommitData> {
573    if let std::collections::hash_map::Entry::Vacant(e) = cache.entry(oid) {
574        let obj = repo.odb.read(&oid)?;
575        if obj.kind != ObjectKind::Commit {
576            return Err(Error::CorruptObject(format!(
577                "object {oid} is not a commit"
578            )));
579        }
580        let commit = parse_commit(&obj.data)?;
581        e.insert(commit);
582    }
583    cache
584        .get(&oid)
585        .ok_or_else(|| Error::CorruptObject(format!("commit {oid} missing from cache")))
586}
587
588/// Load and parse a commit object from the object database.
589fn load_commit(repo: &Repository, oid: ObjectId) -> Result<CommitData> {
590    let obj = repo.odb.read(&oid)?;
591    if obj.kind != ObjectKind::Commit {
592        return Err(Error::CorruptObject(format!(
593            "object {oid} is not a commit"
594        )));
595    }
596    parse_commit(&obj.data)
597}
598
599/// Resolve a full 40-hex OID string, an abbreviated OID, or a ref name to an
600/// [`ObjectId`].
601///
602/// Used when the caller passes raw OID strings on the command line.
603///
604/// # Errors
605///
606/// Returns [`Error::ObjectNotFound`] when the spec cannot be resolved.
607pub fn resolve_oid(repo: &Repository, spec: &str) -> Result<ObjectId> {
608    crate::rev_parse::resolve_revision(repo, spec)
609}
610
611/// Look up the best name for `oid` in the given map, peeling tags first.
612///
613/// For non-commit objects (e.g. a tag object), the map may contain an entry
614/// under the *underlying* commit OID.  This function tries `oid` directly, and
615/// if that misses (and the object is a tag), it peels and retries.
616///
617/// Returns `None` when no name is available.
618///
619/// # Errors
620///
621/// Returns object read errors.
622pub fn lookup_name<'m>(
623    repo: &Repository,
624    name_map: &'m HashMap<ObjectId, String>,
625    oid: ObjectId,
626) -> Result<Option<&'m String>> {
627    // Fast path: direct hit (works for commits and exact-ref matches).
628    if let Some(name) = name_map.get(&oid) {
629        return Ok(Some(name));
630    }
631
632    // For tag objects: peel to commit and try again.
633    let obj = match repo.odb.read(&oid) {
634        Ok(o) => o,
635        Err(_) => return Ok(None),
636    };
637    if obj.kind == ObjectKind::Tag {
638        let (commit_oid, _, _) = peel_to_commit(repo, oid)?;
639        if let Some(c) = commit_oid {
640            return Ok(name_map.get(&c));
641        }
642    }
643    Ok(None)
644}
645
646/// Annotate a line of text: wherever a 40-hex sequence appears whose OID is in
647/// `name_map`, append ` (<name>)` after the hex string (or replace the hex
648/// string with the name when `name_only` is true).
649///
650/// Returns the annotated line (including trailing newline if the original had
651/// one).
652pub fn annotate_line(
653    repo: &Repository,
654    name_map: &HashMap<ObjectId, String>,
655    line: &str,
656    name_only: bool,
657) -> Result<String> {
658    let mut out = String::with_capacity(line.len() + 32);
659    let chars: Vec<char> = line.chars().collect();
660    let hex_len = 40usize;
661    let mut i = 0usize;
662    let mut flush_start = 0usize;
663
664    while i + hex_len <= chars.len() {
665        // Check if chars[i..i+hex_len] is all hex and the char after is not hex.
666        let slice: String = chars[i..i + hex_len].iter().collect();
667        let after_is_hex = chars
668            .get(i + hex_len)
669            .map(|c| c.is_ascii_hexdigit())
670            .unwrap_or(false);
671        if !after_is_hex && slice.chars().all(|c| c.is_ascii_hexdigit()) {
672            // Try to resolve this as an OID.
673            if let Ok(oid) = slice.parse::<ObjectId>() {
674                if let Ok(Some(name)) = lookup_name(repo, name_map, oid) {
675                    // Flush everything before this match.
676                    let prefix: String = chars[flush_start..i].iter().collect();
677                    out.push_str(&prefix);
678
679                    if name_only {
680                        out.push_str(name);
681                    } else {
682                        out.push_str(&slice);
683                        out.push_str(" (");
684                        out.push_str(name);
685                        out.push(')');
686                    }
687                    flush_start = i + hex_len;
688                    i += hex_len;
689                    continue;
690                }
691            }
692        }
693        i += 1;
694    }
695
696    // Flush remainder.
697    let tail: String = chars[flush_start..].iter().collect();
698    out.push_str(&tail);
699    Ok(out)
700}
701
702/// Return a short abbreviated hex string for `oid` (first `len` hex chars).
703#[must_use]
704pub fn abbrev_oid(oid: ObjectId, len: usize) -> String {
705    let hex = oid.to_hex();
706    let n = len.clamp(4, 40).min(hex.len());
707    hex[..n].to_owned()
708}
709
710/// Return all commits reachable from `refs/` sorted by OID (for `--all` mode).
711///
712/// This is an alias for [`all_reachable_commits`] exposed for the CLI.
713pub use self::all_reachable_commits as walk_all_commits;
714
715/// Check if a path exists in the object database (either OID).
716pub fn object_exists(repo: &Repository, oid: ObjectId) -> bool {
717    repo.odb.exists(&oid)
718}