Skip to main content

grit_lib/
name_rev.rs

1//! Name-rev: name commits relative to refs.
2//!
3//! This module implements the algorithm behind `git name-rev`: given a set of
4//! refs it walks backwards through the commit graph and assigns each reachable
5//! commit a human-readable name derived from the nearest ref tip.
6//!
7//! # Name format
8//!
9//! - Directly pointed to by a ref: `<refname>` (e.g. `tags/v1.0`, `main`)
10//! - Tag object pointing at the commit: `<refname>^0`
11//! - N first-parent hops from a tip: `<refname>~N`
12//! - Second or later parent of a merge: `<refname>^2`, `<refname>~3^2`, etc.
13
14use crate::error::{Error, Result};
15use crate::objects::{parse_commit, parse_tag, CommitData, ObjectId, ObjectKind};
16use crate::refs;
17use crate::repo::Repository;
18use std::collections::{HashMap, VecDeque};
19
20/// How many first-parent generations are preferred over a single merge hop.
21const MERGE_TRAVERSAL_WEIGHT: u32 = 65_535;
22
23/// Internal per-commit naming record.
24#[derive(Clone, Debug)]
25struct RevName {
26    /// The ref-derived name for the tip of this naming chain.
27    ///
28    /// This is stored verbatim (e.g. `"main"` or `"tags/v1.0^0"`); the final
29    /// `~N` generation suffix is appended only at display time.
30    tip_name: String,
31    /// Tagger / committer date of the originating ref tip (Unix timestamp).
32    taggerdate: i64,
33    /// Number of first-parent hops from the naming tip to this commit.
34    generation: u32,
35    /// Weighted hop count used for name-quality comparisons.
36    distance: u32,
37    /// Whether the originating ref is under `refs/tags/`.
38    from_tag: bool,
39}
40
41/// Options that control which refs participate in naming.
42#[derive(Debug, Default, Clone)]
43pub struct NameRevOptions {
44    /// When true, only `refs/tags/` refs are used.
45    pub tags_only: bool,
46    /// When true, tag ref names are shortened to bare names (strips `tags/`).
47    ///
48    /// In Git this is enabled by `--tags --name-only` together.  Pass this as
49    /// `tags_only && name_only` from the CLI layer.
50    pub shorten_tags: bool,
51    /// Glob patterns — only refs whose path (or any sub-path) matches at least
52    /// one pattern are used.  Empty means "all refs pass".
53    pub ref_filters: Vec<String>,
54    /// Glob patterns — refs whose path (or any sub-path) matches are excluded.
55    pub exclude_filters: Vec<String>,
56}
57
58/// Build the complete `ObjectId → display-name` mapping for all commits
59/// reachable from the applicable refs in `repo`.
60///
61/// The returned map contains every commit that could be named given the
62/// supplied `options`.  Commits not reachable from any passing ref are absent
63/// from the map.
64///
65/// # Errors
66///
67/// Returns [`Error::Io`] for filesystem problems and [`Error::CorruptObject`]
68/// for malformed objects.
69pub fn build_name_map(
70    repo: &Repository,
71    options: &NameRevOptions,
72) -> Result<HashMap<ObjectId, String>> {
73    let tips = collect_tips(repo, options)?;
74    let mut names: HashMap<ObjectId, RevName> = HashMap::new();
75
76    let mut commit_cache: HashMap<ObjectId, CommitData> = HashMap::new();
77
78    for tip in &tips {
79        let Some(commit_oid) = tip.commit_oid else {
80            continue;
81        };
82        name_from_tip(
83            repo,
84            &mut names,
85            &mut commit_cache,
86            commit_oid,
87            &tip.display_name,
88            tip.taggerdate,
89            tip.from_tag,
90            tip.deref,
91        )?;
92    }
93
94    Ok(names
95        .into_iter()
96        .map(|(oid, name)| (oid, format_name(&name)))
97        .collect())
98}
99
100/// Return the display name for a named commit.
101///
102/// - `generation == 0`: returns `tip_name` as-is.
103/// - `generation > 0`: returns `<tip_name>~<generation>` with any trailing
104///   `"^0"` stripped from `tip_name` first.
105fn format_name(name: &RevName) -> String {
106    if name.generation == 0 {
107        return name.tip_name.clone();
108    }
109    let base = name.tip_name.strip_suffix("^0").unwrap_or(&name.tip_name);
110    format!("{}~{}", base, name.generation)
111}
112
113/// Compute the effective comparison distance for a (distance, generation) pair.
114///
115/// First-parent chains add [`MERGE_TRAVERSAL_WEIGHT`] to make merge-traversal
116/// paths look "closer" (lower distance) than deep first-parent chains when
117/// generation > 0.
118fn effective_distance(distance: u32, generation: u32) -> u32 {
119    distance.saturating_add(if generation > 0 {
120        MERGE_TRAVERSAL_WEIGHT
121    } else {
122        0
123    })
124}
125
126/// Return `true` when the proposed name is strictly better than the existing one.
127///
128/// Preference order:
129/// 1. Tags over non-tags.
130/// 2. Smaller effective distance.
131/// 3. Older tagger/committer date (more stable refs first).
132fn is_better_name(
133    existing: &RevName,
134    taggerdate: i64,
135    generation: u32,
136    distance: u32,
137    from_tag: bool,
138) -> bool {
139    let existing_eff = effective_distance(existing.distance, existing.generation);
140    let new_eff = effective_distance(distance, generation);
141
142    // Tags beat non-tags.
143    if from_tag && existing.from_tag {
144        return existing_eff > new_eff;
145    }
146    if existing.from_tag != from_tag {
147        return from_tag;
148    }
149
150    // Both non-tags: prefer smaller effective distance.
151    if existing_eff != new_eff {
152        return existing_eff > new_eff;
153    }
154
155    // Tiebreak by older date (more stable / earlier tag).
156    if existing.taggerdate != taggerdate {
157        return existing.taggerdate > taggerdate;
158    }
159
160    false
161}
162
163/// Derive the `tip_name` for a non-first parent.
164///
165/// Uses the *current* commit's name (not the parent's) so that e.g. the second
166/// parent of `main~3` is named `main~3^2`.
167fn get_parent_name(current: &RevName, parent_number: u32) -> String {
168    let base = current
169        .tip_name
170        .strip_suffix("^0")
171        .unwrap_or(&current.tip_name);
172    if current.generation > 0 {
173        format!("{}~{}^{}", base, current.generation, parent_number)
174    } else {
175        format!("{}^{}", base, parent_number)
176    }
177}
178
179/// Walk backwards from `start_oid`, assigning names to all reachable ancestors.
180///
181/// Uses an explicit stack to avoid deep recursion on long histories.
182fn name_from_tip(
183    repo: &Repository,
184    names: &mut HashMap<ObjectId, RevName>,
185    commit_cache: &mut HashMap<ObjectId, CommitData>,
186    start_oid: ObjectId,
187    tip_name: &str,
188    taggerdate: i64,
189    from_tag: bool,
190    deref: bool,
191) -> Result<()> {
192    let actual_tip_name = if deref {
193        format!("{}^0", tip_name)
194    } else {
195        tip_name.to_owned()
196    };
197
198    // Seed the starting commit.
199    let should_start = match names.get(&start_oid) {
200        None => true,
201        Some(existing) => is_better_name(existing, taggerdate, 0, 0, from_tag),
202    };
203    if !should_start {
204        return Ok(());
205    }
206    names.insert(
207        start_oid,
208        RevName {
209            tip_name: actual_tip_name,
210            taggerdate,
211            generation: 0,
212            distance: 0,
213            from_tag,
214        },
215    );
216
217    // Stack-based DFS; first parent is processed first.
218    let mut stack: Vec<ObjectId> = vec![start_oid];
219
220    while let Some(oid) = stack.pop() {
221        let current = match names.get(&oid) {
222            Some(n) => n.clone(),
223            None => continue,
224        };
225
226        let commit = match load_commit_cached(repo, commit_cache, oid) {
227            Ok(c) => c,
228            Err(_) => continue,
229        };
230        // Clone parents out so we can release the borrow.
231        let parents = commit.parents.clone();
232
233        let mut to_push: Vec<ObjectId> = Vec::new();
234
235        for (idx, parent_oid) in parents.iter().enumerate() {
236            let parent_number = (idx + 1) as u32;
237
238            let (parent_gen, parent_dist) = if parent_number > 1 {
239                (
240                    0u32,
241                    current.distance.saturating_add(MERGE_TRAVERSAL_WEIGHT),
242                )
243            } else {
244                (
245                    current.generation.saturating_add(1),
246                    current.distance.saturating_add(1),
247                )
248            };
249
250            let should_update = match names.get(parent_oid) {
251                None => true,
252                Some(existing) => {
253                    is_better_name(existing, taggerdate, parent_gen, parent_dist, from_tag)
254                }
255            };
256
257            if should_update {
258                let parent_tip_name = if parent_number > 1 {
259                    get_parent_name(&current, parent_number)
260                } else {
261                    current.tip_name.clone()
262                };
263
264                names.insert(
265                    *parent_oid,
266                    RevName {
267                        tip_name: parent_tip_name,
268                        taggerdate,
269                        generation: parent_gen,
270                        distance: parent_dist,
271                        from_tag,
272                    },
273                );
274                to_push.push(*parent_oid);
275            }
276        }
277
278        for parent in to_push.into_iter().rev() {
279            stack.push(parent);
280        }
281    }
282
283    Ok(())
284}
285
286/// A ref tip to be used as a naming source.
287struct TipEntry {
288    /// Short display name for output (e.g. `"main"`, `"tags/v1.0"`).
289    display_name: String,
290    /// Peeled commit OID, if the ref ultimately resolves to a commit.
291    commit_oid: Option<ObjectId>,
292    /// Tagger date (for annotated tags) or committer date.
293    taggerdate: i64,
294    /// True when the ref is under `refs/tags/`.
295    from_tag: bool,
296    /// True when the object was a tag and we peeled it to reach the commit.
297    deref: bool,
298}
299
300/// Collect all ref tips from the repository, applying option filters.
301fn collect_tips(repo: &Repository, options: &NameRevOptions) -> Result<Vec<TipEntry>> {
302    let all_refs = refs::list_refs(&repo.git_dir, "refs/")?;
303    let mut tips: Vec<TipEntry> = Vec::new();
304
305    for (refname, oid) in all_refs {
306        if options.tags_only && !refname.starts_with("refs/tags/") {
307            continue;
308        }
309
310        // Exclude filters.
311        if options
312            .exclude_filters
313            .iter()
314            .any(|pat| subpath_matches(&refname, pat))
315        {
316            continue;
317        }
318
319        // Include filters (if specified, at least one must match).
320        let can_abbreviate = if !options.ref_filters.is_empty() {
321            let mut matched = false;
322            let mut subpath_match = false;
323            for pat in &options.ref_filters {
324                match subpath_match_kind(&refname, pat) {
325                    SubpathMatch::Full => matched = true,
326                    SubpathMatch::Sub => {
327                        matched = true;
328                        subpath_match = true;
329                    }
330                    SubpathMatch::None => {}
331                }
332            }
333            if !matched {
334                continue;
335            }
336            subpath_match
337        } else {
338            // No filter: can abbreviate if tags_only (and caller also requests name_only,
339            // but we handle that at the CLI layer).
340            false
341        };
342
343        let from_tag = refname.starts_with("refs/tags/");
344        let display_name = shorten_refname(&refname, can_abbreviate || options.shorten_tags);
345
346        // Peel the object: follow tag chains until we reach a commit.
347        let (commit_oid, taggerdate, deref) = peel_to_commit(repo, oid)?;
348
349        tips.push(TipEntry {
350            display_name,
351            commit_oid,
352            taggerdate,
353            from_tag,
354            deref,
355        });
356    }
357
358    // Sort: tags first (from_tag=true sorts before false), then older taggerdate.
359    tips.sort_by(|a, b| {
360        let tag_cmp = b.from_tag.cmp(&a.from_tag); // true > false, so b first = tags first
361        if tag_cmp != std::cmp::Ordering::Equal {
362            return tag_cmp;
363        }
364        a.taggerdate.cmp(&b.taggerdate)
365    });
366
367    Ok(tips)
368}
369
370/// Peel an object ID through tag layers to find the underlying commit.
371///
372/// Returns `(commit_oid, taggerdate, was_dereffed)`.  `taggerdate` is the
373/// tagger timestamp from the first (outermost) tag encountered, or the
374/// committer timestamp if the object is a plain commit.  `was_dereffed` is
375/// true if at least one tag layer was peeled.
376fn peel_to_commit(repo: &Repository, mut oid: ObjectId) -> Result<(Option<ObjectId>, i64, bool)> {
377    let mut deref = false;
378    let mut taggerdate: Option<i64> = None;
379
380    loop {
381        let obj = match repo.odb.read(&oid) {
382            Ok(o) => o,
383            Err(_) => return Ok((None, taggerdate.unwrap_or(0), deref)),
384        };
385
386        match obj.kind {
387            ObjectKind::Commit => {
388                let ts = if let Ok(c) = parse_commit(&obj.data) {
389                    parse_signature_time(&c.committer)
390                } else {
391                    0
392                };
393                let date = taggerdate.unwrap_or(ts);
394                return Ok((Some(oid), date, deref));
395            }
396            ObjectKind::Tag => {
397                let tag = match parse_tag(&obj.data) {
398                    Ok(t) => t,
399                    Err(_) => return Ok((None, taggerdate.unwrap_or(0), deref)),
400                };
401                // Record the outermost tag's date.
402                if taggerdate.is_none() {
403                    taggerdate = Some(tag.tagger.as_deref().map(parse_signature_time).unwrap_or(0));
404                }
405                oid = tag.object;
406                deref = true;
407            }
408            _ => return Ok((None, taggerdate.unwrap_or(0), deref)),
409        }
410    }
411}
412
413/// Walk all commits reachable from every ref and return their OIDs.
414///
415/// Used by `--all` mode to enumerate commits that should be named and printed.
416///
417/// # Errors
418///
419/// Returns object or I/O errors on failure.
420pub fn all_reachable_commits(repo: &Repository) -> Result<Vec<ObjectId>> {
421    let all_refs = refs::list_refs(&repo.git_dir, "refs/")?;
422    let mut seen: std::collections::HashSet<ObjectId> = std::collections::HashSet::new();
423    let mut queue: VecDeque<ObjectId> = VecDeque::new();
424
425    for (_, oid) in all_refs {
426        // Peel to commit if needed.
427        let (commit_oid, _, _) = peel_to_commit(repo, oid)?;
428        if let Some(c) = commit_oid {
429            if seen.insert(c) {
430                queue.push_back(c);
431            }
432        }
433    }
434
435    while let Some(oid) = queue.pop_front() {
436        let commit = match load_commit(repo, oid) {
437            Ok(c) => c,
438            Err(_) => continue,
439        };
440        for parent in commit.parents {
441            if seen.insert(parent) {
442                queue.push_back(parent);
443            }
444        }
445    }
446
447    let mut result: Vec<ObjectId> = seen.into_iter().collect();
448    result.sort();
449    Ok(result)
450}
451
452/// Shorten a fully-qualified ref name to its display form.
453///
454/// - `can_abbreviate = true`: strip `refs/heads/`, `refs/tags/`, or `refs/`
455///   (mimics Git's unambiguous-ref shortening for `--tags --name-only`).
456/// - `can_abbreviate = false`: strip only `refs/heads/`; other prefixes lose
457///   only the leading `refs/` component, preserving `tags/`, `remotes/`, etc.
458fn shorten_refname(refname: &str, can_abbreviate: bool) -> String {
459    if can_abbreviate {
460        if let Some(rest) = refname.strip_prefix("refs/heads/") {
461            return rest.to_owned();
462        }
463        if let Some(rest) = refname.strip_prefix("refs/tags/") {
464            return rest.to_owned();
465        }
466        if let Some(rest) = refname.strip_prefix("refs/") {
467            return rest.to_owned();
468        }
469        return refname.to_owned();
470    }
471    // Default: strip refs/heads/ only; everything else keeps its sub-namespace.
472    if let Some(rest) = refname.strip_prefix("refs/heads/") {
473        return rest.to_owned();
474    }
475    if let Some(rest) = refname.strip_prefix("refs/") {
476        return rest.to_owned();
477    }
478    refname.to_owned()
479}
480
481/// Possible outcomes of matching a ref path against a glob pattern.
482#[derive(PartialEq, Eq)]
483enum SubpathMatch {
484    /// The pattern matched the full path starting at position 0.
485    Full,
486    /// The pattern matched a sub-path (after a `/`).
487    Sub,
488    /// No match.
489    None,
490}
491
492/// Test whether `pattern` matches `path` or any slash-separated suffix of it.
493///
494/// Returns the kind of match found (full, sub-path, or none), matching
495/// Git's `subpath_matches` semantics.
496fn subpath_match_kind(path: &str, pattern: &str) -> SubpathMatch {
497    // Try the full path first.
498    if glob_matches(pattern, path) {
499        return SubpathMatch::Full;
500    }
501    // Try every sub-path (after each '/').
502    let mut rest = path;
503    while let Some(pos) = rest.find('/') {
504        rest = &rest[pos + 1..];
505        if glob_matches(pattern, rest) {
506            return SubpathMatch::Sub;
507        }
508    }
509    SubpathMatch::None
510}
511
512/// Return `true` when the path (or any sub-path) matches the pattern.
513fn subpath_matches(path: &str, pattern: &str) -> bool {
514    subpath_match_kind(path, pattern) != SubpathMatch::None
515}
516
517/// Minimal glob matcher supporting `*` (any sequence) and `?` (any single char).
518///
519/// All other characters match literally.
520fn glob_matches(pattern: &str, text: &str) -> bool {
521    let pat: Vec<char> = pattern.chars().collect();
522    let txt: Vec<char> = text.chars().collect();
523    glob_match_inner(&pat, &txt)
524}
525
526fn glob_match_inner(pat: &[char], txt: &[char]) -> bool {
527    match (pat.first(), txt.first()) {
528        (None, None) => true,
529        (None, Some(_)) => false,
530        (Some('*'), _) => {
531            // '*' matches zero or more characters.
532            glob_match_inner(&pat[1..], txt)
533                || (!txt.is_empty() && glob_match_inner(pat, &txt[1..]))
534        }
535        (Some('?'), Some(_)) => glob_match_inner(&pat[1..], &txt[1..]),
536        (Some('?'), None) => false,
537        (Some(p), Some(t)) => p == t && glob_match_inner(&pat[1..], &txt[1..]),
538        (Some(_), None) => false,
539    }
540}
541
542/// Parse the Unix timestamp from a Git signature string.
543///
544/// A signature has the form `"Name <email> <timestamp> <tz>"`.  This returns
545/// the second-to-last whitespace-delimited token parsed as an `i64`, or `0`
546/// on parse failure.
547pub(crate) fn parse_signature_time(sig: &str) -> i64 {
548    let parts: Vec<&str> = sig.split_whitespace().collect();
549    if parts.len() < 2 {
550        return 0;
551    }
552    parts[parts.len().saturating_sub(2)]
553        .parse::<i64>()
554        .unwrap_or(0)
555}
556
557/// Load and parse a commit object, using a cache to avoid re-reading.
558fn load_commit_cached<'c>(
559    repo: &Repository,
560    cache: &'c mut HashMap<ObjectId, CommitData>,
561    oid: ObjectId,
562) -> Result<&'c CommitData> {
563    if let std::collections::hash_map::Entry::Vacant(e) = cache.entry(oid) {
564        let obj = repo.odb.read(&oid)?;
565        if obj.kind != ObjectKind::Commit {
566            return Err(Error::CorruptObject(format!(
567                "object {oid} is not a commit"
568            )));
569        }
570        let commit = parse_commit(&obj.data)?;
571        e.insert(commit);
572    }
573    Ok(cache.get(&oid).unwrap())
574}
575
576/// Load and parse a commit object from the object database.
577fn load_commit(repo: &Repository, oid: ObjectId) -> Result<CommitData> {
578    let obj = repo.odb.read(&oid)?;
579    if obj.kind != ObjectKind::Commit {
580        return Err(Error::CorruptObject(format!(
581            "object {oid} is not a commit"
582        )));
583    }
584    parse_commit(&obj.data)
585}
586
587/// Resolve a full 40-hex OID string, an abbreviated OID, or a ref name to an
588/// [`ObjectId`].
589///
590/// Used when the caller passes raw OID strings on the command line.
591///
592/// # Errors
593///
594/// Returns [`Error::ObjectNotFound`] when the spec cannot be resolved.
595pub fn resolve_oid(repo: &Repository, spec: &str) -> Result<ObjectId> {
596    crate::rev_parse::resolve_revision(repo, spec)
597}
598
599/// Look up the best name for `oid` in the given map, peeling tags first.
600///
601/// For non-commit objects (e.g. a tag object), the map may contain an entry
602/// under the *underlying* commit OID.  This function tries `oid` directly, and
603/// if that misses (and the object is a tag), it peels and retries.
604///
605/// Returns `None` when no name is available.
606///
607/// # Errors
608///
609/// Returns object read errors.
610pub fn lookup_name<'m>(
611    repo: &Repository,
612    name_map: &'m HashMap<ObjectId, String>,
613    oid: ObjectId,
614) -> Result<Option<&'m String>> {
615    // Fast path: direct hit (works for commits and exact-ref matches).
616    if let Some(name) = name_map.get(&oid) {
617        return Ok(Some(name));
618    }
619
620    // For tag objects: peel to commit and try again.
621    let obj = match repo.odb.read(&oid) {
622        Ok(o) => o,
623        Err(_) => return Ok(None),
624    };
625    if obj.kind == ObjectKind::Tag {
626        let (commit_oid, _, _) = peel_to_commit(repo, oid)?;
627        if let Some(c) = commit_oid {
628            return Ok(name_map.get(&c));
629        }
630    }
631    Ok(None)
632}
633
634/// Annotate a line of text: wherever a 40-hex sequence appears whose OID is in
635/// `name_map`, append ` (<name>)` after the hex string (or replace the hex
636/// string with the name when `name_only` is true).
637///
638/// Returns the annotated line (including trailing newline if the original had
639/// one).
640pub fn annotate_line(
641    repo: &Repository,
642    name_map: &HashMap<ObjectId, String>,
643    line: &str,
644    name_only: bool,
645) -> Result<String> {
646    let mut out = String::with_capacity(line.len() + 32);
647    let chars: Vec<char> = line.chars().collect();
648    let hex_len = 40usize;
649    let mut i = 0usize;
650    let mut flush_start = 0usize;
651
652    while i + hex_len <= chars.len() {
653        // Check if chars[i..i+hex_len] is all hex and the char after is not hex.
654        let slice: String = chars[i..i + hex_len].iter().collect();
655        let after_is_hex = chars
656            .get(i + hex_len)
657            .map(|c| c.is_ascii_hexdigit())
658            .unwrap_or(false);
659        if !after_is_hex && slice.chars().all(|c| c.is_ascii_hexdigit()) {
660            // Try to resolve this as an OID.
661            if let Ok(oid) = slice.parse::<ObjectId>() {
662                if let Ok(Some(name)) = lookup_name(repo, name_map, oid) {
663                    // Flush everything before this match.
664                    let prefix: String = chars[flush_start..i].iter().collect();
665                    out.push_str(&prefix);
666
667                    if name_only {
668                        out.push_str(name);
669                    } else {
670                        out.push_str(&slice);
671                        out.push_str(" (");
672                        out.push_str(name);
673                        out.push(')');
674                    }
675                    flush_start = i + hex_len;
676                    i += hex_len;
677                    continue;
678                }
679            }
680        }
681        i += 1;
682    }
683
684    // Flush remainder.
685    let tail: String = chars[flush_start..].iter().collect();
686    out.push_str(&tail);
687    Ok(out)
688}
689
690/// Return a short abbreviated hex string for `oid` (first `len` hex chars).
691#[must_use]
692pub fn abbrev_oid(oid: ObjectId, len: usize) -> String {
693    let hex = oid.to_hex();
694    let n = len.clamp(4, 40).min(hex.len());
695    hex[..n].to_owned()
696}
697
698/// Return all commits reachable from `refs/` sorted by OID (for `--all` mode).
699///
700/// This is an alias for [`all_reachable_commits`] exposed for the CLI.
701pub use self::all_reachable_commits as walk_all_commits;
702
703/// Check if a path exists in the object database (either OID).
704pub fn object_exists(repo: &Repository, oid: ObjectId) -> bool {
705    repo.odb.exists(&oid)
706}