Skip to main content

mkit_cli/commands/
revspec.rs

1//! Shared revision-spec resolver (issue #227, parent #226).
2//!
3//! [`resolve_revision`] turns a user-supplied revision string into a
4//! single object [`Hash`](tyalias@mkit_core::Hash). It is the keystone the diff/checkout/
5//! cherry-pick/bisect commands build on so they all accept the same
6//! grammar instead of each one hand-rolling a `hash::from_hex` call.
7//!
8//! Accepted grammar (a deliberately small subset of `git rev-parse`):
9//!
10//! - **Full hash** — exactly 64 hex chars (lower or upper), present in
11//!   the object store.
12//! - **Short hash** — a hex prefix of at least [`MIN_SHORT_HASH`] chars
13//!   that unambiguously names exactly one object in the store. An
14//!   ambiguous prefix (≥ 2 matches) is an error; a prefix matching none
15//!   is an error.
16//! - **Ref name** — a branch (`refs/heads/<name>`), a tag
17//!   (`refs/tags/<name>`), or the literal `HEAD`. Branches win over
18//!   tags on a name collision (matching the precedence the existing
19//!   `checkout` command used).
20//! - **Suffix navigation** — a `<base>` from any of the above followed
21//!   by zero or more `~n` / `^` / `^n` steps walking the commit's
22//!   first-parent chain. `~n` walks `n` first-parents (`~` alone == `~1`);
23//!   `^` / `^n` selects the n-th parent (`^` == `^1`, 1-based). `^0` is
24//!   the commit itself. `HEAD~2`, `main^`, `<hash>~1^2` all parse.
25//!
26//! The resolver intentionally does NOT accept pathspecs, `:/text`
27//! message searches, reflog (`@{n}`) syntax, or ranges — `A..B` ranges
28//! are split by the caller (`diff`) before each end reaches here.
29
30use mkit_core::hash::{self, HEX_LEN, Hash};
31use mkit_core::object::Object;
32use mkit_core::refs;
33use mkit_core::store::ObjectStore;
34use std::path::Path;
35
36/// Minimum length of an accepted short-hash prefix. Shorter prefixes are
37/// rejected as too-ambiguous-by-construction rather than scanned, so a
38/// stray 1–3 char token (e.g. a typo'd ref) fails fast.
39pub const MIN_SHORT_HASH: usize = 4;
40
41/// Errors raised while resolving a revision string.
42#[derive(Debug, thiserror::Error)]
43pub enum RevError {
44    /// The base token matched no ref and no object.
45    #[error("unknown revision '{0}'")]
46    Unknown(String),
47    /// A short-hash prefix matched more than one object.
48    #[error("ambiguous short hash '{0}' matches multiple objects")]
49    Ambiguous(String),
50    /// A `~`/`^` suffix walked off the end of the commit graph.
51    #[error("revision '{spec}': {detail}")]
52    BadSuffix {
53        /// The full spec the user supplied.
54        spec: String,
55        /// What specifically went wrong.
56        detail: String,
57    },
58    /// The base resolved but the suffix navigation hit a non-commit
59    /// object (a tree/blob cannot have parents).
60    #[error("revision '{0}' resolves to a non-commit object; cannot walk parents")]
61    NotACommit(String),
62    /// Underlying ref / store failure.
63    #[error("{0}")]
64    Backend(String),
65}
66
67/// Resolve `spec` to a single object [`Hash`](tyalias@mkit_core::Hash).
68///
69/// See the module docstring for the accepted grammar.
70///
71/// # Errors
72/// Returns [`RevError`] when the spec is unknown, ambiguous, or the
73/// suffix navigation is invalid.
74pub fn resolve_revision(
75    store: &ObjectStore,
76    mkit_dir: &Path,
77    spec: &str,
78) -> Result<Hash, RevError> {
79    // Split the base from any trailing `~`/`^` navigation. The base is
80    // everything up to the first `~` or `^`.
81    let split_at = spec.find(['~', '^']).unwrap_or(spec.len());
82    let (base, suffix) = spec.split_at(split_at);
83    if base.is_empty() {
84        return Err(RevError::Unknown(spec.to_string()));
85    }
86
87    let mut current = resolve_base(store, mkit_dir, base)?;
88
89    // Walk the suffix steps left-to-right.
90    let mut rest = suffix;
91    while !rest.is_empty() {
92        let bytes = rest.as_bytes();
93        match bytes[0] {
94            b'~' => {
95                let (n, consumed) = parse_count(&rest[1..]);
96                current = walk_first_parent(store, spec, current, n)?;
97                rest = &rest[1 + consumed..];
98            }
99            b'^' => {
100                let (n, consumed) = parse_count(&rest[1..]);
101                current = select_parent(store, spec, current, n)?;
102                rest = &rest[1 + consumed..];
103            }
104            _ => {
105                return Err(RevError::BadSuffix {
106                    spec: spec.to_string(),
107                    detail: format!("unexpected character in suffix '{rest}'"),
108                });
109            }
110        }
111    }
112    Ok(current)
113}
114
115/// Resolve the base token (no `~`/`^` suffix) to a hash: ref, then
116/// full/short hash.
117fn resolve_base(store: &ObjectStore, mkit_dir: &Path, base: &str) -> Result<Hash, RevError> {
118    // 1. HEAD.
119    if base == "HEAD" {
120        return match refs::resolve_head(mkit_dir) {
121            Ok(Some(h)) => Ok(h),
122            Ok(None) => Err(RevError::Unknown("HEAD".to_string())),
123            Err(e) => Err(RevError::Backend(format!("resolve HEAD: {e}"))),
124        };
125    }
126
127    // 2. Explicit full ref paths: refs/heads/<b>, refs/tags/<t>,
128    //    refs/remotes/<r>/<b>. These are unambiguous and checked
129    //    before any short-form guessing.
130    if let Some(short) = base.strip_prefix("refs/heads/") {
131        if let Ok(Some(h)) = refs::read_ref(mkit_dir, short) {
132            return Ok(h);
133        }
134        return Err(RevError::Unknown(base.to_string()));
135    }
136    if let Some(short) = base.strip_prefix("refs/tags/") {
137        if let Ok(Some(h)) = refs::read_tag(mkit_dir, short) {
138            return Ok(h);
139        }
140        return Err(RevError::Unknown(base.to_string()));
141    }
142    if let Some(rest) = base.strip_prefix("refs/remotes/") {
143        if let Some((remote, branch)) = rest.split_once('/')
144            && let Ok(Some(h)) = refs::read_remote_ref(mkit_dir, remote, branch)
145        {
146            return Ok(h);
147        }
148        return Err(RevError::Unknown(base.to_string()));
149    }
150
151    // 3. Branch (refs/heads), then tag (refs/tags). `read_ref` /
152    //    `read_tag` validate the name, returning `InvalidRefName` for a
153    //    bad ref name — which we treat as "not a ref" and fall through
154    //    to hash parsing (a bare hash is not a valid ref name anyway).
155    if refs::validate_ref_name(base) {
156        if let Ok(Some(h)) = refs::read_ref(mkit_dir, base) {
157            return Ok(h);
158        }
159        if let Ok(Some(h)) = refs::read_tag(mkit_dir, base) {
160            return Ok(h);
161        }
162        // 3b. Remote-tracking short form `<remote>/<branch>` (git's
163        //     resolution order also places this after heads/tags).
164        if let Some((remote, branch)) = base.split_once('/')
165            && let Ok(Some(h)) = refs::read_remote_ref(mkit_dir, remote, branch)
166        {
167            return Ok(h);
168        }
169    }
170
171    // 4. Full 64-hex hash that exists in the store.
172    if base.len() == HEX_LEN
173        && let Ok(h) = hash::from_hex(base)
174    {
175        if store.contains(&h) {
176            return Ok(h);
177        }
178        return Err(RevError::Unknown(base.to_string()));
179    }
180
181    // 5. Short hex prefix.
182    if base.len() >= MIN_SHORT_HASH && base.len() < HEX_LEN && is_hex(base) {
183        return resolve_short_hash(store, base);
184    }
185
186    Err(RevError::Unknown(base.to_string()))
187}
188
189/// Find the unique object whose hex starts with `prefix`. Errors on
190/// zero matches ([`RevError::Unknown`]) or ≥ 2 matches
191/// ([`RevError::Ambiguous`]).
192fn resolve_short_hash(store: &ObjectStore, prefix: &str) -> Result<Hash, RevError> {
193    let lower = prefix.to_ascii_lowercase();
194    // Object layout: `objects/<2-hex>/<62-hex>`. The first two hex chars
195    // (when present) name the shard directory, so we only scan one
196    // shard. With a 1-char prefix (below MIN_SHORT_HASH, never reached)
197    // we would have to scan 16 shards; the >= MIN_SHORT_HASH gate makes
198    // the 2-char shard slice always available.
199    let (shard, file_prefix) = lower.split_at(2);
200    let shard_dir = store.objects_root().join(shard);
201    let iter = match std::fs::read_dir(&shard_dir) {
202        Ok(i) => i,
203        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
204            return Err(RevError::Unknown(prefix.to_string()));
205        }
206        Err(e) => return Err(RevError::Backend(format!("scan objects: {e}"))),
207    };
208
209    let mut found: Option<Hash> = None;
210    for entry in iter {
211        let entry = entry.map_err(|e| RevError::Backend(format!("scan objects: {e}")))?;
212        let Some(name) = entry.file_name().to_str().map(str::to_owned) else {
213            continue;
214        };
215        if name.len() != HEX_LEN - 2 || !name.starts_with(file_prefix) {
216            continue;
217        }
218        let full = format!("{shard}{name}");
219        let Ok(h) = hash::from_hex(&full) else {
220            continue;
221        };
222        if found.is_some() {
223            return Err(RevError::Ambiguous(prefix.to_string()));
224        }
225        found = Some(h);
226    }
227    found.ok_or_else(|| RevError::Unknown(prefix.to_string()))
228}
229
230/// Walk `n` first-parents from `commit`.
231fn walk_first_parent(
232    store: &ObjectStore,
233    spec: &str,
234    mut commit: Hash,
235    n: u32,
236) -> Result<Hash, RevError> {
237    for _ in 0..n {
238        let parents = parents_of(store, spec, &commit)?;
239        let Some(first) = parents.first() else {
240            return Err(RevError::BadSuffix {
241                spec: spec.to_string(),
242                detail: format!(
243                    "commit {} has no parent (history root reached)",
244                    hash::to_hex(&commit)
245                ),
246            });
247        };
248        commit = *first;
249    }
250    Ok(commit)
251}
252
253/// Select the `n`-th parent of `commit` (1-based). `n == 0` is the
254/// commit itself.
255fn select_parent(store: &ObjectStore, spec: &str, commit: Hash, n: u32) -> Result<Hash, RevError> {
256    if n == 0 {
257        return Ok(commit);
258    }
259    let parents = parents_of(store, spec, &commit)?;
260    let idx = (n - 1) as usize;
261    parents
262        .get(idx)
263        .copied()
264        .ok_or_else(|| RevError::BadSuffix {
265            spec: spec.to_string(),
266            detail: format!(
267                "commit {} has no parent #{n} (only {} parent(s))",
268                hash::to_hex(&commit),
269                parents.len()
270            ),
271        })
272}
273
274/// Read the parent list of a commit-or-remix object.
275fn parents_of(store: &ObjectStore, spec: &str, commit: &Hash) -> Result<Vec<Hash>, RevError> {
276    match store.read_object(commit) {
277        Ok(Object::Commit(c)) => Ok(c.parents),
278        Ok(Object::Remix(r)) => Ok(r.parents),
279        Ok(_) => Err(RevError::NotACommit(spec.to_string())),
280        Err(e) => Err(RevError::Backend(format!("read object: {e}"))),
281    }
282}
283
284/// Parse a leading decimal count from `s`, returning `(count, consumed)`.
285/// An absent count (next char is not a digit, or `s` is empty) yields
286/// `(1, 0)` — matching `git`'s `~` == `~1` and `^` == `^1`.
287fn parse_count(s: &str) -> (u32, usize) {
288    let digits: String = s.chars().take_while(char::is_ascii_digit).collect();
289    if digits.is_empty() {
290        (1, 0)
291    } else {
292        // Saturate on overflow: a `~99999999999` walk would hit the
293        // history root and error long before the count mattered, but we
294        // must not panic on parse.
295        let n = digits.parse::<u32>().unwrap_or(u32::MAX);
296        (n, digits.len())
297    }
298}
299
300fn is_hex(s: &str) -> bool {
301    !s.is_empty() && s.bytes().all(|b| b.is_ascii_hexdigit())
302}
303
304#[cfg(test)]
305mod tests {
306    use super::*;
307    use mkit_core::object::{Commit, Identity, Object};
308    use mkit_core::serialize;
309    use mkit_core::{MKIT_DIR, refs};
310    use tempfile::TempDir;
311
312    fn author() -> Identity {
313        Identity::ed25519([0u8; 32])
314    }
315
316    /// Build a throwaway repo with an object store + ref dir.
317    fn fresh_repo() -> (TempDir, ObjectStore, std::path::PathBuf) {
318        let dir = TempDir::new().unwrap();
319        let store = ObjectStore::init(dir.path()).unwrap();
320        let mkit_dir = dir.path().join(MKIT_DIR);
321        refs::init(&mkit_dir).unwrap();
322        (dir, store, mkit_dir)
323    }
324
325    /// Write a commit object with the given parents and return its hash.
326    fn write_commit(store: &ObjectStore, parents: Vec<Hash>, seed: u8) -> Hash {
327        let commit = Commit::new_unannotated(
328            [seed; 32],
329            parents,
330            author(),
331            [0u8; 32],
332            vec![seed],
333            u64::from(seed),
334            [0u8; 64],
335        );
336        let bytes = serialize::serialize(&Object::Commit(commit)).unwrap();
337        store.write(&bytes).unwrap()
338    }
339
340    #[test]
341    fn resolves_full_hash() {
342        let (_d, store, mkit) = fresh_repo();
343        let c = write_commit(&store, vec![], 1);
344        let hex = hash::to_hex(&c);
345        assert_eq!(resolve_revision(&store, &mkit, &hex).unwrap(), c);
346    }
347
348    #[test]
349    fn full_hash_not_in_store_is_unknown() {
350        let (_d, store, mkit) = fresh_repo();
351        let hex = "ab".repeat(32);
352        let err = resolve_revision(&store, &mkit, &hex).unwrap_err();
353        assert!(matches!(err, RevError::Unknown(_)));
354    }
355
356    #[test]
357    fn resolves_unambiguous_short_hash() {
358        let (_d, store, mkit) = fresh_repo();
359        let c = write_commit(&store, vec![], 7);
360        let hex = hash::to_hex(&c);
361        let short = &hex[..12];
362        assert_eq!(resolve_revision(&store, &mkit, short).unwrap(), c);
363    }
364
365    #[test]
366    fn ambiguous_short_hash_errors() {
367        let (_d, store, mkit) = fresh_repo();
368        // Mine — in memory, hashing only — two blobs whose object hashes
369        // share their first MIN_SHORT_HASH hex chars, then write JUST
370        // those two to the store and query with the shared prefix. A
371        // 4-nibble (16-bit) collision is found within a few hundred
372        // candidates by the birthday bound, and we avoid fsync'ing
373        // thousands of objects to disk. The two serialized payloads
374        // resolve to the two object hashes (the store address is the
375        // BLAKE3 of the serialized bytes).
376        let mut seen: std::collections::HashMap<String, Vec<u8>> = std::collections::HashMap::new();
377        let mut pair: Option<(String, Vec<u8>, Vec<u8>)> = None;
378        for i in 0u32..200_000 {
379            let bytes = serialize::serialize(&Object::Blob(mkit_core::object::Blob {
380                data: i.to_le_bytes().to_vec(),
381            }))
382            .unwrap();
383            let h = hash::hash(&bytes);
384            let prefix = hash::to_hex(&h)[..MIN_SHORT_HASH].to_string();
385            if let Some(prev) = seen.get(&prefix) {
386                pair = Some((prefix, prev.clone(), bytes));
387                break;
388            }
389            seen.insert(prefix, bytes);
390        }
391        let (prefix, a, b) = pair.expect("expected a 4-nibble prefix collision");
392        store.write(&a).unwrap();
393        store.write(&b).unwrap();
394        let err = resolve_revision(&store, &mkit, &prefix).unwrap_err();
395        assert!(matches!(err, RevError::Ambiguous(_)), "got {err:?}");
396    }
397
398    #[test]
399    fn short_hash_no_match_is_unknown() {
400        let (_d, store, mkit) = fresh_repo();
401        write_commit(&store, vec![], 3);
402        // "ffff" almost certainly does not prefix our single commit.
403        let err = resolve_revision(&store, &mkit, "ffffffff").unwrap_err();
404        assert!(matches!(err, RevError::Unknown(_)));
405    }
406
407    #[test]
408    fn resolves_branch_ref() {
409        let (_d, store, mkit) = fresh_repo();
410        let c = write_commit(&store, vec![], 5);
411        refs::write_ref(&mkit, "feature", &c).unwrap();
412        assert_eq!(resolve_revision(&store, &mkit, "feature").unwrap(), c);
413    }
414
415    #[test]
416    fn resolves_tag_ref() {
417        let (_d, store, mkit) = fresh_repo();
418        let c = write_commit(&store, vec![], 6);
419        refs::write_tag(&mkit, "v1.0", &c).unwrap();
420        assert_eq!(resolve_revision(&store, &mkit, "v1.0").unwrap(), c);
421    }
422
423    #[test]
424    fn resolves_head() {
425        let (_d, store, mkit) = fresh_repo();
426        let c = write_commit(&store, vec![], 9);
427        refs::write_ref(&mkit, "main", &c).unwrap();
428        assert_eq!(resolve_revision(&store, &mkit, "HEAD").unwrap(), c);
429    }
430
431    #[test]
432    fn resolves_head_tilde_n() {
433        let (_d, store, mkit) = fresh_repo();
434        let root = write_commit(&store, vec![], 1);
435        let mid = write_commit(&store, vec![root], 2);
436        let tip = write_commit(&store, vec![mid], 3);
437        refs::write_ref(&mkit, "main", &tip).unwrap();
438        assert_eq!(resolve_revision(&store, &mkit, "HEAD").unwrap(), tip);
439        assert_eq!(resolve_revision(&store, &mkit, "HEAD~1").unwrap(), mid);
440        assert_eq!(resolve_revision(&store, &mkit, "HEAD~2").unwrap(), root);
441        // `~` alone == `~1`.
442        assert_eq!(resolve_revision(&store, &mkit, "HEAD~").unwrap(), mid);
443    }
444
445    #[test]
446    fn caret_selects_parent() {
447        let (_d, store, mkit) = fresh_repo();
448        let p1 = write_commit(&store, vec![], 1);
449        let p2 = write_commit(&store, vec![], 2);
450        let merge = write_commit(&store, vec![p1, p2], 3);
451        refs::write_ref(&mkit, "main", &merge).unwrap();
452        assert_eq!(resolve_revision(&store, &mkit, "HEAD^").unwrap(), p1);
453        assert_eq!(resolve_revision(&store, &mkit, "HEAD^1").unwrap(), p1);
454        assert_eq!(resolve_revision(&store, &mkit, "HEAD^2").unwrap(), p2);
455        assert_eq!(resolve_revision(&store, &mkit, "HEAD^0").unwrap(), merge);
456    }
457
458    #[test]
459    fn tilde_off_the_end_errors() {
460        let (_d, store, mkit) = fresh_repo();
461        let root = write_commit(&store, vec![], 1);
462        refs::write_ref(&mkit, "main", &root).unwrap();
463        let err = resolve_revision(&store, &mkit, "HEAD~1").unwrap_err();
464        assert!(matches!(err, RevError::BadSuffix { .. }));
465    }
466
467    #[test]
468    fn caret_past_parents_errors() {
469        let (_d, store, mkit) = fresh_repo();
470        let p1 = write_commit(&store, vec![], 1);
471        let c = write_commit(&store, vec![p1], 2);
472        refs::write_ref(&mkit, "main", &c).unwrap();
473        let err = resolve_revision(&store, &mkit, "HEAD^2").unwrap_err();
474        assert!(matches!(err, RevError::BadSuffix { .. }));
475    }
476
477    #[test]
478    fn unknown_ref_errors() {
479        let (_d, store, mkit) = fresh_repo();
480        let err = resolve_revision(&store, &mkit, "nope").unwrap_err();
481        assert!(matches!(err, RevError::Unknown(_)));
482    }
483
484    #[test]
485    fn too_short_prefix_is_unknown() {
486        let (_d, store, mkit) = fresh_repo();
487        let c = write_commit(&store, vec![], 1);
488        let hex = hash::to_hex(&c);
489        // A 3-char prefix is below MIN_SHORT_HASH and not a valid ref.
490        let err = resolve_revision(&store, &mkit, &hex[..3]).unwrap_err();
491        assert!(matches!(err, RevError::Unknown(_)));
492    }
493
494    #[test]
495    fn branch_wins_over_tag_on_collision() {
496        let (_d, store, mkit) = fresh_repo();
497        let branch_c = write_commit(&store, vec![], 1);
498        let tag_c = write_commit(&store, vec![], 2);
499        refs::write_ref(&mkit, "dup", &branch_c).unwrap();
500        refs::write_tag(&mkit, "dup", &tag_c).unwrap();
501        assert_eq!(resolve_revision(&store, &mkit, "dup").unwrap(), branch_c);
502    }
503}