Skip to main content

mkit_core/ops/
gc.rs

1//! GC retention roots — the complete set of object hashes that
2//! `mkit gc` (#233) must treat as live, plus the live-object closure
3//! over them.
4//!
5//! Pruning is only safe if the root set is **complete**: anything gc can
6//! reach from a root is kept; everything else is reclaimable. Missing a
7//! root means deleting a live object, so this collector is deliberately
8//! exhaustive and **fails closed** — if any source can't be read, the
9//! whole collection errors and the caller must abort rather than prune
10//! against an under-counted root set.
11//!
12//! Roots, by source:
13//! - **HEAD** (incl. detached) and every `refs/heads`, `refs/tags`, and
14//!   `refs/remotes/<remote>` ref.
//! - **Stash** entries — each stashed commit and its recorded parent.
//! - **Staging index** — every object hash recorded in the index
//!   (content staged by `mkit add` but not yet committed).
//! - **In-progress operations** — merge (`MERGE_HEAD`), cherry-pick
//!   (`CHERRY_PICK_HEAD`), revert, rebase (`onto` + every `todo`/`done`
//!   commit), the `ORIG_HEAD` saved by those ops and by `reset`, and the
//!   conflict sidecar's base/ours/theirs blob hashes.
20//! - **Attestations** — every `attestations/<commit>/` directory pins
21//!   its commit so an attested commit is never orphaned.
22//! - **Recovery log** — every commit recorded as superseded by a
23//!   history-rewriting op (see [`super::recovery`]); retained so it stays
24//!   recoverable until `recovery::expire` drops it past the window.
25//!
26//! RECOVERY (#260): commits superseded by `commit --amend`, `reset`, or
27//! `rebase` are unrecoverable from the opaque-digest history journal, so
28//! [`super::recovery`] logs them (the commands record the old tip before
29//! moving the ref) and they are roots here.
30
31use std::collections::BTreeSet;
32use std::fs;
33use std::io;
34use std::path::Path;
35
36use crate::hash::{self, Hash};
37use crate::index;
38use crate::store::{ObjectStore, StoreError};
39
40use super::conflict_state::{self, ORIG_HEAD};
41use super::graph::reachable_closure_checked;
42use super::rebase::{self, REBASE_DIR};
43use super::recovery;
44use super::stash;
45use crate::refs::{self, HEADS_DIR, REMOTES_DIR, TAGS_DIR};
46
/// Name of the directory, relative to `.mkit/`, that stores attestation
/// envelopes in one subdirectory per commit. It is declared in this
/// module rather than imported from `mkit-attest` so the core collector
/// carries no reverse crate dependency: only the subdirectory *names*
/// are ever read here.
const ATTESTATIONS_DIR: &str = "attestations";
51
/// Maximum directory nesting the strict ref walk will descend. Ref names
/// nest on disk by `/`; a tree deeper than this cap is reported as an
/// error (fail closed) instead of being silently cut short.
const MAX_REF_WALK_DEPTH: usize = 64;
56
/// Errors from collecting the retention root set. Every underlying
/// source error is wrapped so the collector can fail closed.
///
/// `#[non_exhaustive]`: new root sources (like the staging index, added
/// in 0.2.0) come with new variants; downstream matches must keep a
/// wildcard arm so those additions stay minor-version changes.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum GcRootsError {
    /// Wraps a [`refs::RefError`].
    #[error("refs: {0}")]
    Refs(#[from] refs::RefError),
    /// Wraps a [`stash::StashError`].
    #[error("stash: {0}")]
    Stash(#[from] stash::StashError),
    /// Wraps a [`conflict_state::ConflictStateError`].
    #[error("conflict state: {0}")]
    ConflictState(#[from] conflict_state::ConflictStateError),
    /// Wraps a [`rebase::RebaseError`].
    #[error("rebase state: {0}")]
    Rebase(#[from] rebase::RebaseError),
    /// Wraps a [`recovery::RecoveryError`].
    #[error("recovery log: {0}")]
    Recovery(#[from] recovery::RecoveryError),
    /// Wraps an [`index::IndexError`].
    #[error("staging index: {0}")]
    Index(#[from] index::IndexError),
    /// Wraps a [`StoreError`] from the object store.
    #[error("object store: {0}")]
    Store(#[from] StoreError),
    /// Wraps a [`hash::FromHexError`]: text read from disk (e.g. a ref
    /// file or `ORIG_HEAD`) did not decode as an object id.
    #[error("malformed object id on disk: {0}")]
    BadHash(#[from] hash::FromHexError),
    /// Wraps an [`io::Error`] raised while reading files or directories.
    #[error("io: {0}")]
    Io(#[from] io::Error),
    /// The reachable-object walk hit [`super::graph::MAX_REACHABLE`]
    /// before completing. The live set is incomplete, so a caller must
    /// abort rather than treat beyond-cap objects as prunable.
    #[error("object graph exceeds the reachability cap; refusing to compute a partial keep-set")]
    Truncated,
    /// A ref directory nested deeper than `MAX_REF_WALK_DEPTH`. The
    /// payload is the namespace-relative path at which the cap was hit.
    #[error("ref tree too deep at {0} (fail closed)")]
    RefTooDeep(String),
    /// `.mkit` or `.mkit/objects` is a symlink. A deletion-capable gc
    /// refuses, since pruning would follow the link and unlink files
    /// outside the repo. The payload is the offending path.
    #[error("refusing to gc: {0} is a symlink (objects may live outside the repo)")]
    SymlinkedStore(String),
}
98
99/// Collect the complete set of GC retention roots for the repo at
100/// `mkit_dir` (the `.mkit` directory). The returned hashes are roots,
101/// not the closure — feed them to `reachable_closure` (or use
102/// [`live_objects`]) to get the full keep-set.
103///
104/// The all-zero hash is filtered out (an unset ref / `ORIG_HEAD`).
105///
106/// # Errors
107///
108/// [`GcRootsError`] if any source (refs, stash, op state, attestation
109/// dir) cannot be read — the caller must then abort, never prune.
110pub fn collect_roots(mkit_dir: &Path) -> Result<BTreeSet<Hash>, GcRootsError> {
111    let mut roots: BTreeSet<Hash> = BTreeSet::new();
112    let add = |h: Hash, set: &mut BTreeSet<Hash>| {
113        if h != hash::ZERO {
114            set.insert(h);
115        }
116    };
117
118    // HEAD (covers a detached HEAD not present under refs/heads).
119    if let Some(h) = refs::resolve_head(mkit_dir)? {
120        add(h, &mut roots);
121    }
122
123    // Branches, tags, and remote-tracking refs. We deliberately do NOT
124    // use `refs::list_refs`/`list_tags`/`list_remote_refs` here: those
125    // are lenient (they yield `hash: None` for malformed content, skip
126    // unreadable files, and silently stop at a depth cap), which would
127    // let a corrupt ref drop out of the root set while collection still
128    // "succeeds" — exactly the fail-open hole gc cannot tolerate. The
129    // strict walk below errors on any unreadable / undecodable / too-deep
130    // ref instead.
131    for ns in [HEADS_DIR, TAGS_DIR, REMOTES_DIR] {
132        walk_ref_roots_strict(&mkit_dir.join(ns), ns, 0, &mut roots)?;
133    }
134
135    // Stash: each stashed commit and the HEAD it was based on.
136    let repo_root = mkit_dir.parent().unwrap_or(mkit_dir);
137    for entry in stash::list(repo_root)?.entries {
138        add(entry.commit_hash, &mut roots);
139        add(entry.parent_hash, &mut roots);
140    }
141
142    // Staging index — blobs recorded by `mkit add` but not yet
143    // committed. They are reachable from no ref, so without this root
144    // staged work would be pruned once it ages past the grace window
145    // (or immediately under `--grace-secs 0`). `read_index` is strict
146    // (errors on corrupt/oversized index), so a damaged index aborts
147    // gc instead of silently dropping roots.
148    for entry in index::read_index(repo_root)?.entries {
149        add(entry.object_hash, &mut roots);
150    }
151
152    // ORIG_HEAD (written by reset and by the in-progress ops below).
153    if let Some(h) = read_optional_hash(&mkit_dir.join(ORIG_HEAD))? {
154        add(h, &mut roots);
155    }
156
157    // In-progress merge / cherry-pick.
158    if let Some(m) = conflict_state::read_merge_state(mkit_dir)? {
159        add(m.merge_head, &mut roots);
160        add(m.orig_head, &mut roots);
161    }
162    if let Some(c) = conflict_state::read_cherry_pick_state(mkit_dir)? {
163        add(c.cherry_pick_head, &mut roots);
164        add(c.orig_head, &mut roots);
165    }
166    if let Some(r) = conflict_state::read_revert_state(mkit_dir)? {
167        add(r.revert_head, &mut roots);
168        add(r.orig_head, &mut roots);
169    }
170
171    // In-progress rebase: target + every commit still to replay or
172    // already replayed onto the new base.
173    if rebase::is_rebase_in_progress(mkit_dir) {
174        let st = rebase::read_state(mkit_dir)?;
175        add(st.orig_head, &mut roots);
176        add(st.onto, &mut roots);
177        for h in st.todo.into_iter().chain(st.done) {
178            add(h, &mut roots);
179        }
180    }
181
182    // Conflict sidecar: base/ours/theirs blobs needed to resolve an
183    // in-progress conflict. Merge/cherry-pick write `.mkit/mkit-conflicts`;
184    // rebase writes its sidecar inside `.mkit/rebase-apply/`. Both are
185    // empty/absent when no conflict is recorded.
186    for dir in [mkit_dir.to_path_buf(), mkit_dir.join(REBASE_DIR)] {
187        for c in conflict_state::read_conflicts(&dir)? {
188            for h in [c.base_hash, c.ours_hash, c.theirs_hash]
189                .into_iter()
190                .flatten()
191            {
192                add(h, &mut roots);
193            }
194        }
195    }
196
197    // Attested commits — pinned so an attestation never dangles.
198    for h in attested_commits(mkit_dir)? {
199        add(h, &mut roots);
200    }
201
202    // Recovery log — commits superseded by amend/reset/rebase, retained
203    // so they stay recoverable. Clock-free here: `recovery::expire` (a
204    // gc maintenance step) drops entries past the retention window so
205    // they stop pinning objects.
206    for h in recovery::roots(mkit_dir)? {
207        add(h, &mut roots);
208    }
209
210    Ok(roots)
211}
212
213/// The full live-object keep-set for `mkit gc`: the reachable closure
214/// over every retention root from [`collect_roots`].
215///
216/// # Errors
217///
218/// [`GcRootsError`] if roots cannot be collected, or a [`StoreError`]
219/// (e.g. a root or referenced object missing) during the walk.
220pub fn live_objects(store: &ObjectStore, mkit_dir: &Path) -> Result<BTreeSet<Hash>, GcRootsError> {
221    let roots = collect_roots(mkit_dir)?;
222    let (live, truncated) = reachable_closure_checked(store, roots.iter())?;
223    if truncated {
224        return Err(GcRootsError::Truncated);
225    }
226    Ok(live)
227}
228
/// Summary counters produced by one [`run_gc`] sweep.
#[derive(Debug, Default, Clone, Copy)]
pub struct GcReport {
    /// Number of objects in the store that the sweep looked at.
    pub scanned: usize,
    /// Number of objects kept because a retention root reaches them.
    pub live: usize,
    /// Number of unreachable objects kept regardless: either still
    /// inside the grace window, or of undeterminable age (fail safe).
    pub kept_recent: usize,
    /// Number of unreachable objects pruned; in a dry run, the number
    /// that *would* have been pruned.
    pub pruned: usize,
    /// Total size in bytes of the objects counted in `pruned`.
    pub bytes_reclaimed: u64,
    /// Whether the sweep was a dry run, i.e. nothing was deleted.
    pub dry_run: bool,
}
246
247/// Mark-and-sweep prune: keep every object reachable from the retention
248/// roots ([`live_objects`]) plus every unreachable object younger than
249/// `grace_secs` (relative to `now_secs`); delete the rest. With
250/// `dry_run`, computes the report without deleting anything.
251///
252/// **Fail closed / fail safe.** If the live set can't be computed (a
253/// missing/corrupt root, a malformed ref, or the reachability cap), this
254/// returns an error and deletes nothing. An object whose age can't be
255/// read is kept, never pruned. The caller MUST hold the repo lock so the
256/// live set can't shift mid-sweep (see [`super::recovery`]); `gc` runs
257/// `recovery::expire` then this, all under that lock.
258///
259/// # Errors
260/// [`GcRootsError`] from [`live_objects`], store enumeration, or a delete.
261pub fn run_gc(
262    store: &ObjectStore,
263    mkit_dir: &Path,
264    now_secs: u64,
265    grace_secs: u64,
266    dry_run: bool,
267) -> Result<GcReport, GcRootsError> {
268    // Refuse to delete through a symlinked store: if `.mkit` or
269    // `.mkit/objects` is a symlink, `remove_object` would unlink the
270    // link target's files — potentially outside the repo. (Dry runs are
271    // safe but we reject uniformly so a preview matches the real run.)
272    reject_symlink(mkit_dir)?;
273    reject_symlink(store.objects_root())?;
274
275    // Compute the keep-set FIRST; if this fails we delete nothing.
276    let live = live_objects(store, mkit_dir)?;
277    let all = store.iter_object_hashes()?;
278
279    let mut report = GcReport {
280        dry_run,
281        ..GcReport::default()
282    };
283    for h in all {
284        report.scanned += 1;
285        if live.contains(&h) {
286            report.live += 1;
287            continue;
288        }
289        // Unreachable. Keep it if it is within the grace window, or if
290        // its age cannot be determined (fail safe — never delete when
291        // uncertain).
292        let Ok(meta) = store.object_metadata(&h) else {
293            report.kept_recent += 1;
294            continue;
295        };
296        let age_known_old = meta
297            .modified()
298            .ok()
299            .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
300            .map(|d| d.as_secs())
301            .is_some_and(|mtime| now_secs.saturating_sub(mtime) >= grace_secs);
302        if !age_known_old {
303            report.kept_recent += 1;
304            continue;
305        }
306        let len = meta.len();
307        if !dry_run {
308            store.remove_object(&h)?;
309        }
310        report.pruned += 1;
311        report.bytes_reclaimed += len;
312    }
313    Ok(report)
314}
315
316/// Strict, fail-closed walk of a ref namespace directory (e.g.
317/// `refs/heads`), inserting every ref's target hash into `roots`.
318///
319/// Unlike `refs::list_refs`, this errors instead of skipping on:
320/// unreadable files ([`io::Error`]), undecodable content
321/// ([`hash::FromHexError`]), and excessive nesting
322/// ([`GcRootsError::RefTooDeep`]). Dot-files are skipped (lock/temp
323/// cruft), and an absent namespace dir yields no roots. The all-zero
324/// hash (an unset ref) is excluded.
325fn walk_ref_roots_strict(
326    dir: &Path,
327    rel: &str,
328    depth: usize,
329    roots: &mut BTreeSet<Hash>,
330) -> Result<(), GcRootsError> {
331    if depth > MAX_REF_WALK_DEPTH {
332        return Err(GcRootsError::RefTooDeep(rel.to_owned()));
333    }
334    let rd = match fs::read_dir(dir) {
335        Ok(rd) => rd,
336        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(()),
337        Err(e) => return Err(e.into()),
338    };
339    for entry in rd {
340        let entry = entry?;
341        let name = entry.file_name();
342        let name = name.to_string_lossy();
343        let ft = entry.file_type()?;
344        if ft.is_dir() {
345            walk_ref_roots_strict(&entry.path(), &format!("{rel}/{name}"), depth + 1, roots)?;
346            continue;
347        }
348        if !ft.is_file() || name.starts_with('.') {
349            // Skip non-files and lock/temp cruft (e.g. `*.lock`, dotfiles).
350            continue;
351        }
352        // Strict: read + decode, erroring (fail closed) on any failure.
353        let raw = fs::read_to_string(entry.path())?;
354        let h = hash::from_hex(raw.trim())?;
355        if h != hash::ZERO {
356            roots.insert(h);
357        }
358    }
359    Ok(())
360}
361
362/// Commit hashes that have at least one attestation envelope, taken from
363/// the `attestations/<commit-hex>/` directory names. Non-hex directory
364/// names are ignored (defensive); a missing dir yields an empty set.
365fn attested_commits(mkit_dir: &Path) -> Result<Vec<Hash>, io::Error> {
366    let dir = mkit_dir.join(ATTESTATIONS_DIR);
367    let mut out = Vec::new();
368    let rd = match fs::read_dir(&dir) {
369        Ok(rd) => rd,
370        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(out),
371        Err(e) => return Err(e),
372    };
373    for entry in rd {
374        let entry = entry?;
375        if !entry.file_type()?.is_dir() {
376            continue;
377        }
378        if let Some(name) = entry.file_name().to_str()
379            && let Ok(h) = hash::from_hex(name)
380        {
381            out.push(h);
382        }
383    }
384    Ok(out)
385}
386
387/// Read a single 64-hex object id from `path`, trimming trailing
388/// whitespace. `Ok(None)` if the file is absent.
389fn read_optional_hash(path: &Path) -> Result<Option<Hash>, GcRootsError> {
390    let raw = match fs::read_to_string(path) {
391        Ok(s) => s,
392        Err(e) if e.kind() == io::ErrorKind::NotFound => return Ok(None),
393        Err(e) => return Err(e.into()),
394    };
395    let trimmed = raw.trim();
396    if trimmed.is_empty() {
397        return Ok(None);
398    }
399    Ok(Some(hash::from_hex(trimmed)?))
400}
401
402// =====================================================================
403// Tests
404// =====================================================================
405
406/// Error if `path` is a symlink — a deletion-capable gc must not follow
407/// it (the target may be outside the repo). Absent path is fine.
408fn reject_symlink(path: &Path) -> Result<(), GcRootsError> {
409    match std::fs::symlink_metadata(path) {
410        Ok(m) if m.file_type().is_symlink() => {
411            Err(GcRootsError::SymlinkedStore(path.display().to_string()))
412        }
413        Ok(_) => Ok(()),
414        Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
415        Err(e) => Err(e.into()),
416    }
417}
418
// Unit tests for root collection (`collect_roots`), the keep-set
// (`live_objects`), and the sweep (`run_gc`), each run against a
// throwaway repo in a temp directory.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object::EntryMode;
    use crate::object::{Blob, Commit, Identity, Object, Tree, TreeEntry};
    use crate::serialize;
    use std::fs;
    use tempfile::TempDir;

    /// A repo with an initialized `.mkit` dir + object store.
    fn repo() -> (TempDir, ObjectStore) {
        let d = TempDir::new().unwrap();
        let store = ObjectStore::init(d.path()).unwrap();
        refs::init(&d.path().join(crate::MKIT_DIR)).unwrap();
        (d, store)
    }

    /// Path of the `.mkit` directory inside the temp repo `d`.
    fn mkit_dir(d: &TempDir) -> std::path::PathBuf {
        d.path().join(crate::MKIT_DIR)
    }

    /// Write a loose ref file (e.g. `refs/heads/main`) — the on-disk
    /// form `list_refs`/`list_tags` read.
    fn write_ref(md: &Path, rel: &str, h: &Hash) {
        let path = md.join(rel);
        fs::create_dir_all(path.parent().unwrap()).unwrap();
        fs::write(path, format!("{}\n", hash::to_hex(h))).unwrap();
    }

    /// Serialize `data` as a blob object, write it to the store, and
    /// return the hash the store reports for it.
    fn write_blob(s: &ObjectStore, data: &[u8]) -> Hash {
        s.write(
            &serialize::serialize(&Object::Blob(Blob {
                data: data.to_vec(),
            }))
            .unwrap(),
        )
        .unwrap()
    }

    /// Commit a single-file tree; returns `(commit, blob)` hashes.
    fn commit_one(s: &ObjectStore, name: &[u8], data: &[u8], parents: Vec<Hash>) -> (Hash, Hash) {
        let blob = write_blob(s, data);
        let tree = s
            .write(
                &serialize::serialize(&Object::Tree(Tree {
                    entries: vec![TreeEntry {
                        name: name.to_vec(),
                        mode: EntryMode::Blob,
                        object_hash: blob,
                    }],
                }))
                .unwrap(),
            )
            .unwrap();
        let commit = s
            .write(
                &serialize::serialize(&Object::Commit(Commit {
                    tree_hash: tree,
                    parents,
                    author: Identity::opaque(b"t".to_vec()),
                    signer: [0u8; 32],
                    message: name.to_vec(),
                    // Per-commit divergence so distinct fixtures don't dedup.
                    timestamp: name.len() as u64,
                    message_hash: [0u8; 32],
                    content_digest: [0u8; 32],
                    signature: [0u8; 64],
                }))
                .unwrap(),
            )
            .unwrap();
        (commit, blob)
    }

    /// A branch tip and a tag target both show up as roots.
    #[test]
    fn collect_roots_includes_branches_and_tags() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (c1, _) = commit_one(&s, b"a", b"a", vec![]);
        let (c2, _) = commit_one(&s, b"b", b"b", vec![]);
        write_ref(&md, "refs/heads/main", &c1);
        write_ref(&md, "refs/tags/v1", &c2);

        let roots = collect_roots(&md).unwrap();
        assert!(roots.contains(&c1), "branch tip must be a root");
        assert!(roots.contains(&c2), "tag target must be a root");
    }

    /// The hash stored in `ORIG_HEAD` and the commit named by an
    /// `attestations/<hex>/` directory both show up as roots.
    #[test]
    fn collect_roots_includes_orig_head_and_attested_commit() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (orig, _) = commit_one(&s, b"o", b"o", vec![]);
        let (att, _) = commit_one(&s, b"x", b"x", vec![]);
        fs::write(md.join(ORIG_HEAD), format!("{}\n", hash::to_hex(&orig))).unwrap();
        fs::create_dir_all(md.join(ATTESTATIONS_DIR).join(hash::to_hex(&att))).unwrap();

        let roots = collect_roots(&md).unwrap();
        assert!(roots.contains(&orig), "ORIG_HEAD must be a root");
        assert!(roots.contains(&att), "attested commit must be a root");
    }

    /// The keep-set contains the referenced commit and its blob, and
    /// excludes a commit/blob pair that no ref points at.
    #[test]
    fn live_objects_keeps_only_reachable_closure() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (kept, kept_blob) = commit_one(&s, b"keep", b"keep", vec![]);
        // An unreferenced commit + blob: reachable from no root.
        let (orphan, orphan_blob) = commit_one(&s, b"orphan", b"orphan", vec![]);
        write_ref(&md, "refs/heads/main", &kept);

        let live = live_objects(&s, &md).unwrap();
        assert!(
            live.contains(&kept) && live.contains(&kept_blob),
            "kept closure live"
        );
        assert!(
            !live.contains(&orphan) && !live.contains(&orphan_blob),
            "unreferenced objects must not be live"
        );
    }

    /// The multi-root closure equals the union of the per-root closures.
    #[test]
    fn reachable_closure_is_union_of_single_root_closures() {
        let (_d, s) = repo();
        let (c1, b1) = commit_one(&s, b"a", b"a", vec![]);
        let (c2, b2) = commit_one(&s, b"b", b"b", vec![]);
        let multi = super::super::graph::reachable_closure(&s, [&c1, &c2]).unwrap();
        let single1 = super::super::graph::reachable_objects(&s, &c1).unwrap();
        let single2 = super::super::graph::reachable_objects(&s, &c2).unwrap();
        let union: BTreeSet<Hash> = single1.union(&single2).copied().collect();
        assert_eq!(multi, union);
        assert!([c1, b1, c2, b2].iter().all(|h| multi.contains(h)));
    }

    /// A ref nested one directory down (`refs/remotes/origin/main`) is
    /// found by the recursive walk.
    #[test]
    fn strict_walk_picks_up_nested_remote_ref() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (c, _) = commit_one(&s, b"r", b"r", vec![]);
        write_ref(&md, "refs/remotes/origin/main", &c);
        assert!(
            collect_roots(&md).unwrap().contains(&c),
            "nested remote-tracking ref must be a root"
        );
    }

    /// With no grace window, orphans are deleted while every object in
    /// the previously computed keep-set stays in the store.
    #[test]
    fn run_gc_prunes_orphans_but_never_a_live_object() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        // Live: a branch commit + its tree + blob.
        let (kept, kept_blob) = commit_one(&s, b"keep", b"keep", vec![]);
        write_ref(&md, "refs/heads/main", &kept);
        let live = live_objects(&s, &md).unwrap();
        // Orphans: unreferenced commit + its tree + blob.
        let (orphan, orphan_blob) = commit_one(&s, b"orphan", b"orphan", vec![]);

        // grace=0 → all unreachable objects are old enough to prune.
        let report = run_gc(&s, &md, u64::MAX, 0, false).unwrap();

        // The safety invariant: every live object still present.
        for h in &live {
            assert!(s.contains(h), "gc must never delete a live object");
        }
        // Orphan closure gone.
        assert!(
            !s.contains(&orphan) && !s.contains(&orphan_blob),
            "orphans pruned"
        );
        assert_eq!(report.live, live.len());
        assert!(
            report.pruned >= 2,
            "orphan commit + blob pruned: {report:?}"
        );
        // Sanity: kept objects accounted as live.
        assert!(s.contains(&kept) && s.contains(&kept_blob));
    }

    /// A blob referenced only by an index entry is a root and survives
    /// a sweep with no grace window.
    #[test]
    fn run_gc_keeps_staged_but_uncommitted_blobs() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        // A committed branch so the repo has a normal ref-side root.
        let (kept, _) = commit_one(&s, b"k", b"k", vec![]);
        write_ref(&md, "refs/heads/main", &kept);
        // Stage a blob no commit references — what `mkit add` leaves
        // behind: the object in the store + an index entry.
        let staged = write_blob(&s, b"staged-only");
        let idx = index::Index {
            entries: vec![index::IndexEntry {
                path: "staged.txt".into(),
                status: index::EntryStatus::Blob,
                object_hash: staged,
                mtime_ns: 0,
                size: 0,
                ino: 0,
                ctime_ns: 0,
            }],
        };
        index::write_index(d.path(), &idx).unwrap();

        assert!(
            collect_roots(&md).unwrap().contains(&staged),
            "staged blob must be a retention root"
        );
        // grace=0 → anything unrooted is pruned immediately.
        run_gc(&s, &md, u64::MAX, 0, false).unwrap();
        assert!(
            s.contains(&staged),
            "gc must never delete staged-but-uncommitted content"
        );
    }

    /// An orphan inside the grace window is kept and counted under
    /// `kept_recent`, with nothing pruned.
    #[test]
    fn run_gc_grace_window_keeps_recent_orphans() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (kept, _) = commit_one(&s, b"k", b"k", vec![]);
        write_ref(&md, "refs/heads/main", &kept);
        let (orphan, _) = commit_one(&s, b"o", b"o", vec![]);

        // Huge grace window with now=0 → nothing is "old", so the orphan
        // is kept despite being unreachable.
        let report = run_gc(&s, &md, 0, u64::MAX, false).unwrap();
        assert!(s.contains(&orphan), "recent orphan kept by grace window");
        assert_eq!(report.pruned, 0);
        assert!(report.kept_recent >= 1, "{report:?}");
    }

    /// `run_gc` returns `SymlinkedStore` when `.mkit/objects` is a
    /// symlink (Unix only, since the test creates the link itself).
    #[cfg(unix)]
    #[test]
    fn run_gc_refuses_symlinked_objects_dir() {
        use std::os::unix::fs::symlink;
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (kept, _) = commit_one(&s, b"k", b"k", vec![]);
        write_ref(&md, "refs/heads/main", &kept);

        // Replace `.mkit/objects` with a symlink to an external dir. A
        // deletion-capable gc must refuse rather than prune through it.
        let external = d.path().join("external-objects");
        let real_objects = md.join("objects");
        fs::create_dir_all(&external).unwrap();
        // Move existing shards out so the symlink target holds them.
        for entry in fs::read_dir(&real_objects).unwrap() {
            let entry = entry.unwrap();
            fs::rename(entry.path(), external.join(entry.file_name())).unwrap();
        }
        fs::remove_dir_all(&real_objects).unwrap();
        symlink(&external, &real_objects).unwrap();

        let err = run_gc(&s, &md, u64::MAX, 0, false).unwrap_err();
        assert!(
            matches!(err, GcRootsError::SymlinkedStore(_)),
            "gc must refuse a symlinked objects dir, got {err:?}"
        );
    }

    /// A dry run reports the orphan as pruned but leaves it in the store.
    #[test]
    fn run_gc_dry_run_deletes_nothing() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (kept, _) = commit_one(&s, b"k", b"k", vec![]);
        write_ref(&md, "refs/heads/main", &kept);
        let (orphan, _) = commit_one(&s, b"o", b"o", vec![]);

        let report = run_gc(&s, &md, u64::MAX, 0, true).unwrap();
        assert!(report.dry_run && report.pruned >= 1, "{report:?}");
        assert!(s.contains(&orphan), "dry run must not delete the orphan");
    }

    /// A commit recorded in the recovery log, and its blob, survive a
    /// sweep with no grace window even though no ref points at them.
    #[test]
    fn run_gc_keeps_recovery_logged_orphan() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (kept, _) = commit_one(&s, b"k", b"k", vec![]);
        write_ref(&md, "refs/heads/main", &kept);
        // An orphan that is recorded in the recovery log must survive gc.
        let (superseded, superseded_blob) = commit_one(&s, b"old", b"old", vec![]);
        super::super::recovery::record(
            &md,
            &super::super::recovery::RecoveryEntry {
                timestamp: 1,
                op: "amend".into(),
                superseded,
                branch: "main".into(),
            },
        )
        .unwrap();

        run_gc(&s, &md, u64::MAX, 0, false).unwrap();
        assert!(
            s.contains(&superseded) && s.contains(&superseded_blob),
            "a recovery-logged commit must not be pruned"
        );
    }

    /// A commit recorded in the recovery log shows up as a root.
    #[test]
    fn collect_roots_includes_recovery_log_entries() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (superseded, _) = commit_one(&s, b"old", b"old", vec![]);
        super::super::recovery::record(
            &md,
            &super::super::recovery::RecoveryEntry {
                timestamp: 1,
                op: "amend".into(),
                superseded,
                branch: "main".into(),
            },
        )
        .unwrap();
        assert!(
            collect_roots(&md).unwrap().contains(&superseded),
            "a superseded commit in the recovery log must be a root"
        );
    }

    /// A ref file whose content is not a valid object id makes
    /// collection return `BadHash`.
    #[test]
    fn collect_roots_fails_closed_on_malformed_ref() {
        let (d, _s) = repo();
        let md = mkit_dir(&d);
        // A corrupt ref file (not 64-hex) must error, never be silently
        // dropped — else gc could prune the object it should pin.
        let bad = md.join("refs/heads/corrupt");
        fs::create_dir_all(bad.parent().unwrap()).unwrap();
        fs::write(&bad, b"not-a-valid-object-id\n").unwrap();
        assert!(
            matches!(collect_roots(&md), Err(GcRootsError::BadHash(_))),
            "malformed ref must fail closed"
        );
    }

    /// A garbage dot-file next to a real ref does not break collection;
    /// the real ref is still returned.
    #[test]
    fn strict_walk_skips_lock_and_dotfile_cruft() {
        let (d, s) = repo();
        let md = mkit_dir(&d);
        let (c, _) = commit_one(&s, b"m", b"m", vec![]);
        write_ref(&md, "refs/heads/main", &c);
        // Atomic-write temp files are dotfiles; a stale one must not
        // break collection.
        fs::write(md.join("refs/heads").join(".main.tmp.123.4"), b"garbage").unwrap();
        let roots = collect_roots(&md).unwrap();
        assert!(roots.contains(&c), "real ref still collected past cruft");
    }
}