mkit_cli/commands/gc.rs
1//! `mkit gc` — reclaim unreachable objects (mark-and-sweep prune).
2//!
3//! Under the repo lock: expire the recovery log, compute the live-object
4//! keep-set (every object reachable from the retention roots — refs,
5//! stash, in-progress op state, attestations, and the recovery log), then
6//! delete unreachable objects that are older than the grace window.
7//!
8//! Safety: the live set is computed **before** anything is deleted and
9//! the whole run is **fail-closed** — a missing/corrupt root, a malformed
10//! ref, or the reachability cap aborts with nothing removed (see
//! `mkit_core::ops::gc`). Unreachable objects younger than the grace
//! window (default 14 days) are kept as a belt-and-suspenders guard for
//! objects written just before the reference that points at them is
//! published. Use `--dry-run` to preview, and `--grace-secs 0` to prune
//! every unreachable object regardless of age.
16//!
17//! Concurrency: gc holds the repo lock for its whole run, and the
18//! root-publishing paths now take the same lock around their object-write +
19//! ref/attestation-publish window — `tag` (annotated/signed), `fetch` /
20//! `pull`, and `attest` (#267) — so they are serialized against gc. The
21//! grace window remains the belt-and-suspenders net (like Git's default
22//! `gc.pruneExpire`, vs `prune --expire=now`): `--grace-secs 0` bypasses it
23//! and prints a warning, but with the publishers now locked it is safe even
24//! under concurrency.
25
26use std::io::Write;
27use std::time::{SystemTime, UNIX_EPOCH};
28
29use clap::Parser;
30use mkit_core::ops::recovery::{self, RetentionPolicy};
31use mkit_core::ops::run_gc;
32use mkit_core::store::ObjectStore;
33
34use crate::clap_shim;
35use crate::exit;
36
/// Grace window applied when `--grace-secs` is not given: two weeks,
/// expressed in seconds (the same span as Git's default `gc.pruneExpire`).
const DEFAULT_GRACE_SECS: u64 = {
    const SECS_PER_DAY: u64 = 24 * 60 * 60;
    14 * SECS_PER_DAY
};
39
// Command-line options for `mkit gc`, parsed through clap's derive API.
//
// Reviewer notes in this block use plain `//` comments on purpose: `///`
// doc comments on a clap-derived type are picked up as help text, so
// editing them changes what `--help` prints.
#[derive(Debug, Parser)]
#[command(
    name = "mkit gc",
    about = "Reclaim unreachable objects (delete unreachable objects older than the grace window)."
)]
struct GcOpts {
    /// Show what would be pruned without deleting anything.
    #[arg(short = 'n', long = "dry-run")]
    dry_run: bool,

    // NOTE(review): the module header says the root publishers (`tag`,
    // `fetch`/`pull`, `attest`, #267) now take the repo lock, which would
    // make `0` safe under concurrency, while the help text below still
    // says it is only safe with no other mkit process running. Confirm
    // which is current before changing the user-facing wording.
    /// Keep unreachable objects younger than this many seconds (default
    /// 14 days). `0` prunes every unreachable object, but bypasses the
    /// grace window that protects in-flight objects — only safe when no
    /// other mkit process is operating on the repo.
    #[arg(long = "grace-secs", value_name = "SECS", default_value_t = DEFAULT_GRACE_SECS)]
    grace_secs: u64,
}
57
58#[must_use]
59pub fn run(args: &[String]) -> u8 {
60 let opts = match clap_shim::parse::<GcOpts>("mkit gc", args) {
61 Ok(o) => o,
62 Err(code) => return code,
63 };
64 let cwd = match std::env::current_dir() {
65 Ok(p) => p,
66 Err(e) => return emit_err(&format!("cwd: {e}"), exit::NOINPUT),
67 };
68 let store = match ObjectStore::open(&cwd) {
69 Ok(s) => s,
70 Err(e) => return emit_err(&format!("not a mkit repo: {e}"), exit::GENERAL_ERROR),
71 };
72 let mkit_dir = cwd.join(mkit_core::MKIT_DIR);
73
74 // Hold the worktree lock for the whole run. This serializes gc
75 // against worktree/index-mutating commands and other gc runs. It does
76 // NOT serialize against the non-worktree root publishers (`tag`,
77 // `fetch`, `attest`) — those don't take this lock yet (#267), so the
78 // grace window is what protects their in-flight objects from a
79 // concurrent prune.
80 let _lock = match super::acquire_worktree_lock(&cwd) {
81 Ok(l) => l,
82 Err(code) => return code,
83 };
84
85 let now = SystemTime::now()
86 .duration_since(UNIX_EPOCH)
87 .map_or(0, |d| d.as_secs());
88
89 if opts.grace_secs == 0 && !opts.dry_run {
90 let mut stderr = std::io::stderr().lock();
91 let _ = writeln!(
92 stderr,
93 "warning: --grace-secs 0 prunes every unreachable object, bypassing the grace window; \
94 ensure no other mkit process is operating on this repo"
95 );
96 }
97
98 // Expire stale recovery entries first so they stop pinning objects;
99 // abort on error (fail closed — don't prune against a half-expired log).
100 // A dry run must not mutate state, so it only *counts* what would
101 // expire (and therefore reports a conservative prune set, since those
102 // soon-to-expire commits are still pinned during the preview).
103 let policy = RetentionPolicy::default();
104 let expired = if opts.dry_run {
105 match recovery::would_expire(&mkit_dir, now, &policy) {
106 Ok(n) => n,
107 Err(e) => return emit_err(&format!("recovery log: {e}"), exit::GENERAL_ERROR),
108 }
109 } else {
110 match recovery::expire(&mkit_dir, now, &policy) {
111 Ok(n) => n,
112 Err(e) => return emit_err(&format!("expire recovery log: {e}"), exit::CANTCREAT),
113 }
114 };
115
116 let report = match run_gc(&store, &mkit_dir, now, opts.grace_secs, opts.dry_run) {
117 Ok(r) => r,
118 Err(e) => return emit_err(&format!("gc: {e}"), exit::GENERAL_ERROR),
119 };
120
121 let mut stderr = std::io::stderr().lock();
122 let (prune_verb, expire_verb) = if report.dry_run {
123 ("would prune", "would expire")
124 } else {
125 ("pruned", "expired")
126 };
127 let _ = writeln!(
128 stderr,
129 "gc{}: {prune_verb} {} object(s), {} bytes; scanned {}, live {}, kept-recent {}; {expire_verb} {} recovery entr{}",
130 if report.dry_run { " (dry run)" } else { "" },
131 report.pruned,
132 report.bytes_reclaimed,
133 report.scanned,
134 report.live,
135 report.kept_recent,
136 expired,
137 if expired == 1 { "y" } else { "ies" },
138 );
139 exit::OK
140}
141
/// Prints `error: <msg>` on stderr and returns `code` unchanged, so a caller
/// can report a failure and produce its exit code in one expression.
fn emit_err(msg: &str, code: u8) -> u8 {
    // Best effort: a failed write to stderr is ignored so the exit code
    // still reaches the caller.
    let _ = writeln!(std::io::stderr().lock(), "error: {msg}");
    code
}