Skip to main content

git_remote_object_store/manage/
gc.rs

//! `gc` subcommand for the management CLI (issue #66, Phase 5 of #52).
//!
//! Runs the two-phase mark-and-sweep flow defined in
//! [`crate::packchain::gc`]. Three operating modes:
//!
//! - **Default**: mark, then sweep. Tombstones from the current mark
//!   pass do not become eligible for sweep until at least
//!   `grace_hours` elapse, so the same invocation only sweeps
//!   tombstones from earlier runs. This is the cron-friendly shape:
//!   schedule `gc` weekly and previous weeks' tombstones age out
//!   while the current week's tombstones wait.
//! - **`--mark-only`**: produce a tombstone but do not sweep. Useful
//!   in CI to surface orphan counts without bucket mutation.
//! - **`--sweep-only`**: skip mark, only process pre-existing
//!   tombstones. Operators use this after a manual mark phase or
//!   to re-attempt deletions a previous sweep skipped.
//!
//! All output is human-readable on stdout; the management CLI may
//! write to stdout per `.claude/rules/protocol-stdout.md`. The formatter
//! lives in [`super::gc_output`] so this subcommand and
//! `compact --with-gc` (see [`super::compact`]) cannot drift apart.
22
23use std::io::Write;
24use std::sync::Arc;
25
26use tracing::info;
27
28use super::ManageError;
29use super::gc_output::{format_mark_outcome, format_sweep_outcome};
30use crate::object_store::ObjectStore;
31use crate::packchain::gc;
32
33/// Tunables for [`Gc::run`]. Field semantics mirror the CLI flags.
34#[derive(Debug, Clone, Copy, Default)]
35pub struct GcOpts {
36    /// Operating mode. The two-boolean `mark_only`/`sweep_only`
37    /// shape was prone to silently no-oping the conflicting
38    /// combination; the enum makes the three valid states the only
39    /// representable states.
40    pub mode: GcMode,
41    /// `force` mode for sweep: bypass grace window and the orphan
42    /// re-check. Operator-asserted safe.
43    pub force: bool,
44    /// Grace window in hours before a tombstone becomes eligible for
45    /// sweep. `None` falls back to [`crate::packchain::gc::grace_hours_from_env`]
46    /// which honours `GIT_REMOTE_OBJECT_STORE_GC_GRACE_HOURS` (defaulting
47    /// to [`gc::DEFAULT_GRACE_HOURS`] when unset).
48    pub grace_hours: Option<u64>,
49}
50
/// Selects the phases of the mark-and-sweep flow that [`Gc::run`] performs.
///
/// The CLI parser maps `--mark-only` / `--sweep-only` onto the
/// corresponding variant and rejects the case where both flags are
/// given. The earlier two-boolean representation would instead have
/// collapsed that combination into a silent no-op.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum GcMode {
    /// Run mark, then sweep. Suited to cron: tombstones written by
    /// earlier runs age out, while the tombstone from this run waits
    /// out the grace window.
    #[default]
    Default,
    /// Run mark only: a tombstone is produced and nothing is deleted.
    MarkOnly,
    /// Run sweep only: existing tombstones are processed and no new
    /// mark pass happens.
    SweepOnly,
}
69
70/// `gc` runner. Held by the CLI for the lifetime of one invocation.
71pub struct Gc {
72    store: Arc<dyn ObjectStore>,
73    prefix: String,
74    opts: GcOpts,
75}
76
77impl Gc {
78    /// Construct a runner. `prefix` is the parsed remote URL's
79    /// repository prefix without a trailing slash; pass an empty
80    /// string for bucket-root repositories.
81    #[must_use]
82    pub fn new(store: Arc<dyn ObjectStore>, prefix: impl Into<String>, opts: GcOpts) -> Self {
83        Self {
84            store,
85            prefix: prefix.into(),
86            opts,
87        }
88    }
89
90    /// Execute the configured flow.
91    ///
92    /// # Errors
93    ///
94    /// Returns [`ManageError::Store`] for transport failures and
95    /// [`ManageError::Packchain`] for engine-level failures (corrupt
96    /// `chain.json`, schema-version mismatch).
97    pub async fn run(&self) -> Result<(), ManageError> {
98        // Match `Compact::run` and the helper-protocol writers: pass
99        // an unlocked `Stdout` rather than holding a `StdoutLock`
100        // across `.await` points. Holding the lock through the
101        // mark/sweep network round-trips would (a) make this future
102        // `!Send` for no reason and (b) block any other `println!`
103        // for the duration of the I/O. `Write` on `Stdout` already
104        // takes the lock per-call.
105        self.run_with_writer(&mut std::io::stdout()).await
106    }
107
108    async fn run_with_writer<W: Write>(&self, out: &mut W) -> Result<(), ManageError> {
109        let store_ref = self.store.as_ref();
110
111        if self.opts.mode != GcMode::SweepOnly {
112            let mark_outcome = gc::mark(store_ref, &self.prefix, gc::MarkOpts::default()).await?;
113            format_mark_outcome(out, &mark_outcome)?;
114            if mark_outcome.orphan_count != 0 {
115                info!(
116                    run_id = %mark_outcome.run_id,
117                    key = %mark_outcome.tombstone_key,
118                    "gc mark completed",
119                );
120            }
121        }
122
123        if self.opts.mode != GcMode::MarkOnly {
124            let grace_hours = gc::resolve_grace_hours(self.opts.grace_hours);
125            let sweep_outcome = gc::sweep(
126                store_ref,
127                &self.prefix,
128                gc::SweepOpts {
129                    grace_hours,
130                    force: self.opts.force,
131                },
132            )
133            .await?;
134            format_sweep_outcome(out, &sweep_outcome)?;
135        }
136        Ok(())
137    }
138}
139
#[cfg(test)]
mod tests {
    use super::*;
    use crate::git::RefName;
    use crate::object_store::ObjectStoreError;
    use crate::object_store::mock::MockStore;
    use crate::packchain::keys::{pack_idx_key, pack_key};
    use crate::packchain::manifest::write_chain;
    use crate::packchain::schema::{ChainManifest, ChainSegment, Sha40};
    use bytes::Bytes;

    const SHA_TIP: &str = "0000000000000000000000000000000000000001";
    const SHA_PACK_LIVE: &str = "1111111111111111111111111111111111111111";
    const SHA_PACK_ORPHAN: &str = "2222222222222222222222222222222222222222";

    /// Parse a test SHA literal; the constants above are all valid.
    fn sha40(s: &str) -> Sha40 {
        Sha40::try_new(s).unwrap()
    }

    /// The single ref whose chain the tests write.
    fn ref_main() -> RefName {
        RefName::new("refs/heads/main").unwrap()
    }

    /// A one-segment chain whose only pack is `SHA_PACK_LIVE`.
    fn single_segment_chain() -> ChainManifest {
        let segment = ChainSegment {
            sha: sha40(SHA_TIP),
            parent_sha: None,
            pack: format!("packs/{SHA_PACK_LIVE}.pack"),
            bytes: 1_024,
        };
        ChainManifest {
            v: 1,
            tip: sha40(SHA_TIP),
            full_at: sha40(SHA_TIP),
            segments: vec![segment],
        }
    }

    /// Bucket key of a pack file under the `repo` prefix.
    fn repo_pack_path(sha: &str) -> String {
        format!("repo/packs/{sha}.pack")
    }

    /// A `Gc` runner over `store` for the `repo` prefix.
    fn gc_for(store: &Arc<MockStore>, opts: GcOpts) -> Gc {
        Gc::new(Arc::clone(store) as Arc<dyn ObjectStore>, "repo", opts)
    }

    /// Write the live chain, then insert pack + idx objects for both
    /// the referenced pack and an unreferenced (orphan) pack.
    async fn seed_state(store: &MockStore, prefix: Option<&str>) {
        write_chain(store, prefix, &ref_main(), &single_segment_chain())
            .await
            .unwrap();
        for sha in [SHA_PACK_LIVE, SHA_PACK_ORPHAN] {
            let pack_sha = sha40(sha);
            store.insert(pack_key(prefix, &pack_sha), Bytes::from_static(b"PACK"));
            store.insert(pack_idx_key(prefix, &pack_sha), Bytes::from_static(b"IDX"));
        }
    }

    #[tokio::test]
    async fn run_mark_only_writes_tombstone_without_sweep() {
        let store = Arc::new(MockStore::new());
        seed_state(&store, Some("repo")).await;
        let opts = GcOpts {
            mode: GcMode::MarkOnly,
            ..GcOpts::default()
        };
        gc_for(&store, opts).run().await.unwrap();
        // One tombstone was written, and because sweep never ran the
        // orphan pack is still in the bucket.
        let metas = store.list("repo/gc/").await.unwrap();
        assert_eq!(metas.len(), 1, "exactly one tombstone after mark-only");
        store
            .get_bytes(&repo_pack_path(SHA_PACK_ORPHAN))
            .await
            .expect("orphan pack must survive mark-only");
    }

    #[tokio::test]
    async fn run_sweep_only_with_force_deletes_orphans() {
        let store = Arc::new(MockStore::new());
        seed_state(&store, Some("repo")).await;
        // A tombstone has to exist before sweep-only has work to do.
        gc::mark(store.as_ref(), "repo", gc::MarkOpts::default())
            .await
            .unwrap();
        // Forced sweep: grace window and re-check are both skipped.
        let opts = GcOpts {
            mode: GcMode::SweepOnly,
            force: true,
            ..GcOpts::default()
        };
        gc_for(&store, opts).run().await.unwrap();
        // The orphan pack is gone while the live pack is untouched.
        let err = store
            .get_bytes(&repo_pack_path(SHA_PACK_ORPHAN))
            .await
            .unwrap_err();
        assert!(matches!(err, ObjectStoreError::NotFound(_)));
        store
            .get_bytes(&repo_pack_path(SHA_PACK_LIVE))
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn run_mark_then_sweep_force_round_trips() {
        let store = Arc::new(MockStore::new());
        seed_state(&store, Some("repo")).await;
        let opts = GcOpts {
            force: true,
            ..GcOpts::default()
        };
        gc_for(&store, opts).run().await.unwrap();
        // The forced sweep ran within the same call, so the orphan
        // marked moments earlier has already been deleted.
        let err = store
            .get_bytes(&repo_pack_path(SHA_PACK_ORPHAN))
            .await
            .unwrap_err();
        assert!(matches!(err, ObjectStoreError::NotFound(_)));
    }

    #[tokio::test]
    async fn run_with_no_orphans_is_noop() {
        let store = Arc::new(MockStore::new());
        // The bucket holds exactly one pack and the live chain
        // references it.
        write_chain(
            store.as_ref(),
            Some("repo"),
            &ref_main(),
            &single_segment_chain(),
        )
        .await
        .unwrap();
        store.insert(
            pack_key(Some("repo"), &sha40(SHA_PACK_LIVE)),
            Bytes::from_static(b"PACK"),
        );
        gc_for(&store, GcOpts::default()).run().await.unwrap();
        let metas = store.list("repo/gc/").await.unwrap();
        assert!(metas.is_empty(), "no tombstone when no orphans");
    }
}
295}