Skip to main content

git_remote_object_store/manage/
mod.rs

1//! Management CLI: `doctor`, `delete-branch`, `protect`, `unprotect`.
2//!
3//! These commands operate against the same on-bucket object layout as
4//! the helper protocol (bundles under `<prefix>/<ref>/`, `PROTECTED#`
5//! markers, lock files).
6//!
7//! The library entry points (`Doctor`, `ManageBranch`) take an
8//! [`ObjectStore`][crate::object_store::ObjectStore] and a
9//! [`Prompter`], so the binary, mock-backed unit tests, and any future
10//! non-interactive frontend share the same code path.
11
12pub mod branch;
13pub mod compact;
14pub mod doctor;
15pub mod gc;
16pub(crate) mod gc_output;
17pub(crate) mod snapshot;
18
19use std::fmt;
20use std::io;
21
22use thiserror::Error;
23
24use crate::keys;
25use crate::object_store::{ObjectMeta, ObjectStoreError};
26
/// `fmt = ...` helper for [`ManageError::PartialDelete`]: renders the
/// operator-facing "<n> of <attempted> key(s) could not be deleted"
/// line followed by the comma-separated list of surviving keys.
///
/// The noun sits directly after the `attempted` total, so it has to
/// agree in number with `attempted`, not with the count of surviving
/// keys: "1 of 3 keys", "1 of 1 key". Pluralising on the survivor
/// count would render the ungrammatical "1 of 3 key".
///
/// thiserror's `fmt = path` hook calls this with each variant field
/// passed by reference and the formatter as the final argument; we
/// silence `clippy::{ptr_arg, trivially_copy_pass_by_ref}` because
/// thiserror controls the signature, not us.
///
/// # Errors
///
/// Propagates any error reported by the underlying formatter.
#[allow(clippy::ptr_arg, clippy::trivially_copy_pass_by_ref)]
fn fmt_partial_delete(
    branch: &String,
    undeleted: &Vec<String>,
    attempted: &usize,
    f: &mut fmt::Formatter<'_>,
) -> fmt::Result {
    let n = undeleted.len();
    // "<n> of <attempted> <noun>": the noun belongs to the total.
    let noun = if *attempted == 1 { "key" } else { "keys" };
    write!(
        f,
        "delete-branch {branch} failed: {n} of {attempted} {noun} could not be deleted: {} (retry to converge)",
        undeleted.join(", "),
    )
}
50
51/// Default lock TTL in seconds. Re-exported from
52/// [`crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS`] so the doctor's
53/// stale-lock predicate and `acquire_lock`'s TTL cannot silently drift.
54pub use crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS;
55
/// Reports whether `key` names a lock file, i.e. ends with the exact,
/// case-sensitive `.lock` suffix.
///
/// The suffix is a wire-format token on a case-sensitive S3/Azure key
/// rather than a filesystem extension, so clippy's
/// case-insensitive-extension hint does not apply. It is silenced once
/// here so callers don't need to repeat the rationale.
#[allow(clippy::case_sensitive_file_extension_comparisons)]
pub(crate) fn is_lock_key(key: &str) -> bool {
    const LOCK_SUFFIX: &str = ".lock";
    key.ends_with(LOCK_SUFFIX)
}
64
65/// `true` iff `entries` contains at least one key that represents real
66/// branch data — i.e. NOT a lock file and NOT a `PROTECTED#` marker.
67///
68/// A branch whose only residue is operational metadata (a stale
69/// `*.lock` or a previously-written `PROTECTED#` marker) is treated as
70/// gone for the purposes of "does the branch still exist on the
71/// bucket?" — those keys are coordination state, not user-visible
72/// branch data. Both `ManageBranch::protect` (issue #137) and
73/// `Doctor::fix_head` (issue #138) consult this helper before writing
74/// state that would otherwise re-anchor against a deleted branch.
75pub(crate) fn has_branch_data(entries: &[ObjectMeta]) -> bool {
76    entries.iter().any(|entry| {
77        let last = entry
78            .key
79            .rsplit_once('/')
80            .map_or(entry.key.as_str(), |(_, s)| s);
81        !is_lock_key(&entry.key) && !keys::is_protected_marker_segment(last)
82    })
83}
84
/// The specific observation behind a [`ManageError::StaleSnapshot`].
///
/// The re-check that runs immediately before a mutating write can
/// fail in two observably different ways, and each gets its own
/// operator-facing wording:
///
/// * [`Deleted`][StaleReason::Deleted] — nothing at all was found
///   under the entity's prefix (or the singleton key was `NotFound`).
///   A concurrent delete completed cleanly.
///
/// * [`ResidueOnly`][StaleReason::ResidueOnly] — keys were found, but
///   none carry branch data: only `*.lock` files and / or a
///   `PROTECTED#` marker remain. Operational metadata can outlive
///   user-visible branch data when a concurrent delete runs
///   partially, and writing HEAD against that residue would re-create
///   the invalid-HEAD condition the doctor exists to prevent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StaleReason {
    /// The entity's prefix is empty (or its singleton key returned
    /// `NotFound`).
    Deleted,
    /// The entity's prefix holds nothing but operational metadata
    /// (lock files and / or a `PROTECTED#` marker).
    ResidueOnly,
}

impl fmt::Display for StaleReason {
    /// Writes the predicate half of the stale-snapshot message; the
    /// text is meant to follow the entity name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wording = match *self {
            Self::Deleted => "was deleted between selection and write",
            Self::ResidueOnly => {
                "is considered gone — only operational metadata \
                 (lock files / PROTECTED# marker) remains under its prefix"
            }
        };
        f.write_str(wording)
    }
}
121
/// Errors surfaced by the management surface.
///
/// This is the error type returned by the [`Prompter`] methods in this
/// module. The `Display` text of each variant is the operator-facing
/// message; `Store`, `Io` and `Packchain` are `transparent` wrappers
/// that forward `Display` and `source` to the wrapped error and are
/// constructible via `?` thanks to their `#[from]` conversions.
#[derive(Debug, Error)]
pub enum ManageError {
    /// Underlying object-store call failed.
    #[error(transparent)]
    Store(#[from] ObjectStoreError),

    /// `delete-branch` / `protect` / `unprotect` was invoked against a
    /// branch that has no objects under `<prefix>/refs/heads/<branch>/`.
    /// The payload is rendered verbatim after "branch not found: ".
    #[error("branch not found: {0}")]
    BranchNotFound(String),

    /// `delete-branch` was invoked against a branch that has a
    /// `PROTECTED#` marker. Mirrors the refusal the helper-protocol
    /// delete path emits so both surfaces share one wording.
    ///
    /// The payload is NOT interpolated into the message: `<url>` and
    /// `<branch>` in the text are literal placeholders. Presumably the
    /// payload is the branch name — confirm against the constructing
    /// call site.
    #[error(
        "ref is protected. Run git-remote-object-store unprotect <url> <branch> to remove protection before deleting."
    )]
    Protected(String),

    /// `delete-branch` could not acquire the per-ref `LOCK#.lock`
    /// because another writer (a concurrent `git push` / delete /
    /// compact) currently holds it. Mirrors the helper-protocol
    /// push path's contention surface so both delete surfaces
    /// converge on the same "another operation in progress" wording.
    /// Issue #158: without this lock, a concurrent push that lands
    /// between the post-prompt re-list and the sweep is silently
    /// missed and the ref survives despite an apparent success.
    #[error(
        "could not acquire ref lock at {lock}. Another client may be pushing or deleting. If this persists beyond {ttl_seconds}s, run git-remote-object-store doctor to inspect and optionally clear stale locks."
    )]
    LockContended {
        /// Branch the lock guards. Not interpolated into the
        /// `Display` message, which names the lock key instead.
        branch: String,
        /// Full lock key on the bucket (`<prefix>/<ref>/LOCK#.lock`).
        /// Operators copy this into a doctor invocation.
        lock: String,
        /// Lock TTL in seconds at the time of attempted acquisition,
        /// rendered into the operator-facing message so a tuned
        /// `GIT_REMOTE_OBJECT_STORE_LOCK_TTL_SECONDS` is visible.
        ttl_seconds: i64,
    },

    /// `delete-branch` swept the fresh listing but one or more per-key
    /// deletes failed with a non-`NotFound` error. The loop continues
    /// past each transient failure so the caller has a complete
    /// inventory of which keys survived; this variant carries that
    /// inventory verbatim.
    ///
    /// Retry on the same branch is naturally idempotent — the re-list
    /// at the start of the next `delete` call will only show the
    /// surviving keys, and the same loop will try to delete them. A
    /// `NotFound` mid-sweep is tolerated and counts as success, so the
    /// `undeleted` field is strictly the set of keys whose deletes
    /// raised something else (Network, `AccessDenied`, etc.).
    ///
    /// The `Display` text is produced by `fmt_partial_delete` rather
    /// than an inline format string.
    #[error(fmt = fmt_partial_delete)]
    PartialDelete {
        /// Branch the sweep ran against.
        branch: String,
        /// Keys whose per-key delete returned a non-`NotFound` error.
        /// Stored verbatim so a retry-by-key tool can target exactly
        /// what survived.
        undeleted: Vec<String>,
        /// Total number of keys the fresh listing yielded, for the
        /// operator-facing "N of M" framing in the error message.
        attempted: usize,
    },

    /// Branch name failed `gix-validate`'s strict ref-name check; we
    /// reject these at the management boundary so a value like
    /// `foo/../bar` cannot land as a literal substring of a stored
    /// object key. The payload is rendered verbatim after
    /// "invalid branch name: ".
    #[error("invalid branch name: {0}")]
    InvalidBranch(String),

    /// User cancelled an interactive prompt via Ctrl+C or EOF. A
    /// deliberate "no" on a confirmation prompt is not an error —
    /// callers (`ManageBranch::delete`, `fix_multiple_bundles`) print
    /// "Aborted" and return `Ok(())`.
    #[error("operation cancelled")]
    Cancelled,

    /// I/O error from `dialoguer` or other non-store sources.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// A defensive invariant inside the management code was violated —
    /// for example a snapshot map lookup that the caller had previously
    /// proven to exist, or a prompter returning an out-of-range index.
    /// These should not happen in practice; surfacing them as a typed
    /// error keeps the helper from aborting the process.
    #[error("internal management error: {0}")]
    Internal(String),

    /// `doctor`'s top-of-run snapshot disagreed with a fresh re-check
    /// taken immediately before a mutating write — the on-bucket state
    /// changed under us between the snapshot LIST and the write. The
    /// canonical case (issue #138) is `fix_head` racing against a
    /// concurrent `git push :<branch>` or `manage delete-branch`: the
    /// operator picks a HEAD candidate from the snapshot, but by the
    /// time the prompt returns the chosen branch has been deleted.
    /// Writing HEAD anyway would reproduce the invalid-HEAD condition
    /// the doctor was trying to fix.
    ///
    /// Carries the entity whose presence was re-verified (e.g.
    /// `"refs/heads/main"`) and a [`StaleReason`] describing exactly
    /// what the re-check observed, so the operator-facing message names
    /// the branch and tells them to re-run the doctor.
    #[error("doctor snapshot is stale: {entity} {reason}; re-run doctor")]
    StaleSnapshot {
        /// The ref-path or other entity whose presence was re-verified.
        entity: String,
        /// What the re-check actually saw under that entity's prefix.
        /// Its `Display` text is spliced in directly after `entity`.
        reason: StaleReason,
    },

    /// Packchain engine surface error. Surfaced by the `doctor`'s
    /// engine-aware audit path. Carries the typed source so the
    /// `main`-level downcast can recognise transport failures and
    /// emit the categorical `fatal:` line.
    #[error(transparent)]
    Packchain(#[from] crate::packchain::PackchainError),
}
245
/// Interactive UI surface used by [`doctor`] and [`branch`].
///
/// Production binaries inject [`DialoguerPrompter`]; tests inject
/// `ScriptedPrompter` (gated on `test-util`) so prompt-driven flows
/// can be exercised deterministically without spawning the binary.
///
/// Implementors must be `Send + Sync`. Both methods take `&self`, so
/// an implementation that tracks state between prompts needs interior
/// mutability.
pub trait Prompter: Send + Sync {
    /// Ask the user to pick one of `options` by index. `prompt` is the
    /// short headline shown above the choices.
    ///
    /// On success the returned value is the chosen index. The trait
    /// itself cannot enforce that it lies within `options`, so callers
    /// should treat an out-of-range index as an internal error rather
    /// than indexing unchecked.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] if the user aborts (Ctrl+C or
    /// EOF), or [`ManageError::Io`] for underlying I/O failures.
    fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError>;

    /// Ask the user a yes/no question. Returns `Ok(true)` for "yes" and
    /// `Ok(false)` for "no".
    ///
    /// A "no" is a normal answer, not an error; only an aborted prompt
    /// or an I/O failure produces `Err`.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] on EOF or signal, or
    /// [`ManageError::Io`] for underlying I/O failures.
    fn confirm(&self, prompt: &str) -> Result<bool, ManageError>;
}
270
271/// Default [`Prompter`] backed by the `dialoguer` crate.
272///
273/// Each method runs synchronously on the calling thread. Callers driving
274/// the prompter from a `tokio::main` runtime should wrap calls in
275/// [`tokio::task::spawn_blocking`] when responsiveness matters; the
276/// management CLI today drives prompts serially between async I/O calls,
277/// so a brief blocking read is acceptable.
278#[derive(Debug, Default, Clone, Copy)]
279pub struct DialoguerPrompter;
280
281impl Prompter for DialoguerPrompter {
282    fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError> {
283        Ok(dialoguer::Select::new()
284            .with_prompt(prompt)
285            .items(options)
286            .default(0)
287            .interact()?)
288    }
289
290    fn confirm(&self, prompt: &str) -> Result<bool, ManageError> {
291        Ok(dialoguer::Confirm::new()
292            .with_prompt(prompt)
293            .default(false)
294            .interact()?)
295    }
296}
297
298impl From<dialoguer::Error> for ManageError {
299    fn from(err: dialoguer::Error) -> Self {
300        match err {
301            dialoguer::Error::IO(io_err) if io_err.kind() == io::ErrorKind::Interrupted => {
302                ManageError::Cancelled
303            }
304            dialoguer::Error::IO(io_err) => ManageError::Io(io_err),
305        }
306    }
307}
308
309#[cfg(any(test, feature = "test-util"))]
310pub use scripted::ScriptedPrompter;
311
#[cfg(any(test, feature = "test-util"))]
mod scripted {
    use std::collections::VecDeque;
    use std::sync::Mutex;

    use super::{ManageError, Prompter};

    /// Deterministic [`Prompter`] for tests: each prompt consumes the
    /// next answer from a pre-loaded FIFO script.
    ///
    /// Build one with [`ScriptedPrompter::new`]. Once the script is
    /// exhausted every further prompt returns
    /// [`ManageError::Cancelled`], so a test that queues exactly the
    /// answers it expects will notice an unexpected extra prompt.
    pub struct ScriptedPrompter {
        answers: Mutex<VecDeque<Answer>>,
    }

    /// A single scripted reply held by a [`ScriptedPrompter`].
    #[derive(Debug, Clone)]
    pub enum Answer {
        /// Index to hand back from a `select` prompt.
        Select(usize),
        /// Boolean to hand back from a `confirm` prompt.
        Confirm(bool),
        /// Make the prompt that consumes this entry report
        /// cancellation.
        Cancel,
    }

    impl ScriptedPrompter {
        /// Creates a prompter that replays `answers` front to back.
        #[must_use]
        pub fn new(answers: impl IntoIterator<Item = Answer>) -> Self {
            let script: VecDeque<Answer> = answers.into_iter().collect();
            Self {
                answers: Mutex::new(script),
            }
        }

        /// How many scripted answers are still waiting to be
        /// consumed. Tests assert this is `0` to catch over-armed
        /// scripts.
        ///
        /// # Panics
        ///
        /// Panics if the inner mutex was poisoned by a previous panic
        /// while holding the lock.
        #[must_use]
        pub fn remaining(&self) -> usize {
            let script = self.answers.lock().expect("scripted mutex poisoned");
            script.len()
        }

        /// Takes the next scripted answer, or reports `Cancelled` when
        /// the script has run dry.
        fn pop(&self) -> Result<Answer, ManageError> {
            let mut script = self.answers.lock().expect("scripted mutex poisoned");
            match script.pop_front() {
                Some(next) => Ok(next),
                None => Err(ManageError::Cancelled),
            }
        }
    }

    impl Prompter for ScriptedPrompter {
        /// Ignores the prompt text and options; panics if the next
        /// scripted answer is a `Confirm`.
        fn select(&self, _prompt: &str, _options: &[String]) -> Result<usize, ManageError> {
            let next = self.pop()?;
            match next {
                Answer::Cancel => Err(ManageError::Cancelled),
                Answer::Select(index) => Ok(index),
                Answer::Confirm(_) => panic!("expected Select answer, got Confirm"),
            }
        }

        /// Ignores the prompt text; panics if the next scripted answer
        /// is a `Select`.
        fn confirm(&self, _prompt: &str) -> Result<bool, ManageError> {
            let next = self.pop()?;
            match next {
                Answer::Cancel => Err(ManageError::Cancelled),
                Answer::Confirm(reply) => Ok(reply),
                Answer::Select(_) => panic!("expected Confirm answer, got Select"),
            }
        }
    }
}
388
#[cfg(test)]
mod tests {
    use super::*;

    // Issue #199: `ManageError::StaleSnapshot` can be raised from two
    // observably different conditions, and its Display must tell them
    // apart so the operator-facing wording matches what is actually
    // on the bucket. One test per `StaleReason` pins the wording; if a
    // later refactor merges the two messages back into one string,
    // these tests fail loudly.

    /// Display text of a `StaleSnapshot` for `refs/heads/main` raised
    /// with `reason`.
    fn render_stale(reason: StaleReason) -> String {
        let err = ManageError::StaleSnapshot {
            entity: "refs/heads/main".to_owned(),
            reason,
        };
        err.to_string()
    }

    #[test]
    fn stale_snapshot_deleted_display_names_branch_and_uses_deleted_wording() {
        let rendered = render_stale(StaleReason::Deleted);

        let names_entity = rendered.contains("refs/heads/main");
        assert!(names_entity, "Display must name the entity: {rendered}");

        let uses_deleted_wording = rendered.contains("was deleted between selection and write");
        assert!(
            uses_deleted_wording,
            "Deleted branch must use the 'was deleted' wording: {rendered}",
        );

        let tells_rerun = rendered.contains("re-run doctor");
        assert!(
            tells_rerun,
            "Display must instruct the operator to re-run: {rendered}",
        );
    }

    #[test]
    fn stale_snapshot_residue_only_display_names_branch_and_uses_residue_wording() {
        let rendered = render_stale(StaleReason::ResidueOnly);

        let names_entity = rendered.contains("refs/heads/main");
        assert!(names_entity, "Display must name the entity: {rendered}");

        let mentions_metadata = rendered.contains("only operational metadata");
        assert!(
            mentions_metadata,
            "ResidueOnly must mention operational metadata: {rendered}",
        );

        let mentions_marker = rendered.contains("PROTECTED# marker");
        assert!(
            mentions_marker,
            "ResidueOnly must mention the PROTECTED# marker: {rendered}",
        );

        let uses_deleted_wording = rendered.contains("was deleted between selection and write");
        assert!(
            !uses_deleted_wording,
            "ResidueOnly must NOT use the 'was deleted' wording — that's \
             precisely the bug issue #199 fixed: {rendered}",
        );

        let tells_rerun = rendered.contains("re-run doctor");
        assert!(
            tells_rerun,
            "Display must instruct the operator to re-run: {rendered}",
        );
    }
}