git_remote_object_store/manage/mod.rs
1//! Management CLI: `doctor`, `delete-branch`, `protect`, `unprotect`.
2//!
3//! These commands operate against the same on-bucket object layout as
4//! the helper protocol (bundles under `<prefix>/<ref>/`, `PROTECTED#`
5//! markers, lock files).
6//!
7//! The library entry points (`Doctor`, `ManageBranch`) take an
8//! [`ObjectStore`][crate::object_store::ObjectStore] and a
9//! [`Prompter`], so the binary, mock-backed unit tests, and any future
10//! non-interactive frontend share the same code path.
11
12pub mod branch;
13pub mod compact;
14pub mod doctor;
15pub mod gc;
16pub(crate) mod gc_output;
17pub(crate) mod snapshot;
18
19use std::fmt;
20use std::io;
21
22use thiserror::Error;
23
24use crate::keys;
25use crate::object_store::{ObjectMeta, ObjectStoreError};
26
/// Renders the operator-facing text for [`ManageError::PartialDelete`].
///
/// Plugged in through thiserror's `fmt = path` hook, which passes each
/// variant field by reference and the formatter last. The noun is
/// singular only when exactly one key survived the sweep, so the
/// message says "key" for one survivor and "keys" for any other count.
///
/// `clippy::{ptr_arg, trivially_copy_pass_by_ref}` are silenced because
/// the parameter types are fixed by thiserror's calling convention and
/// cannot be loosened to `&str` / `&[String]` / `usize` here.
#[allow(clippy::ptr_arg, clippy::trivially_copy_pass_by_ref)]
fn fmt_partial_delete(
    branch: &String,
    undeleted: &Vec<String>,
    attempted: &usize,
    f: &mut fmt::Formatter<'_>,
) -> fmt::Result {
    let n = undeleted.len();
    let noun = match n {
        1 => "key",
        _ => "keys",
    };
    // Surviving keys are listed verbatim, comma-separated.
    let survivors = undeleted.join(", ");
    f.write_fmt(format_args!(
        "delete-branch {branch} failed: {n} of {attempted} {noun} could not be deleted: {} (retry to converge)",
        survivors,
    ))
}
50
51/// Default lock TTL in seconds. Re-exported from
52/// [`crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS`] so the doctor's
53/// stale-lock predicate and `acquire_lock`'s TTL cannot silently drift.
54pub use crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS;
55
/// Reports whether `key` names a lock file, i.e. ends in `.lock`.
///
/// The match is deliberately case-sensitive: `.lock` is a wire-format
/// token on a case-sensitive S3/Azure object key rather than a
/// filesystem extension. Clippy's case-insensitive-extension hint is
/// silenced here, once, so call sites never have to restate that
/// rationale.
#[allow(clippy::case_sensitive_file_extension_comparisons)]
pub(crate) fn is_lock_key(key: &str) -> bool {
    key.strip_suffix(".lock").is_some()
}
64
65/// `true` iff `entries` contains at least one key that represents real
66/// branch data — i.e. NOT a lock file and NOT a `PROTECTED#` marker.
67///
68/// A branch whose only residue is operational metadata (a stale
69/// `*.lock` or a previously-written `PROTECTED#` marker) is treated as
70/// gone for the purposes of "does the branch still exist on the
71/// bucket?" — those keys are coordination state, not user-visible
72/// branch data. Both `ManageBranch::protect` (issue #137) and
73/// `Doctor::fix_head` (issue #138) consult this helper before writing
74/// state that would otherwise re-anchor against a deleted branch.
75pub(crate) fn has_branch_data(entries: &[ObjectMeta]) -> bool {
76 entries.iter().any(|entry| {
77 let last = entry
78 .key
79 .rsplit_once('/')
80 .map_or(entry.key.as_str(), |(_, s)| s);
81 !is_lock_key(&entry.key) && !keys::is_protected_marker_segment(last)
82 })
83}
84
/// The observation behind a [`ManageError::StaleSnapshot`].
///
/// The re-check performed immediately before a mutating write can come
/// up short in two observably different ways, and each gets its own
/// operator-facing wording:
///
/// * [`Deleted`][StaleReason::Deleted] — nothing at all was found under
///   the entity's prefix (or the singleton key was `NotFound`); a
///   concurrent delete completed cleanly.
///
/// * [`ResidueOnly`][StaleReason::ResidueOnly] — keys were found, but
///   none of them carry branch data: only `*.lock` files and / or a
///   `PROTECTED#` marker remain. Such operational metadata can outlive
///   the user-visible branch data when a concurrent delete runs
///   partially, and writing HEAD against that residue would re-create
///   the invalid-HEAD condition the doctor exists to prevent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StaleReason {
    /// The entity's prefix is empty (or its singleton key returned
    /// `NotFound`).
    Deleted,
    /// The entity's prefix holds only operational metadata: lock files
    /// and / or a `PROTECTED#` marker.
    ResidueOnly,
}

impl fmt::Display for StaleReason {
    /// Writes the predicate half of the stale-snapshot sentence; the
    /// caller supplies the entity name in front of it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wording = match *self {
            Self::Deleted => "was deleted between selection and write",
            Self::ResidueOnly => {
                "is considered gone — only operational metadata \
                 (lock files / PROTECTED# marker) remains under its prefix"
            }
        };
        f.write_str(wording)
    }
}
121
/// Errors surfaced by the management surface.
///
/// Variants marked `#[error(transparent)]` delegate their `Display`
/// output and `source()` to the wrapped error; every other variant
/// renders the message spelled out in its own `#[error(...)]`
/// attribute (or, for [`PartialDelete`][ManageError::PartialDelete],
/// by the `fmt_partial_delete` helper).
#[derive(Debug, Error)]
pub enum ManageError {
    /// Underlying object-store call failed.
    #[error(transparent)]
    Store(#[from] ObjectStoreError),

    /// `delete-branch` / `protect` / `unprotect` was invoked against a
    /// branch that has no objects under `<prefix>/refs/heads/<branch>/`.
    #[error("branch not found: {0}")]
    BranchNotFound(String),

    /// `delete-branch` was invoked against a branch that has a
    /// `PROTECTED#` marker. Mirrors the refusal the helper-protocol
    /// delete path emits so both surfaces share one wording.
    ///
    /// The `String` payload (presumably the branch name — confirm
    /// against the constructing call site) is NOT interpolated into the
    /// message; the text carries a literal `<branch>` placeholder.
    #[error(
        "ref is protected. Run git-remote-object-store unprotect <url> <branch> to remove protection before deleting."
    )]
    Protected(String),

    /// `delete-branch` could not acquire the per-ref `LOCK#.lock`
    /// because another writer (a concurrent `git push` / delete /
    /// compact) currently holds it. Mirrors the helper-protocol
    /// push path's contention surface so both delete surfaces
    /// converge on the same "another operation in progress" wording.
    /// Issue #158: without this lock, a concurrent push that lands
    /// between the post-prompt re-list and the sweep is silently
    /// missed and the ref survives despite an apparent success.
    #[error(
        "could not acquire ref lock at {lock}. Another client may be pushing or deleting. If this persists beyond {ttl_seconds}s, run git-remote-object-store doctor to inspect and optionally clear stale locks."
    )]
    LockContended {
        /// Branch the lock guards. Carried for callers that match on
        /// the variant; it is not rendered in the message.
        branch: String,
        /// Full lock key on the bucket (`<prefix>/<ref>/LOCK#.lock`).
        /// Operators copy this into a doctor invocation.
        lock: String,
        /// Lock TTL in seconds at the time of attempted acquisition,
        /// rendered into the operator-facing message so a tuned
        /// `GIT_REMOTE_OBJECT_STORE_LOCK_TTL_SECONDS` is visible.
        ttl_seconds: i64,
    },

    /// `delete-branch` swept the fresh listing but one or more per-key
    /// deletes failed with a non-`NotFound` error. The loop continues
    /// past each transient failure so the caller has a complete
    /// inventory of which keys survived; this variant carries that
    /// inventory verbatim.
    ///
    /// Retry on the same branch is naturally idempotent — the re-list
    /// at the start of the next `delete` call will only show the
    /// surviving keys, and the same loop will try to delete them. A
    /// `NotFound` mid-sweep is tolerated and counts as success, so the
    /// `undeleted` field is strictly the set of keys whose deletes
    /// raised something else (Network, `AccessDenied`, etc.).
    ///
    /// The message is produced by `fmt_partial_delete`, which reports
    /// `undeleted.len()` against `attempted` and lists the surviving
    /// keys comma-separated.
    #[error(fmt = fmt_partial_delete)]
    PartialDelete {
        /// Branch the sweep ran against.
        branch: String,
        /// Keys whose per-key delete returned a non-`NotFound` error.
        /// Stored verbatim so a retry-by-key tool can target exactly
        /// what survived.
        undeleted: Vec<String>,
        /// Total number of keys the fresh listing yielded, for the
        /// operator-facing "N of M" framing in the error message.
        attempted: usize,
    },

    /// Branch name failed `gix-validate`'s strict ref-name check; we
    /// reject these at the management boundary so a value like
    /// `foo/../bar` cannot land as a literal substring of a stored
    /// object key.
    #[error("invalid branch name: {0}")]
    InvalidBranch(String),

    /// User cancelled an interactive prompt via Ctrl+C or EOF. A
    /// deliberate "no" on a confirmation prompt is not an error —
    /// callers (`ManageBranch::delete`, `fix_multiple_bundles`) print
    /// "Aborted" and return `Ok(())`.
    #[error("operation cancelled")]
    Cancelled,

    /// I/O error from `dialoguer` or other non-store sources.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// A defensive invariant inside the management code was violated —
    /// for example a snapshot map lookup that the caller had previously
    /// proven to exist, or a prompter returning an out-of-range index.
    /// These should not happen in practice; surfacing them as a typed
    /// error keeps the helper from aborting the process.
    #[error("internal management error: {0}")]
    Internal(String),

    /// `doctor`'s top-of-run snapshot disagreed with a fresh re-check
    /// taken immediately before a mutating write — the on-bucket state
    /// changed under us between the snapshot LIST and the write. The
    /// canonical case (issue #138) is `fix_head` racing against a
    /// concurrent `git push :<branch>` or `manage delete-branch`: the
    /// operator picks a HEAD candidate from the snapshot, but by the
    /// time the prompt returns the chosen branch has been deleted.
    /// Writing HEAD anyway would reproduce the invalid-HEAD condition
    /// the doctor was trying to fix.
    ///
    /// Carries the entity whose presence was re-verified (e.g.
    /// `"refs/heads/main"`) and a [`StaleReason`] describing exactly
    /// what the re-check observed, so the operator-facing message names
    /// the branch and tells them to re-run the doctor.
    #[error("doctor snapshot is stale: {entity} {reason}; re-run doctor")]
    StaleSnapshot {
        /// The ref-path or other entity whose presence was re-verified.
        entity: String,
        /// What the re-check actually saw under that entity's prefix.
        /// Its `Display` text is spliced in directly after `entity`.
        reason: StaleReason,
    },

    /// Packchain engine surface error. Surfaced by the `doctor`'s
    /// engine-aware audit path. Carries the typed source so the
    /// `main`-level downcast can recognise transport failures and
    /// emit the categorical `fatal:` line.
    #[error(transparent)]
    Packchain(#[from] crate::packchain::PackchainError),
}
245
/// Interactive UI surface used by [`doctor`] and [`branch`].
///
/// Production binaries inject [`DialoguerPrompter`]; tests inject
/// `ScriptedPrompter` (gated on `test-util`) so prompt-driven flows
/// can be exercised deterministically without spawning the binary.
///
/// Both methods take `&self` and the trait requires `Send + Sync`, so
/// an implementation that needs mutable state must use interior
/// mutability.
pub trait Prompter: Send + Sync {
    /// Ask the user to pick one of `options` by index. `prompt` is the
    /// short headline shown above the choices.
    ///
    /// The returned value is meant to be an index into `options`, but
    /// the signature cannot enforce that; callers should bounds-check
    /// it rather than index blindly.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] if the user aborts (Ctrl+C or
    /// EOF), or [`ManageError::Io`] for underlying I/O failures.
    fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError>;

    /// Ask the user a yes/no question. Returns `Ok(true)` for "yes" and
    /// `Ok(false)` for "no".
    ///
    /// A "no" is a normal answer, not an error; only an aborted prompt
    /// produces `Err`.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] on EOF or signal, or
    /// [`ManageError::Io`] for underlying I/O failures.
    fn confirm(&self, prompt: &str) -> Result<bool, ManageError>;
}
270
271/// Default [`Prompter`] backed by the `dialoguer` crate.
272///
273/// Each method runs synchronously on the calling thread. Callers driving
274/// the prompter from a `tokio::main` runtime should wrap calls in
275/// [`tokio::task::spawn_blocking`] when responsiveness matters; the
276/// management CLI today drives prompts serially between async I/O calls,
277/// so a brief blocking read is acceptable.
278#[derive(Debug, Default, Clone, Copy)]
279pub struct DialoguerPrompter;
280
281impl Prompter for DialoguerPrompter {
282 fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError> {
283 Ok(dialoguer::Select::new()
284 .with_prompt(prompt)
285 .items(options)
286 .default(0)
287 .interact()?)
288 }
289
290 fn confirm(&self, prompt: &str) -> Result<bool, ManageError> {
291 Ok(dialoguer::Confirm::new()
292 .with_prompt(prompt)
293 .default(false)
294 .interact()?)
295 }
296}
297
298impl From<dialoguer::Error> for ManageError {
299 fn from(err: dialoguer::Error) -> Self {
300 match err {
301 dialoguer::Error::IO(io_err) if io_err.kind() == io::ErrorKind::Interrupted => {
302 ManageError::Cancelled
303 }
304 dialoguer::Error::IO(io_err) => ManageError::Io(io_err),
305 }
306 }
307}
308
309#[cfg(any(test, feature = "test-util"))]
310pub use scripted::ScriptedPrompter;
311
#[cfg(any(test, feature = "test-util"))]
mod scripted {
    use std::collections::VecDeque;
    use std::sync::Mutex;

    use super::{ManageError, Prompter};

    /// Test-only [`Prompter`] that replays a pre-recorded script.
    ///
    /// Build one with [`ScriptedPrompter::new`]; every prompt consumes
    /// the next queued [`Answer`]. An exhausted queue yields
    /// [`ManageError::Cancelled`], so a test that queues exactly the
    /// answers it expects fails loudly on an unexpected extra prompt.
    pub struct ScriptedPrompter {
        // Behind a mutex because `Prompter` methods take `&self`.
        answers: Mutex<VecDeque<Answer>>,
    }

    /// A single scripted reply for a [`ScriptedPrompter`].
    #[derive(Debug, Clone)]
    pub enum Answer {
        /// Answer a `select` prompt with this index.
        Select(usize),
        /// Answer a `confirm` prompt with this boolean.
        Confirm(bool),
        /// Make the next prompt report a cancellation.
        Cancel,
    }

    impl ScriptedPrompter {
        /// Creates a prompter that hands out `answers` front to back.
        #[must_use]
        pub fn new(answers: impl IntoIterator<Item = Answer>) -> Self {
            let queue: VecDeque<Answer> = answers.into_iter().collect();
            Self {
                answers: Mutex::new(queue),
            }
        }

        /// How many queued answers are still unconsumed. Tests assert
        /// `0` here to catch scripts armed with too many answers.
        ///
        /// # Panics
        ///
        /// Panics if the inner mutex was poisoned by an earlier panic
        /// while the lock was held.
        #[must_use]
        pub fn remaining(&self) -> usize {
            let queue = self.answers.lock().expect("scripted mutex poisoned");
            queue.len()
        }

        /// Takes the next answer, or reports `Cancelled` once the
        /// script has run dry.
        fn pop(&self) -> Result<Answer, ManageError> {
            let mut queue = self.answers.lock().expect("scripted mutex poisoned");
            match queue.pop_front() {
                Some(answer) => Ok(answer),
                None => Err(ManageError::Cancelled),
            }
        }
    }

    impl Prompter for ScriptedPrompter {
        /// Returns the queued index as-is; `options` is not consulted.
        /// Panics when the next queued answer is a `Confirm`.
        fn select(&self, _prompt: &str, _options: &[String]) -> Result<usize, ManageError> {
            let index = match self.pop()? {
                Answer::Cancel => return Err(ManageError::Cancelled),
                Answer::Confirm(_) => panic!("expected Select answer, got Confirm"),
                Answer::Select(index) => index,
            };
            Ok(index)
        }

        /// Returns the queued boolean. Panics when the next queued
        /// answer is a `Select`.
        fn confirm(&self, _prompt: &str) -> Result<bool, ManageError> {
            let verdict = match self.pop()? {
                Answer::Cancel => return Err(ManageError::Cancelled),
                Answer::Select(_) => panic!("expected Confirm answer, got Select"),
                Answer::Confirm(verdict) => verdict,
            };
            Ok(verdict)
        }
    }
}
388
#[cfg(test)]
mod tests {
    use super::*;

    /// Renders a `StaleSnapshot` for `refs/heads/main` with `reason`.
    fn render_stale(reason: StaleReason) -> String {
        let err = ManageError::StaleSnapshot {
            entity: "refs/heads/main".to_owned(),
            reason,
        };
        err.to_string()
    }

    // Issue #199: `ManageError::StaleSnapshot` has two observably
    // different causes, and its Display must keep them apart so the
    // operator-facing wording matches what is actually on the bucket.
    // The two tests below pin the wording per cause; a refactor that
    // folds them back into one string makes them fail loudly.
    #[test]
    fn stale_snapshot_deleted_display_names_branch_and_uses_deleted_wording() {
        let rendered = render_stale(StaleReason::Deleted);

        assert!(
            rendered.contains("refs/heads/main"),
            "Display must name the entity: {rendered}",
        );
        assert!(
            rendered.contains("was deleted between selection and write"),
            "Deleted branch must use the 'was deleted' wording: {rendered}",
        );
        assert!(
            rendered.contains("re-run doctor"),
            "Display must instruct the operator to re-run: {rendered}",
        );
    }

    #[test]
    fn stale_snapshot_residue_only_display_names_branch_and_uses_residue_wording() {
        let rendered = render_stale(StaleReason::ResidueOnly);

        assert!(
            rendered.contains("refs/heads/main"),
            "Display must name the entity: {rendered}",
        );
        assert!(
            rendered.contains("only operational metadata"),
            "ResidueOnly must mention operational metadata: {rendered}",
        );
        assert!(
            rendered.contains("PROTECTED# marker"),
            "ResidueOnly must mention the PROTECTED# marker: {rendered}",
        );
        // The residue case must never borrow the clean-delete wording.
        assert!(
            !rendered.contains("was deleted between selection and write"),
            "ResidueOnly must NOT use the 'was deleted' wording — that's \
             precisely the bug issue #199 fixed: {rendered}",
        );
        assert!(
            rendered.contains("re-run doctor"),
            "Display must instruct the operator to re-run: {rendered}",
        );
    }
}
449}