git-remote-object-store 0.2.4

Git remote helper backed by cloud object stores (S3, Azure Blob Storage)
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
//! Management CLI: `doctor`, `delete-branch`, `protect`, `unprotect`.
//!
//! These commands operate against the same on-bucket object layout as
//! the helper protocol (bundles under `<prefix>/<ref>/`, `PROTECTED#`
//! markers, lock files).
//!
//! The library entry points (`Doctor`, `ManageBranch`) take an
//! [`ObjectStore`][crate::object_store::ObjectStore] and a
//! [`Prompter`], so the binary, mock-backed unit tests, and any future
//! non-interactive frontend share the same code path.

pub mod branch;
pub mod compact;
pub mod doctor;
pub mod gc;
pub(crate) mod gc_output;
pub(crate) mod snapshot;

use std::fmt;
use std::io;

use thiserror::Error;

use crate::keys;
use crate::object_store::{ObjectMeta, ObjectStoreError};

/// `fmt = ...` helper for [`ManageError::PartialDelete`]. Renders the
/// operator-facing "`<n>` of `<attempted>` key(s) could not be deleted"
/// line, followed by the comma-separated list of surviving keys.
///
/// The noun sits directly after the `attempted` total, so it agrees
/// with `attempted`: "1 of 1 key", "1 of 5 keys", "2 of 5 keys".
/// Keying the plural on the undeleted count instead would render
/// "1 of 5 key" whenever exactly one key out of several survives.
///
/// thiserror's `fmt = path` hook calls this with each variant field
/// passed by reference and the formatter as the final argument; we
/// silence `clippy::{ptr_arg, trivially_copy_pass_by_ref}` because
/// thiserror controls the signature, not us.
///
/// # Errors
///
/// Propagates any [`fmt::Error`] reported by the formatter `f`.
#[allow(clippy::ptr_arg, clippy::trivially_copy_pass_by_ref)]
fn fmt_partial_delete(
    branch: &String,
    undeleted: &Vec<String>,
    attempted: &usize,
    f: &mut fmt::Formatter<'_>,
) -> fmt::Result {
    let n = undeleted.len();
    // Grammatical number follows the total the noun is attached to
    // ("<n> of <attempted> <noun>"), not the count of failures.
    let noun = if *attempted == 1 { "key" } else { "keys" };
    write!(
        f,
        "delete-branch {branch} failed: {n} of {attempted} {noun} could not be deleted: {} (retry to converge)",
        undeleted.join(", "),
    )
}

/// Default lock TTL in seconds. Re-exported from
/// [`crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS`] so the doctor's
/// stale-lock predicate and `acquire_lock`'s TTL cannot silently drift.
pub use crate::protocol::push::DEFAULT_LOCK_TTL_SECONDS;

/// Reports whether `key` names a lock file, i.e. whether it ends in the
/// literal, case-sensitive `.lock` token.
///
/// `.lock` is a wire-format suffix on an S3/Azure object key rather
/// than a filesystem extension, so the comparison is deliberately
/// case-sensitive. The allow for clippy's case-insensitive-extension
/// hint stays on this helper so callers never have to repeat that
/// rationale.
#[allow(clippy::case_sensitive_file_extension_comparisons)]
pub(crate) fn is_lock_key(key: &str) -> bool {
    key.strip_suffix(".lock").is_some()
}

/// Reports whether `entries` holds at least one key that is real branch
/// data, meaning a key that is neither a lock file nor a `PROTECTED#`
/// marker. An empty listing yields `false`.
///
/// Lock files and `PROTECTED#` markers are coordination state, not
/// user-visible branch data. A branch whose only residue is such
/// metadata (a stale `*.lock`, a previously-written marker) therefore
/// counts as gone when answering "does the branch still exist on the
/// bucket?". Both `ManageBranch::protect` (issue #137) and
/// `Doctor::fix_head` (issue #138) consult this helper before writing
/// state that would otherwise re-anchor against a deleted branch.
pub(crate) fn has_branch_data(entries: &[ObjectMeta]) -> bool {
    // A key is operational residue when it is a lock file, or when its
    // final `/`-separated segment is a `PROTECTED#` marker. The lock
    // check runs first, so lock keys never reach the marker predicate.
    let is_residue = |key: &str| {
        if is_lock_key(key) {
            return true;
        }
        // `rsplit` always yields at least one item; with no `/` in the
        // key that item is the whole key.
        let leaf = key.rsplit('/').next().unwrap_or(key);
        keys::is_protected_marker_segment(leaf)
    };

    // "Some key is data" is the negation of "every key is residue".
    !entries.iter().all(|meta| is_residue(meta.key.as_str()))
}

/// Distinguishes the two ways the re-check behind a
/// [`ManageError::StaleSnapshot`] can come back negative. The re-check
/// runs immediately before a mutating write, and each outcome gets its
/// own operator-facing wording.
///
/// * [`Deleted`][StaleReason::Deleted] — nothing at all was found under
///   the entity's prefix (or the singleton key was `NotFound`): a
///   concurrent delete completed cleanly.
///
/// * [`ResidueOnly`][StaleReason::ResidueOnly] — keys were found, but
///   none of them carry branch data; only `*.lock` files and / or a
///   `PROTECTED#` marker remain. Operational metadata can outlive
///   user-visible branch data when a concurrent delete runs partially,
///   and writing HEAD against that residue would re-create the
///   invalid-HEAD condition the doctor exists to prevent.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StaleReason {
    /// The entity's prefix is empty (or its singleton key returned
    /// `NotFound`).
    Deleted,
    /// The entity's prefix holds only operational metadata: lock files
    /// and / or a `PROTECTED#` marker.
    ResidueOnly,
}

impl fmt::Display for StaleReason {
    /// Writes the sentence fragment for this reason. It is a predicate
    /// without a subject; `ManageError::StaleSnapshot` places the
    /// entity name in front of it.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let wording = match *self {
            Self::Deleted => "was deleted between selection and write",
            Self::ResidueOnly => {
                "is considered gone — only operational metadata \
                 (lock files / PROTECTED# marker) remains under its prefix"
            }
        };
        f.write_str(wording)
    }
}

/// Errors surfaced by the management surface.
///
/// `Store`, `Io` and `Packchain` are `#[error(transparent)]` wrappers
/// with `#[from]` conversions, so `?` lifts the underlying error and
/// the rendered message is the wrapped error's own. Every other
/// variant renders the fixed wording given in its `#[error(...)]`
/// attribute.
#[derive(Debug, Error)]
pub enum ManageError {
    /// Underlying object-store call failed.
    #[error(transparent)]
    Store(#[from] ObjectStoreError),

    /// `delete-branch` / `protect` / `unprotect` was invoked against a
    /// branch that has no objects under `<prefix>/refs/heads/<branch>/`.
    /// The payload is interpolated into the message as the branch name.
    #[error("branch not found: {0}")]
    BranchNotFound(String),

    /// `delete-branch` was invoked against a branch that has a
    /// `PROTECTED#` marker. Mirrors the refusal the helper-protocol
    /// delete path emits so both surfaces share one wording.
    ///
    /// The `String` payload is not interpolated into the message; the
    /// rendered text is fixed and uses the literal placeholders
    /// `<url> <branch>`. (Presumably the payload is the branch name —
    /// confirm against the construction sites.)
    #[error(
        "ref is protected. Run git-remote-object-store unprotect <url> <branch> to remove protection before deleting."
    )]
    Protected(String),

    /// `delete-branch` could not acquire the per-ref `LOCK#.lock`
    /// because another writer (a concurrent `git push` / delete /
    /// compact) currently holds it. Mirrors the helper-protocol
    /// push path's contention surface so both delete surfaces
    /// converge on the same "another operation in progress" wording.
    /// Issue #158: without this lock, a concurrent push that lands
    /// between the post-prompt re-list and the sweep is silently
    /// missed and the ref survives despite an apparent success.
    ///
    /// Only `lock` and `ttl_seconds` appear in the rendered message;
    /// `branch` is carried for programmatic consumers.
    #[error(
        "could not acquire ref lock at {lock}. Another client may be pushing or deleting. If this persists beyond {ttl_seconds}s, run git-remote-object-store doctor to inspect and optionally clear stale locks."
    )]
    LockContended {
        /// Branch the lock guards.
        branch: String,
        /// Full lock key on the bucket (`<prefix>/<ref>/LOCK#.lock`).
        /// Operators copy this into a doctor invocation.
        lock: String,
        /// Lock TTL in seconds at the time of attempted acquisition,
        /// rendered into the operator-facing message so a tuned
        /// `GIT_REMOTE_OBJECT_STORE_LOCK_TTL_SECONDS` is visible.
        ttl_seconds: i64,
    },

    /// `delete-branch` swept the fresh listing but one or more per-key
    /// deletes failed with a non-`NotFound` error. The loop continues
    /// past each transient failure so the caller has a complete
    /// inventory of which keys survived; this variant carries that
    /// inventory verbatim.
    ///
    /// Retry on the same branch is naturally idempotent — the re-list
    /// at the start of the next `delete` call will only show the
    /// surviving keys, and the same loop will try to delete them. A
    /// `NotFound` mid-sweep is tolerated and counts as success, so the
    /// `undeleted` field is strictly the set of keys whose deletes
    /// raised something else (Network, `AccessDenied`, etc.).
    ///
    /// The message is produced by the free function named in the
    /// `fmt = ...` attribute rather than by an inline format string.
    #[error(fmt = fmt_partial_delete)]
    PartialDelete {
        /// Branch the sweep ran against.
        branch: String,
        /// Keys whose per-key delete returned a non-`NotFound` error.
        /// Stored verbatim so a retry-by-key tool can target exactly
        /// what survived.
        undeleted: Vec<String>,
        /// Total number of keys the fresh listing yielded, for the
        /// operator-facing "N of M" framing in the error message.
        attempted: usize,
    },

    /// Branch name failed `gix-validate`'s strict ref-name check; we
    /// reject these at the management boundary so a value like
    /// `foo/../bar` cannot land as a literal substring of a stored
    /// object key. The payload is interpolated into the message.
    #[error("invalid branch name: {0}")]
    InvalidBranch(String),

    /// User cancelled an interactive prompt via Ctrl+C or EOF. A
    /// deliberate "no" on a confirmation prompt is not an error —
    /// callers (`ManageBranch::delete`, `fix_multiple_bundles`) print
    /// "Aborted" and return `Ok(())`.
    #[error("operation cancelled")]
    Cancelled,

    /// I/O error from `dialoguer` or other non-store sources.
    #[error(transparent)]
    Io(#[from] io::Error),

    /// A defensive invariant inside the management code was violated —
    /// for example a snapshot map lookup that the caller had previously
    /// proven to exist, or a prompter returning an out-of-range index.
    /// These should not happen in practice; surfacing them as a typed
    /// error keeps the helper from aborting the process.
    #[error("internal management error: {0}")]
    Internal(String),

    /// `doctor`'s top-of-run snapshot disagreed with a fresh re-check
    /// taken immediately before a mutating write — the on-bucket state
    /// changed under us between the snapshot LIST and the write. The
    /// canonical case (issue #138) is `fix_head` racing against a
    /// concurrent `git push :<branch>` or `manage delete-branch`: the
    /// operator picks a HEAD candidate from the snapshot, but by the
    /// time the prompt returns the chosen branch has been deleted.
    /// Writing HEAD anyway would reproduce the invalid-HEAD condition
    /// the doctor was trying to fix.
    ///
    /// Carries the entity whose presence was re-verified (e.g.
    /// `"refs/heads/main"`) and a [`StaleReason`] describing exactly
    /// what the re-check observed, so the operator-facing message names
    /// the branch and tells them to re-run the doctor.
    ///
    /// `reason` is rendered through its `Display` impl, so the message
    /// reads "`<entity>` `<reason wording>`".
    #[error("doctor snapshot is stale: {entity} {reason}; re-run doctor")]
    StaleSnapshot {
        /// The ref-path or other entity whose presence was re-verified.
        entity: String,
        /// What the re-check actually saw under that entity's prefix.
        reason: StaleReason,
    },

    /// Packchain engine surface error. Surfaced by the `doctor`'s
    /// engine-aware audit path. Carries the typed source so the
    /// `main`-level downcast can recognise transport failures and
    /// emit the categorical `fatal:` line.
    #[error(transparent)]
    Packchain(#[from] crate::packchain::PackchainError),
}

/// Interactive UI surface used by [`doctor`] and [`branch`].
///
/// Production binaries inject [`DialoguerPrompter`]; tests inject
/// `ScriptedPrompter` (gated on `test-util`) so prompt-driven flows
/// can be exercised deterministically without spawning the binary.
///
/// Both methods take `&self` and the trait requires `Send + Sync`, so
/// an implementation that needs mutable state must use interior
/// mutability.
pub trait Prompter: Send + Sync {
    /// Ask the user to pick one of `options` by index. `prompt` is the
    /// short headline shown above the choices.
    ///
    /// On success the value is the chosen index into `options`. The
    /// trait itself does not enforce that the index is in range;
    /// [`ManageError::Internal`] documents an out-of-range index as an
    /// invariant violation.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] if the user aborts (Ctrl+C or
    /// EOF), or [`ManageError::Io`] for underlying I/O failures.
    fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError>;

    /// Ask the user a yes/no question. Returns `Ok(true)` for "yes" and
    /// `Ok(false)` for "no". A "no" is a normal answer, not an error.
    ///
    /// # Errors
    ///
    /// Returns [`ManageError::Cancelled`] on EOF or signal, or
    /// [`ManageError::Io`] for underlying I/O failures.
    fn confirm(&self, prompt: &str) -> Result<bool, ManageError>;
}

/// Production [`Prompter`] that delegates to the `dialoguer` crate.
///
/// Every method blocks the calling thread until the user answers.
/// Callers driving the prompter from a `tokio::main` runtime should
/// wrap calls in [`tokio::task::spawn_blocking`] when responsiveness
/// matters; the management CLI today drives prompts serially between
/// async I/O calls, so a brief blocking read is acceptable.
#[derive(Debug, Default, Clone, Copy)]
pub struct DialoguerPrompter;

impl Prompter for DialoguerPrompter {
    /// Shows a `dialoguer::Select` over `options` with the first entry
    /// (index 0) pre-selected and returns the chosen index. A
    /// `dialoguer` failure is converted through
    /// `From<dialoguer::Error> for ManageError`.
    fn select(&self, prompt: &str, options: &[String]) -> Result<usize, ManageError> {
        let picked = dialoguer::Select::new()
            .with_prompt(prompt)
            .items(options)
            .default(0)
            .interact()?;
        Ok(picked)
    }

    /// Shows a `dialoguer::Confirm` whose default answer is "no"
    /// (`false`) and returns the user's choice. A `dialoguer` failure
    /// is converted through `From<dialoguer::Error> for ManageError`.
    fn confirm(&self, prompt: &str) -> Result<bool, ManageError> {
        let agreed = dialoguer::Confirm::new()
            .with_prompt(prompt)
            .default(false)
            .interact()?;
        Ok(agreed)
    }
}

impl From<dialoguer::Error> for ManageError {
    fn from(err: dialoguer::Error) -> Self {
        match err {
            dialoguer::Error::IO(io_err) if io_err.kind() == io::ErrorKind::Interrupted => {
                ManageError::Cancelled
            }
            dialoguer::Error::IO(io_err) => ManageError::Io(io_err),
        }
    }
}

#[cfg(any(test, feature = "test-util"))]
pub use scripted::ScriptedPrompter;

#[cfg(any(test, feature = "test-util"))]
mod scripted {
    use std::collections::VecDeque;
    use std::sync::Mutex;

    use super::{ManageError, Prompter};

    /// Deterministic [`Prompter`] for tests: each prompt consumes the
    /// next pre-queued [`Answer`], front to back.
    ///
    /// Build one with [`ScriptedPrompter::new`]. Once the queue is
    /// empty, any further prompt yields [`ManageError::Cancelled`] —
    /// tests should queue exactly the answers they expect, so an
    /// unexpected extra prompt fails loudly.
    pub struct ScriptedPrompter {
        // Behind a `Mutex` because the `Prompter` methods take `&self`
        // but each call has to pop from the queue.
        answers: Mutex<VecDeque<Answer>>,
    }

    /// A single scripted reply held by a [`ScriptedPrompter`].
    #[derive(Debug, Clone)]
    pub enum Answer {
        /// Answer a `select` prompt with this index.
        Select(usize),
        /// Answer a `confirm` prompt with this boolean.
        Confirm(bool),
        /// Make the next prompt, of either kind, report cancellation.
        Cancel,
    }

    impl ScriptedPrompter {
        /// Creates a prompter that hands out `answers` in iteration
        /// order.
        #[must_use]
        pub fn new(answers: impl IntoIterator<Item = Answer>) -> Self {
            let queue: VecDeque<Answer> = answers.into_iter().collect();
            Self {
                answers: Mutex::new(queue),
            }
        }

        /// Count of queued answers that have not been consumed yet.
        /// Tests assert this is `0` to catch over-armed scripts.
        ///
        /// # Panics
        ///
        /// Panics if the inner mutex was poisoned by a previous panic
        /// while holding the lock.
        #[must_use]
        pub fn remaining(&self) -> usize {
            let queue = self.answers.lock().expect("scripted mutex poisoned");
            queue.len()
        }

        /// Removes and returns the answer at the front of the queue,
        /// or [`ManageError::Cancelled`] when the script is exhausted.
        /// Panics if the inner mutex is poisoned.
        fn pop(&self) -> Result<Answer, ManageError> {
            let mut queue = self.answers.lock().expect("scripted mutex poisoned");
            let next = queue.pop_front();
            next.ok_or(ManageError::Cancelled)
        }
    }

    impl Prompter for ScriptedPrompter {
        /// Consumes the next answer and returns its index verbatim.
        /// The prompt text and `options` are ignored, so the index is
        /// not range-checked. Panics if the next answer is a `Confirm`.
        fn select(&self, _prompt: &str, _options: &[String]) -> Result<usize, ManageError> {
            let answer = self.pop()?;
            match answer {
                Answer::Cancel => Err(ManageError::Cancelled),
                Answer::Select(index) => Ok(index),
                Answer::Confirm(_) => panic!("expected Select answer, got Confirm"),
            }
        }

        /// Consumes the next answer and returns its boolean. The
        /// prompt text is ignored. Panics if the next answer is a
        /// `Select`.
        fn confirm(&self, _prompt: &str) -> Result<bool, ManageError> {
            let answer = self.pop()?;
            match answer {
                Answer::Cancel => Err(ManageError::Cancelled),
                Answer::Confirm(reply) => Ok(reply),
                Answer::Select(_) => panic!("expected Confirm answer, got Select"),
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Renders a `StaleSnapshot` for `refs/heads/main` carrying
    /// `reason`, exactly as an operator would see it.
    fn render_stale(reason: StaleReason) -> String {
        ManageError::StaleSnapshot {
            entity: "refs/heads/main".to_owned(),
            reason,
        }
        .to_string()
    }

    // Issue #199: `ManageError::StaleSnapshot` is raised from two
    // observably different conditions, and its Display must tell them
    // apart so the operator-facing wording matches the on-bucket
    // reality. The two tests below pin the wording for each reason; a
    // refactor that collapses them back into one string fails loudly.
    #[test]
    fn stale_snapshot_deleted_display_names_branch_and_uses_deleted_wording() {
        let rendered = render_stale(StaleReason::Deleted);

        assert!(
            rendered.contains("refs/heads/main"),
            "Display must name the entity: {rendered}",
        );
        assert!(
            rendered.contains("was deleted between selection and write"),
            "Deleted branch must use the 'was deleted' wording: {rendered}",
        );
        assert!(
            rendered.contains("re-run doctor"),
            "Display must instruct the operator to re-run: {rendered}",
        );
    }

    #[test]
    fn stale_snapshot_residue_only_display_names_branch_and_uses_residue_wording() {
        let rendered = render_stale(StaleReason::ResidueOnly);

        assert!(
            rendered.contains("refs/heads/main"),
            "Display must name the entity: {rendered}",
        );
        assert!(
            rendered.contains("only operational metadata"),
            "ResidueOnly must mention operational metadata: {rendered}",
        );
        assert!(
            rendered.contains("PROTECTED# marker"),
            "ResidueOnly must mention the PROTECTED# marker: {rendered}",
        );
        assert!(
            !rendered.contains("was deleted between selection and write"),
            "ResidueOnly must NOT use the 'was deleted' wording — that's \
             precisely the bug issue #199 fixed: {rendered}",
        );
        assert!(
            rendered.contains("re-run doctor"),
            "Display must instruct the operator to re-run: {rendered}",
        );
    }
}