Skip to main content

vcs_cli_support/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(rustdoc::broken_intra_doc_links)]
3//! `vcs-cli-support` — the [`processkit`]-coupled plumbing the CLI wrappers reuse.
4//!
5//! `vcs-git` / `vcs-jj` / `vcs-github` all drive a CLI through [`processkit`], so
6//! they share three concerns that *touch* [`processkit::Error`]: an argv injection
7//! guard, a fetch-retry policy, and a set of [`Error`] classifiers. Extracting them
8//! here keeps the std-only `vcs-diff` clean of the `processkit` dependency, and —
9//! more to the point — keeps the marker lists and classifier logic from drifting
10//! between backends. The wrapper crates re-export these items (so you reach them
11//! as `vcs_git::is_merge_conflict`, not via this crate's name) and rarely name
12//! `vcs-cli-support` directly.
13//!
14//! # The surface
15//!
16//! - **[`reject_flag_like`]** — the injection guard for bare positional argv slots.
17//!   A caller value that is empty/whitespace, or starts with `-`, is refused before
18//!   spawning (the CLI would parse it as a flag); flag-*value* slots (`-m <msg>`)
19//!   are consumed verbatim and skip the check. Wrappers call it with their own
20//!   binary name so the surfaced [`Error::Spawn`] names the right `program`.
21//! - **[`FETCH_ATTEMPTS`] / [`FETCH_BACKOFF`]** — the shared transient-retry policy
22//!   for `fetch` (one try plus two retries, fixed backoff between them).
23//! - **[`is_merge_conflict`] / [`is_nothing_to_commit`] / [`is_transient_fetch_error`]
24//!   / [`is_lock_contention`]** — classify a returned [`Error`] so callers branch on
25//!   *intent* ("conflict, resolve it"; "nothing to commit, no-op"; "transient,
26//!   retry"; "another process holds the lock, retry") instead of matching on error
27//!   internals. They inspect captured [`Error::Exit`] output against fixed marker
28//!   lists; a [`processkit`] [`Error::Timeout`] is **not** treated as a transient
29//!   fetch error (it already spent the full deadline — see
30//!   [`is_transient_fetch_error`]); any unfamiliar `#[non_exhaustive]` variant falls
31//!   through to "no".
32//! - **[`RetryPolicy`] / [`retry_async`] / [`ManagedClient`]** — an opt-in retry
33//!   strategy (attempts + exponential, jittered backoff) for **lock-contention**
34//!   failures. `ManagedClient` wraps a [`processkit`] `CliClient` and applies the
35//!   policy to every command, so the `vcs-git`/`vcs-jj` clients gain retry via
36//!   `with_retry(...)` without changing a call site. Lock-acquisition failures are
37//!   pre-execution, so retrying is safe even for mutating commands.
38//! - **[`CredentialProvider`] / [`Credential`] / [`Secret`]** — an opt-in seam for
39//!   supplying a secret *per operation* (a CI token, a vault lookup) instead of
40//!   relying on ambient CLI auth. `ManagedClient` injects the resolved token into
41//!   each command (the forge `GH_TOKEN`/`GITLAB_TOKEN` env); git uses
42//!   [`git_credential_helper`] to keep the secret out of `argv`. Default is no
43//!   provider → ambient auth, unchanged. See the [`credentials`](mod@credentials)
44//!   module for the full picture.
45//!
46//! # Recipes
47//!
48//! Classify a failed `fetch` to drive a retry decision — branch on intent, not on
49//! the error's internals:
50//!
51//! ```no_run
52//! use vcs_cli_support::{is_transient_fetch_error, FETCH_ATTEMPTS, FETCH_BACKOFF};
53//! # fn run() -> Result<(), processkit::Error> { todo!() }
54//! # fn demo() -> Result<(), processkit::Error> {
55//! for attempt in 1..=FETCH_ATTEMPTS {
56//!     match run() {
57//!         Ok(()) => break,
58//!         Err(e) if is_transient_fetch_error(&e) && attempt < FETCH_ATTEMPTS => {
59//!             std::thread::sleep(FETCH_BACKOFF); // DNS / dropped connection — worth a retry
60//!         }
61//!         Err(e) => return Err(e),               // anything else: give up
62//!     }
63//! }
64//! # Ok(()) }
65//! ```
66
67use std::ffi::OsStr;
68use std::fmt;
69use std::future::Future;
70use std::path::Path;
71use std::sync::Arc;
72use std::time::Duration;
73
74use processkit::{
75    CliClient, Command, Error, IntoCommand, JobRunner, ProcessResult, ProcessRunner, Result,
76};
77
78pub mod credentials;
79pub use credentials::{
80    Credential, CredentialProvider, CredentialRequest, CredentialService, EnvToken, FnProvider,
81    GitCredentialHelper, Secret, StaticCredential, git_credential_helper, https_host, provider_fn,
82};
83
/// Shared JSON plumbing for the forge wrappers (enabled by the `serde` feature), so
/// every forge parser applies the same `null -> ""` rule and the same parse-error
/// mapping.
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
pub mod json {
    use processkit::{Error, Result};
    use serde::Deserialize;
    use serde::de::DeserializeOwned;

    /// Field deserializer that accepts either a JSON string or JSON `null`, turning
    /// `null` into the empty string (the same value an absent key gets through
    /// `#[serde(default)]`). `#[serde(default)]` on its own only handles the absent
    /// key; a key that is present but `null` would otherwise fail the whole-object
    /// parse. Attach with
    /// `#[serde(deserialize_with = "vcs_cli_support::json::null_to_empty")]`.
    pub fn null_to_empty<'de, D>(deserializer: D) -> ::core::result::Result<String, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Read the field as an optional string, then collapse `None` to `""`.
        Option::<String>::deserialize(deserializer).map(Option::unwrap_or_default)
    }

    /// Parse a forge CLI's `--json` output as `T`. A parse failure becomes an
    /// [`Error::Parse`] carrying `program` (the CLI's binary name) and the
    /// serde error text.
    pub fn from_json<T: DeserializeOwned>(program: &str, json: &str) -> Result<T> {
        match serde_json::from_str(json) {
            Ok(parsed) => Ok(parsed),
            Err(cause) => Err(Error::Parse {
                program: program.to_string(),
                message: cause.to_string(),
            }),
        }
    }
}
113
/// Generate the cwd-bound forwarding methods of a CLI wrapper's `…At` view.
///
/// Every CLI wrapper (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`, `vcs-gitea`)
/// has a cwd-bound view — `GitAt`, `JjAt`, `GitHubAt`, `GitLabAt`, `GiteaAt` — that
/// stores a reference to the client together with a pre-bound `dir` and re-exposes
/// the client's methods with that `dir` filled in. Apart from the inputs listed
/// below the forwarder bodies are identical in all five backends, so they are
/// defined once here rather than as a copied `macro_rules!` in each crate.
///
/// Inputs:
///
/// - `$view` — the bound view type (e.g. `GitAt`). It must be generic over
///   `<'a, R: ProcessRunner>`, with a field named `$field` holding the client and
///   a `dir: &'a Path` field.
/// - `$field` — the name of the field that holds the client (e.g. `git`, `gh`,
///   `glab`, `tea`).
/// - `$client` — a **string literal** with the client type's name; it is spliced
///   into the generated doc strings as an intra-doc link (e.g. `"Git"` →
///   ``[`Git`]``).
/// - `bare { … }` — signatures forwarded to `self.$field` unchanged.
/// - `dir  { … }` — signatures forwarded with `self.dir` inserted as the first
///   argument.
///
/// Argument and return types in the two method lists are resolved in the
/// **calling** crate, so write them exactly as the wrapper's own methods spell
/// them. The `ProcessRunner` bound is written fully qualified
/// (`::processkit::ProcessRunner`), so the expansion does not depend on the
/// caller's imports.
///
/// ```ignore
/// vcs_cli_support::at_forwarders! {
///     GitAt, git, "Git",
///     bare { fn version() -> Result<String>; }
///     dir  { fn status() -> Result<Vec<StatusEntry>>; }
/// }
/// ```
#[macro_export]
macro_rules! at_forwarders {
    (
        $view:ident, $field:ident, $client:literal,
        bare { $( fn $bare_fn:ident( $($bare_arg:ident: $bare_ty:ty),* $(,)? ) -> $bare_ret:ty; )* }
        dir  { $( fn $dir_fn:ident( $($dir_arg:ident: $dir_ty:ty),* $(,)? ) -> $dir_ret:ty; )* }
    ) => {
        impl<'a, R: ::processkit::ProcessRunner> $view<'a, R> {
            // `bare` methods: same arguments, no `dir`.
            $(
                #[doc = concat!("Bound form of [`", $client, "`]'s `", stringify!($bare_fn), "`.")]
                pub async fn $bare_fn(&self, $($bare_arg: $bare_ty),*) -> $bare_ret {
                    self.$field.$bare_fn($($bare_arg),*).await
                }
            )*
            // `dir` methods: the view's `dir` is passed ahead of the caller's arguments.
            $(
                #[doc = concat!("Bound form of [`", $client, "`]'s `", stringify!($dir_fn), "` (with `dir` pre-bound).")]
                pub async fn $dir_fn(&self, $($dir_arg: $dir_ty),*) -> $dir_ret {
                    self.$field.$dir_fn(self.dir, $($dir_arg),*).await
                }
            )*
        }
    };
}
169
/// Generate the client scaffold that every CLI wrapper otherwise hand-writes
/// around a [`ManagedClient`].
///
/// `vcs-git`, `vcs-jj`, `vcs-github`, and `vcs-gitlab` each wrap a
/// [`ManagedClient`] in a thin newtype exposing the same small set of
/// constructors and default-applying builders — `new` / `Default` /
/// `with_runner` / `default_timeout` / `default_env` / `default_env_remove` /
/// `default_cancel_on` — with identical bodies and doc strings. Emitting that
/// shared part from one macro keeps it from drifting between backends. The
/// *capability* builders (`with_retry`, `with_credentials`, every verb, the `…At`
/// view, …) stay hand-written in a separate `impl` block of each wrapper.
///
/// The expansion declares `struct $name<R: ProcessRunner = JobRunner>` holding one
/// private field, `core: ManagedClient<R>`, which the rest of the wrapper crate
/// (same module) can reach. Every path in the expansion is fully qualified, so it
/// compiles whatever the caller has imported.
///
/// - `$name` — the wrapper type (e.g. `Git`). The doc comment and any other
///   attributes placed before `struct` are copied onto it verbatim.
/// - `$binary` — the program the client drives (an expression, typically the
///   crate's `BINARY` const).
/// - `token_env = ($service, $env_var)` — *optional*. When present, `new` and
///   `with_runner` chain [`ManagedClient::with_token_env`], so a resolved
///   credential for `$service` is injected into the `$env_var` environment
///   variable (the forge case: `GH_TOKEN`, `GITLAB_TOKEN`). Leave it out for the
///   ambient-auth backends (git, jj).
/// - `scrub_env = [ $scrubbed, … ]` — *optional*. When present, `new` and
///   `with_runner` chain [`ManagedClient::default_env_remove`] once per entry, so
///   **every** client the macro generates drops those inherited environment
///   variables by default (`vcs-git` scrubs the repo-redirector vars — `GIT_DIR`,
///   … — so a value leaking from the parent process can't retarget commands).
///   When both options are given, `scrub_env` must come *after* `token_env`.
///
/// ```ignore
/// vcs_cli_support::managed_client! {
///     /// The real GitHub client.
///     pub struct GitHub => BINARY, token_env = (CredentialService::GitHub, "GH_TOKEN")
/// }
/// vcs_cli_support::managed_client! {
///     /// The real Git client — scrubs the repo-redirector env vars by default.
///     pub struct Git => BINARY, scrub_env = ["GIT_DIR", "GIT_WORK_TREE"]
/// }
/// ```
#[macro_export]
macro_rules! managed_client {
    (
        $(#[$attr:meta])*
        $vis:vis struct $name:ident => $binary:expr
        $(, token_env = ($service:expr, $env_var:expr) )?
        $(, scrub_env = [ $($scrubbed:expr),* $(,)? ] )?
        $(,)?
    ) => {
        $(#[$attr])*
        $vis struct $name<R: ::processkit::ProcessRunner = ::processkit::JobRunner> {
            core: $crate::ManagedClient<R>,
        }

        impl $name<::processkit::JobRunner> {
            /// Create a client driving the real job-backed runner.
            pub fn new() -> Self {
                Self {
                    core: $crate::ManagedClient::new($binary)
                        $(.with_token_env($service, $env_var))?
                        $($(.default_env_remove($scrubbed))*)?,
                }
            }
        }

        impl ::core::default::Default for $name<::processkit::JobRunner> {
            fn default() -> Self {
                Self::new()
            }
        }

        impl<R: ::processkit::ProcessRunner> $name<R> {
            /// Create a client driving `runner` — inject a fake in tests.
            pub fn with_runner(runner: R) -> Self {
                Self {
                    core: $crate::ManagedClient::with_runner($binary, runner)
                        $(.with_token_env($service, $env_var))?
                        $($(.default_env_remove($scrubbed))*)?,
                }
            }

            /// Apply a default timeout to every command this client builds.
            pub fn default_timeout(self, timeout: ::core::time::Duration) -> Self {
                Self { core: self.core.default_timeout(timeout) }
            }

            /// Set an environment variable on every command this client builds.
            pub fn default_env(
                self,
                key: impl ::core::convert::AsRef<::std::ffi::OsStr>,
                value: impl ::core::convert::AsRef<::std::ffi::OsStr>,
            ) -> Self {
                Self { core: self.core.default_env(key, value) }
            }

            /// Remove an inherited environment variable on every command this client builds.
            pub fn default_env_remove(
                self,
                key: impl ::core::convert::AsRef<::std::ffi::OsStr>,
            ) -> Self {
                Self { core: self.core.default_env_remove(key) }
            }

            /// Cancel every command this client builds when `token` fires.
            pub fn default_cancel_on(self, token: ::processkit::CancellationToken) -> Self {
                Self { core: self.core.default_cancel_on(token) }
            }
        }
    };
}
285
286/// Injection guard for bare positional argv slots: a caller-supplied value with a
287/// leading `-` would be parsed by the CLI as a *flag* (verified: `git checkout
288/// -evil` → "unknown switch"; jj likewise), and an empty (or whitespace-only)
289/// value silently changes most commands' meaning. Refuse both before anything
290/// spawns, surfacing an [`Error::Spawn`] naming `program`. An interior NUL is
291/// refused too (it can't be passed in argv and otherwise surfaces as an opaque
292/// OS spawn error). Flag-VALUE positions (`-m <msg>`, `--branch <b>`) don't need
293/// this — the CLI consumes the next token verbatim there.
294///
295/// The leading-`-` test is applied to the **trimmed** value, so a value like
296/// `" --upload-pack=…"` (leading whitespace) is still refused — the empty-check
297/// and the flag-check now agree on what "the value" is.
298pub fn reject_flag_like(program: &str, what: &str, value: &str) -> Result<()> {
299    let trimmed = value.trim();
300    if trimmed.is_empty() || trimmed.starts_with('-') || value.contains('\0') {
301        return Err(Error::Spawn {
302            program: program.to_string(),
303            source: std::io::Error::new(
304                std::io::ErrorKind::InvalidInput,
305                format!(
306                    "{what} {value:?} would be parsed as a flag (or is empty / contains NUL) — \
307                     refusing to pass it as a positional argument"
308                ),
309            ),
310        });
311    }
312    Ok(())
313}
314
315/// Total attempts for a transient-retried `fetch`: the first try plus two retries.
/// Use it as the upper bound of a retry loop gated on [`is_transient_fetch_error`].
316pub const FETCH_ATTEMPTS: u32 = 3;
317/// Fixed (non-growing) backoff slept between consecutive fetch attempts: 500 ms.
318pub const FETCH_BACKOFF: Duration = Duration::from_millis(500);
319/// Grace period for a timed-out fetch: on the deadline processkit signals the
320/// process tree (terminate), waits this long for it to exit cleanly — flush, close
321/// the connection, drop any lock — then hard-kills. Only takes effect when a
322/// per-client timeout is set (`Git::default_timeout` / `Jj::default_timeout`); a
323/// fetch with no deadline is unaffected.
324pub const FETCH_TIMEOUT_GRACE: Duration = Duration::from_secs(2);
325
326/// Lower-case substrings marking a merge that stopped on conflicts.
/// Scanned for by `exit_output_matches`, which ASCII-lower-cases the captured
/// output first — so every entry here must itself be lower-case.
327const CONFLICT_MARKERS: &[&str] = &["conflict (", "automatic merge failed"];
328/// Lower-case substrings marking a commit that found nothing to record.
329const NOTHING_TO_COMMIT_MARKERS: &[&str] = &["nothing to commit", "nothing added to commit"];
330/// Lower-case substrings marking a transient (retryable) network/fetch failure.
331/// The timeout markers are kept *specific* (`connection timed out` /
332/// `operation timed out`) rather than a bare `timed out`, which would also match
333/// unrelated, non-network "timed out" messages (a lock wait, a hook) and trigger a
334/// spurious fetch retry.
335const TRANSIENT_FETCH_MARKERS: &[&str] = &[
    // Name resolution failures.
336    "could not resolve host",
337    "couldn't resolve host",
338    "temporary failure in name resolution",
    // Connection setup failures.
339    "connection timed out",
340    "connection refused",
341    "operation timed out",
342    "network is unreachable",
343    "failed to connect",
    // Connection dropped mid-transfer.
344    "could not read from remote repository",
345    "the remote end hung up",
346    "early eof",
347    "rpc failed",
348];
349
350/// Whether `err` is an [`Error::Exit`] whose captured output contains any marker.
351fn exit_output_matches(err: &Error, markers: &[&str]) -> bool {
352    let Error::Exit { stdout, stderr, .. } = err else {
353        return false;
354    };
355    let out = stdout.to_ascii_lowercase();
356    let errt = stderr.to_ascii_lowercase();
357    markers.iter().any(|m| out.contains(m) || errt.contains(m))
358}
359
360/// Whether a failed `merge`/`merge_commit` stopped on a merge conflict. (jj
361/// surfaces conflicts as state rather than as errors, so this only fires on git
362/// output — see `vcs_core::Error::is_merge_conflict`.)
363pub fn is_merge_conflict(err: &Error) -> bool {
364    exit_output_matches(err, CONFLICT_MARKERS)
365}
366
367/// Whether a failed `commit`/`commit_paths` reported nothing to commit (a clean
368/// tree), as opposed to a real error.
369pub fn is_nothing_to_commit(err: &Error) -> bool {
370    exit_output_matches(err, NOTHING_TO_COMMIT_MARKERS)
371}
372
373/// Whether a failed `fetch`/`fetch_branch`/`remote_branch_exists` looks
374/// transient (DNS, a dropped connection, a fast network blip) and is worth
375/// retrying.
376///
377/// A processkit-level **timeout** is deliberately **not** classified transient
378/// (R6). A `.timeout()`-bounded run that expired has already consumed the caller's
379/// full deadline — retrying it would multiply the wall-clock by [`FETCH_ATTEMPTS`]
380/// (e.g. a black-holed remote under a 120 s deadline would block ≈ 6 min, three
381/// times the advertised ceiling). The deadline *is* the patience budget; a caller
382/// who wants longer should raise the timeout, not have it silently tripled. Fast
383/// transient failures (the io-level and marker cases below) still retry, because
384/// they fail quickly and a retry is cheap.
385pub fn is_transient_fetch_error(err: &Error) -> bool {
386    // An io-level transient from the spawn itself (interrupted / would-block / busy),
387    // which processkit classifies via `Error::is_transient()` (it covers `Spawn`/`Io`,
388    // not `Exit`/`Timeout`, so it composes cleanly with the marker scan below).
389    err.is_transient() || exit_output_matches(err, TRANSIENT_FETCH_MARKERS)
390}
391
392/// Lower-case substrings marking a **whole-repository / working-copy lock**
393/// contention failure — another process held the *one* repo-wide lock, so the
394/// command **never started** (clean, pre-execution) and touched nothing.
395///
396/// These are deliberately limited to the locks that guard the *entire* operation
397/// up front, so retrying is safe even on a **mutating** command: the repo was not
398/// modified at all. We intentionally do **not** include per-ref lock messages
399/// (`cannot lock ref`, `<ref>.lock`/`packed-refs.lock: File exists`): a multi-ref
400/// `push`/`fetch` updates refs sequentially, so a ref-lock failure can arrive
401/// *after* earlier refs already moved — replaying that is not idempotent.
402/// Network markers ([`TRANSIENT_FETCH_MARKERS`]) and conflict/exit failures are
403/// likewise absent.
404const LOCK_CONTENTION_MARKERS: &[&str] = &[
405    // git: the whole-repo index lock (pre-write). Match the **locale-stable path
406    // fragment** `index.lock`, not the translated `': File exists'` suffix — git
407    // localizes its messages, so a `LANG=de_DE` runner would never match the full
408    // English phrase. `index.lock` names the index lock specifically; per-ref locks
409    // (`<ref>.lock`, `packed-refs.lock`) are ruled out by the `refs/` guard in
410    // `is_lock_contention`. (This matches any `index.lock` *create* failure — a
411    // held lock, or e.g. `Permission denied` — all pre-write, so retrying is safe.)
412    "index.lock",
413    // jj: the working-copy lock and the operation-heads lock (both pre-mutation).
414    // These are jj's exact wordings (lower-cased for the classifier). NOTE: modern
415    // jj generally **blocks** on these locks until they're free rather than failing,
416    // so contention usually surfaces as a wait, not a classifiable error — these
417    // markers catch only the residual cases where jj does surface a lock error.
418    "failed to lock working copy",
419    "failed to lock operation heads store",
420];
421
422/// Whether `err` is a **whole-repository lock-contention** failure — another
423/// process held git's `index.lock` or jj's working-copy / op-heads lock, so the
424/// command couldn't even start. Such a failure is *pre-execution* and therefore
425/// safe to retry even on a **mutating** operation (the repo was never modified).
426/// Per-ref lock failures (`cannot lock ref`, `<ref>.lock`) are deliberately **not**
427/// classified here — they can occur mid-way through a multi-ref `push`/`fetch`,
428/// where a retry would not be idempotent. Conflict, "nothing to commit", a real
429/// non-zero exit, a timeout, a signal, or a missing binary are also **not** lock
430/// contention and must not be retried this way.
431pub fn is_lock_contention(err: &Error) -> bool {
432    // Rule out a **per-ref** lock first: it is *not* safely retryable (a multi-ref
433    // push/fetch can fail one ref's lock after earlier refs already moved). git's
434    // per-ref lock lives under `refs/` (`…/refs/heads/<name>.lock`) and its message
435    // names `refs/…`, whereas the whole-repo `index.lock` (`<gitdir>/index.lock`)
436    // never does — so a `refs/` mention excludes it, locale-independently. This also
437    // stops a branch literally named `index`/`reindex` (whose `…/reindex.lock`
438    // contains the substring `index.lock`) from matching the bare `index.lock`
439    // marker. (A repo whose *path* contains `refs/` then misses the index-lock retry
440    // — a benign false-negative, safer than a wrong retry.)
441    if exit_output_matches(err, &["refs/"]) {
442        return false;
443    }
444    exit_output_matches(err, LOCK_CONTENTION_MARKERS)
445}
446
447/// Whether `err` is an **input rejection** — a bad caller argument, encoded as an
448/// [`Error::Spawn`] whose source is `io::ErrorKind::InvalidInput`. This is the
449/// pattern the toolkit's own argument guards raise ([`reject_flag_like`] and the
450/// validating newtypes `RefName`/`RevSpec`/`RevsetExpr`) for a value that would be
451/// misparsed as a flag, is empty, or contains a NUL — and it also covers the
452/// spawn-time `InvalidInput` the OS raises for an un-spawnable argument (an interior
453/// NUL in a flag-value, or Windows' batch-arg-escaping refusal). All are genuine
454/// bad input, distinct from a real spawn failure (missing binary → `NotFound`, no
455/// perms → `PermissionDenied`) or a non-zero exit. A binding maps this to a
456/// `ValueError`; the facades re-expose it as `Error::is_invalid_input()`.
457pub fn is_invalid_input(err: &Error) -> bool {
458    matches!(
459        err,
460        Error::Spawn { source, .. } if source.kind() == std::io::ErrorKind::InvalidInput
461    )
462}
463
/// A bounded retry strategy: how many attempts, the (exponential) backoff between
/// them, and whether to add full jitter. Used by [`ManagedClient`] to retry
/// [`is_lock_contention`] failures. The [`Default`] is [`none`](RetryPolicy::none)
/// (no retry) — retry is **opt-in**.
///
/// Every constructor and builder is a `const fn`, so a tuned policy can be stored
/// in a `const` or `static`. They are also `#[must_use]`: each builder consumes
/// `self` and returns the updated copy, so dropping the result discards the change.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct RetryPolicy {
    /// Total attempts including the first; `1` means no retry.
    pub attempts: u32,
    /// Delay before the first retry; doubles each subsequent retry (capped by
    /// [`max_backoff`](RetryPolicy::max_backoff)). `ZERO` means retry immediately.
    pub base_backoff: Duration,
    /// Upper bound on the (pre-jitter) backoff delay. `ZERO` means uncapped.
    pub max_backoff: Duration,
    /// Apply **full jitter** — the actual delay is uniform in `[0, computed]` — to
    /// avoid a thundering herd when many workers retry against one repository.
    pub jitter: bool,
}

impl RetryPolicy {
    /// No retry: a single attempt. The default.
    #[must_use]
    pub const fn none() -> Self {
        Self {
            attempts: 1,
            base_backoff: Duration::ZERO,
            max_backoff: Duration::ZERO,
            jitter: false,
        }
    }

    /// A sensible default for repository lock contention: a handful of attempts
    /// with short, jittered, exponential backoff (25 ms → 500 ms).
    #[must_use]
    pub const fn lock_contention() -> Self {
        Self {
            attempts: 5,
            base_backoff: Duration::from_millis(25),
            max_backoff: Duration::from_millis(500),
            jitter: true,
        }
    }

    /// Set the total number of attempts (clamped to at least 1).
    #[must_use]
    pub const fn attempts(mut self, attempts: u32) -> Self {
        // `Ord::max` is not callable in a `const fn`, so the clamp is spelled out.
        self.attempts = if attempts == 0 { 1 } else { attempts };
        self
    }

    /// Set the base backoff (the delay before the first retry).
    #[must_use]
    pub const fn base_backoff(mut self, backoff: Duration) -> Self {
        self.base_backoff = backoff;
        self
    }

    /// Cap the (pre-jitter) backoff delay; `ZERO` leaves it uncapped.
    #[must_use]
    pub const fn max_backoff(mut self, max: Duration) -> Self {
        self.max_backoff = max;
        self
    }

    /// Toggle full jitter on the backoff delay.
    #[must_use]
    pub const fn with_jitter(mut self, jitter: bool) -> Self {
        self.jitter = jitter;
        self
    }
}

impl Default for RetryPolicy {
    /// No retry — retry is opt-in.
    fn default() -> Self {
        Self::none()
    }
}
536
537/// The (possibly jittered) backoff before the `retry_index`-th retry (0 = first).
538fn backoff_for(policy: &RetryPolicy, retry_index: u32) -> Duration {
539    if policy.base_backoff.is_zero() {
540        return Duration::ZERO;
541    }
542    let base = policy.base_backoff.as_nanos();
543    let scaled = base.saturating_mul(1u128 << retry_index.min(20));
544    let capped = if policy.max_backoff.is_zero() {
545        scaled
546    } else {
547        scaled.min(policy.max_backoff.as_nanos())
548    };
549    let delay = Duration::from_nanos(capped.min(u64::MAX as u128) as u64);
550    if policy.jitter {
551        full_jitter(delay)
552    } else {
553        delay
554    }
555}
556
/// Full jitter: pick a delay uniformly-ish from `[0, max]`.
///
/// The randomness comes from hashing with a freshly created, OS-seeded
/// [`RandomState`](std::collections::hash_map::RandomState), which needs no extra
/// dependency — enough to de-correlate retries, not cryptographic. A zero `max`
/// returns `Duration::ZERO` without hashing.
fn full_jitter(max: Duration) -> Duration {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};

    let ceiling = max.as_nanos();
    if ceiling == 0 {
        return Duration::ZERO;
    }

    let mut state = RandomState::new().build_hasher();
    state.write_u64(ceiling as u64);
    let draw = state.finish() as u128;

    // `draw % (ceiling + 1)` lands in `[0, ceiling]`; the final clamp keeps the
    // cast to `u64` lossless.
    let picked = (draw % (ceiling + 1)).min(u64::MAX as u128);
    Duration::from_nanos(picked as u64)
}
571
572/// Run `op`, retrying its result while `should_retry` says so and `policy` has
573/// attempts left, sleeping the (jittered, exponential) backoff between tries. The
574/// op is re-invoked from scratch each attempt, so it must be idempotent for the
575/// errors `should_retry` selects (lock-contention failures are — the command never
576/// ran). Returns the first `Ok`, or the last `Err`.
577pub async fn retry_async<T, Fut>(
578    policy: &RetryPolicy,
579    should_retry: impl Fn(&Error) -> bool,
580    mut op: impl FnMut() -> Fut,
581) -> Result<T>
582where
583    Fut: Future<Output = Result<T>>,
584{
585    let attempts = policy.attempts.max(1);
586    for attempt in 1..=attempts {
587        match op().await {
588            Ok(value) => return Ok(value),
589            Err(err) => {
590                if attempt == attempts || !should_retry(&err) {
591                    return Err(err);
592                }
593                let delay = backoff_for(policy, attempt - 1);
594                if !delay.is_zero() {
595                    tokio::time::sleep(delay).await;
596                }
597            }
598        }
599    }
600    unreachable!("the loop returns on the final attempt")
601}
602
603/// A [`CliClient`] wrapper that adds two opt-in concerns the CLI wrappers
604/// (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`) all share, without touching a
605/// single call site:
606///
607/// 1. **Lock-contention retry** ([`is_lock_contention`]) per a [`RetryPolicy`] —
608///    off by default ([`RetryPolicy::none`]); enable with
609///    [`with_retry`](ManagedClient::with_retry). Safe even for mutating commands,
610///    since lock contention is a clean pre-execution failure.
611/// 2. **Credential injection** from an opt-in [`CredentialProvider`] — off by
612///    default (no provider); attach one with
613///    [`with_credentials`](ManagedClient::with_credentials). When a forge
614///    *token-env* binding is configured
615///    ([`with_token_env`](ManagedClient::with_token_env)), every command run
616///    through this client gets the resolved token in that environment variable
617///    (e.g. `GH_TOKEN`). Backends that inject the secret differently (git's
618///    `credential.helper`) instead call
619///    [`resolve_credential`](ManagedClient::resolve_credential) at the command
620///    site. Resolution happens once per call, before the retry loop.
621///
622/// Both default to inert, so a client with neither configured behaves exactly
623/// like a bare `CliClient`.
624pub struct ManagedClient<R: ProcessRunner = JobRunner> {
    /// The wrapped [`CliClient`], built for the target program by the constructors.
625    inner: CliClient<R>,
    /// Lock-contention retry policy; the constructors start it at
    /// [`RetryPolicy::none`].
626    retry: RetryPolicy,
    /// Optional credential provider; `None` (ambient auth) until one is attached.
    /// Shared behind an `Arc` as a trait object.
627    credentials: Option<Arc<dyn CredentialProvider>>,
628    /// When set, the token is auto-injected into this env var on every command,
629    /// resolved for this service. Used by the forge clients (`GH_TOKEN`, …).
    /// The tuple is `(service to resolve for, environment variable name)`.
630    token_env: Option<(CredentialService, &'static str)>,
631}
632
633impl<R: ProcessRunner> fmt::Debug for ManagedClient<R> {
634    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
635        f.debug_struct("ManagedClient")
636            .field("inner", &self.inner)
637            .field("retry", &self.retry)
638            // Never render the provider itself (it may close over a secret); just
639            // whether one is configured, plus the token-env binding.
640            .field("credentials", &self.credentials.is_some())
641            .field("token_env", &self.token_env)
642            .finish()
643    }
644}
645
646impl ManagedClient<JobRunner> {
647    /// A retrying client driving `program` on the real job-backed runner (no retry
648    /// until [`with_retry`](ManagedClient::with_retry)).
649    pub fn new(program: impl AsRef<OsStr>) -> Self {
650        Self {
651            inner: CliClient::new(program),
652            retry: RetryPolicy::none(),
653            credentials: None,
654            token_env: None,
655        }
656    }
657}
658
659impl<R: ProcessRunner> ManagedClient<R> {
660    /// A retrying client driving `program` on `runner` — inject a fake in tests.
661    pub fn with_runner(program: impl AsRef<OsStr>, runner: R) -> Self {
662        Self {
663            inner: CliClient::with_runner(program, runner),
664            retry: RetryPolicy::none(),
665            credentials: None,
666            token_env: None,
667        }
668    }
669
670    /// Set the lock-contention retry policy (opt-in; default is no retry).
671    pub fn with_retry(mut self, policy: RetryPolicy) -> Self {
672        self.retry = policy;
673        self
674    }
675
676    /// The active retry policy.
677    pub fn retry_policy(&self) -> RetryPolicy {
678        self.retry
679    }
680
681    /// Attach a [`CredentialProvider`] (opt-in; default is none → ambient auth).
682    /// The provider is consulted per operation: automatically when a
683    /// [`with_token_env`](ManagedClient::with_token_env) binding is set, or
684    /// on demand via [`resolve_credential`](ManagedClient::resolve_credential).
685    ///
686    /// **Precedence:** a resolved token is injected *after* any
687    /// [`default_env`](ManagedClient::default_env), so the provider wins over a
688    /// static default and over the ambient CLI login. **Cancellation:** a
689    /// [`default_cancel_on`](ManagedClient::default_cancel_on) token bounds the
690    /// spawned *process*, not provider resolution — if your provider does slow I/O
691    /// (a vault lookup), bound it yourself.
692    #[must_use]
693    pub fn with_credentials(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
694        self.credentials = Some(provider);
695        self
696    }
697
698    /// Bind the resolved token to an environment variable injected on **every**
699    /// command this client runs (the forge case: `GH_TOKEN`, `GITLAB_TOKEN`). The
700    /// `service` tags the [`CredentialRequest`]. No effect without a provider.
701    #[must_use]
702    pub fn with_token_env(mut self, service: CredentialService, var: &'static str) -> Self {
703        self.token_env = Some((service, var));
704        self
705    }
706
707    /// Whether a credential provider is configured.
708    #[must_use]
709    pub fn has_credentials(&self) -> bool {
710        self.credentials.is_some()
711    }
712
713    /// Resolve a credential for `service`/`host` from the configured provider, or
714    /// `Ok(None)` if no provider is set or it defers to ambient auth. Backends
715    /// that inject the secret at the command site (git's `credential.helper`) call
716    /// this directly; the forge token-env path uses it internally.
717    pub async fn resolve_credential(
718        &self,
719        service: CredentialService,
720        host: Option<&str>,
721    ) -> Result<Option<Credential>> {
722        let Some(provider) = &self.credentials else {
723            return Ok(None);
724        };
725        let request = CredentialRequest { service, host };
726        // An empty (or whitespace-only) secret is not a usable credential —
727        // injecting an empty `GH_TOKEN`/`GITLAB_TOKEN` (or a `password=` line)
728        // would *override* the ambient login with nothing rather than defer to it.
729        // Treat it as `None` (ambient), keeping the "no usable credential ⇒
730        // ambient auth" contract consistent regardless of which adapter produced
731        // it (matching `EnvToken`'s own whitespace-only ⇒ unset rule).
732        Ok(provider
733            .credential(&request)
734            .await?
735            .filter(|cred| !cred.secret().expose().trim().is_empty()))
736    }
737
738    /// Materialize `call` into a [`Command`], injecting the forge token env if a
739    /// [`with_token_env`](ManagedClient::with_token_env) binding and a provider
740    /// are both configured. The single place the auto-injection happens, shared by
741    /// every retrying verb.
742    async fn prepare(&self, call: impl IntoCommand<R>) -> Result<Command> {
743        let cmd = call.into_command(&self.inner);
744        let Some((service, var)) = self.token_env else {
745            return Ok(cmd);
746        };
747        match self.resolve_credential(service, None).await? {
748            Some(cred) => Ok(cmd.env(var, cred.secret().expose())),
749            None => Ok(cmd),
750        }
751    }
752
753    /// Apply a default timeout to every command this client builds.
754    pub fn default_timeout(mut self, timeout: Duration) -> Self {
755        self.inner = self.inner.default_timeout(timeout);
756        self
757    }
758
759    /// Set an environment variable on every command this client builds.
760    pub fn default_env(mut self, key: impl AsRef<OsStr>, value: impl AsRef<OsStr>) -> Self {
761        self.inner = self.inner.default_env(key, value);
762        self
763    }
764
765    /// Remove an inherited environment variable on every command this client builds.
766    pub fn default_env_remove(mut self, key: impl AsRef<OsStr>) -> Self {
767        self.inner = self.inner.default_env_remove(key);
768        self
769    }
770
771    /// Cancel every command this client builds when `token` fires.
772    pub fn default_cancel_on(mut self, token: processkit::CancellationToken) -> Self {
773        self.inner = self.inner.default_cancel_on(token);
774        self
775    }
776
777    /// Build a [`Command`] for this client's program (passthrough).
778    pub fn command<I, S>(&self, args: I) -> Command
779    where
780        I: IntoIterator<Item = S>,
781        S: AsRef<OsStr>,
782    {
783        self.inner.command(args)
784    }
785
786    /// Build a [`Command`] bound to `dir` (passthrough).
787    pub fn command_in<I, S>(&self, dir: &Path, args: I) -> Command
788    where
789        I: IntoIterator<Item = S>,
790        S: AsRef<OsStr>,
791    {
792        self.inner.command_in(dir, args)
793    }
794
795    /// The underlying process runner (passthrough — e.g. for `output_all`).
796    pub fn runner(&self) -> &R {
797        self.inner.runner()
798    }
799
800    /// Like [`CliClient::run`], with credential injection and lock-retry.
801    pub async fn run(&self, call: impl IntoCommand<R>) -> Result<String> {
802        let cmd = self.prepare(call).await?;
803        retry_async(&self.retry, is_lock_contention, || {
804            self.inner.run(cmd.clone())
805        })
806        .await
807    }
808
809    /// Like [`CliClient::run_unit`], with credential injection and lock-retry.
810    pub async fn run_unit(&self, call: impl IntoCommand<R>) -> Result<()> {
811        let cmd = self.prepare(call).await?;
812        retry_async(&self.retry, is_lock_contention, || {
813            self.inner.run_unit(cmd.clone())
814        })
815        .await
816    }
817
818    /// Like [`CliClient::output_string`], with credential injection. **No lock-retry:**
819    /// `output_string` returns `Ok` on a non-zero exit (it captures the result), so a
820    /// lock failure surfaces as an `Ok` here, not an `Err` the retry predicate could
821    /// match — route mutations that need lock-retry through
822    /// [`run`](Self::run)/[`run_unit`](Self::run_unit) instead.
823    pub async fn output_string(&self, call: impl IntoCommand<R>) -> Result<ProcessResult<String>> {
824        let cmd = self.prepare(call).await?;
825        self.inner.output_string(cmd).await
826    }
827
828    /// Like [`run`](Self::run), but returns stdout **verbatim** — no `trim_end`.
829    /// For **content**-returning verbs (a file's bytes at a rev, a diff, a raw
830    /// template render) where the trailing newline(s) are part of the value, not
831    /// noise: trimming them corrupts a read-modify-write round-trip and desyncs a
832    /// diff's last hunk from its `@@` line count. Exit-checked like `run`; no
833    /// lock-retry (a content read is not a mutation).
834    pub async fn run_untrimmed(&self, call: impl IntoCommand<R>) -> Result<String> {
835        Ok(self
836            .output_string(call)
837            .await?
838            .ensure_success()?
839            .into_stdout())
840    }
841
842    /// Like [`CliClient::probe`] (zero-or-nonzero exit → `bool`), with credential
843    /// injection and lock-retry.
844    pub async fn probe(&self, call: impl IntoCommand<R>) -> Result<bool> {
845        let cmd = self.prepare(call).await?;
846        retry_async(&self.retry, is_lock_contention, || {
847            self.inner.probe(cmd.clone())
848        })
849        .await
850    }
851
852    /// Like [`CliClient::exit_code`] (the raw exit code; a spawn failure or timeout
853    /// still errors), with credential injection and lock-retry.
854    pub async fn exit_code(&self, call: impl IntoCommand<R>) -> Result<i32> {
855        let cmd = self.prepare(call).await?;
856        retry_async(&self.retry, is_lock_contention, || {
857            self.inner.exit_code(cmd.clone())
858        })
859        .await
860    }
861
862    /// Like [`CliClient::parse`] (credential injection applied; the `FnOnce` parser
863    /// can't be re-run, so lock-retry does not — parsing is a read, where lock
864    /// contention is not a concern anyway).
865    pub async fn parse<T>(
866        &self,
867        call: impl IntoCommand<R>,
868        parser: impl FnOnce(&str) -> T + Send,
869    ) -> Result<T>
870    where
871        T: Send,
872    {
873        let cmd = self.prepare(call).await?;
874        self.inner.parse(cmd, parser).await
875    }
876
877    /// Like [`CliClient::try_parse`] (credential injection applied; `FnOnce` parser,
878    /// and a read, so no lock-retry).
879    pub async fn try_parse<T>(
880        &self,
881        call: impl IntoCommand<R>,
882        parser: impl FnOnce(&str) -> Result<T> + Send,
883    ) -> Result<T>
884    where
885        T: Send,
886    {
887        let cmd = self.prepare(call).await?;
888        self.inner.try_parse(cmd, parser).await
889    }
890}
891
892#[cfg(test)]
893mod tests {
894    use super::*;
895
896    #[test]
897    fn rejects_empty_and_leading_dash() {
898        assert!(reject_flag_like("git", "branch name", "-evil").is_err());
899        assert!(reject_flag_like("git", "branch name", "").is_err());
900        // Whitespace-only is as meaning-changing as empty — refuse it too.
901        assert!(reject_flag_like("git", "branch name", "  ").is_err());
902        assert!(reject_flag_like("git", "branch name", "\t").is_err());
903        assert!(reject_flag_like("git", "branch name", "feature").is_ok());
904        // Leading whitespace before a dash is still refused (the flag-check trims).
905        assert!(reject_flag_like("git", "remote", " --upload-pack=evil").is_err());
906        assert!(reject_flag_like("git", "remote", "\t-x").is_err());
907        // An interior NUL is refused (can't go in argv; opaque OS error otherwise).
908        assert!(reject_flag_like("git", "path", "a\0b").is_err());
909        // A leading-whitespace non-flag value is still accepted (not flag-like).
910        assert!(reject_flag_like("git", "branch name", "  feature").is_ok());
911        // The error names the program and surfaces as a spawn-side refusal.
912        let err = reject_flag_like("jj", "revset", "--remote").unwrap_err();
913        assert!(matches!(err, Error::Spawn { program, .. } if program == "jj"));
914    }
915
916    #[test]
917    fn classifies_merge_conflict() {
918        let on_stdout = Error::Exit {
919            program: "git".into(),
920            code: 1,
921            stdout: "CONFLICT (content): Merge conflict in a.rs".into(),
922            stderr: String::new(),
923        };
924        let on_stderr = Error::Exit {
925            program: "git".into(),
926            code: 1,
927            stdout: String::new(),
928            stderr: "Automatic merge failed; fix conflicts and then commit".into(),
929        };
930        let unrelated = Error::Exit {
931            program: "git".into(),
932            code: 128,
933            stdout: String::new(),
934            stderr: "fatal: not a git repository".into(),
935        };
936        assert!(is_merge_conflict(&on_stdout));
937        assert!(is_merge_conflict(&on_stderr));
938        assert!(!is_merge_conflict(&unrelated));
939        assert!(!is_nothing_to_commit(&on_stdout));
940    }
941
942    #[test]
943    fn classifies_nothing_to_commit_and_transient_fetch() {
944        let nothing = Error::Exit {
945            program: "git".into(),
946            code: 1,
947            stdout: "nothing to commit, working tree clean".into(),
948            stderr: String::new(),
949        };
950        assert!(is_nothing_to_commit(&nothing));
951
952        let dns = Error::Exit {
953            program: "git".into(),
954            code: 128,
955            stdout: String::new(),
956            stderr: "fatal: unable to access 'https://x/': Could not resolve host: x".into(),
957        };
958        assert!(is_transient_fetch_error(&dns));
959        assert!(!is_transient_fetch_error(&nothing));
960
961        // A processkit timeout is deliberately NOT retried (R6): it already consumed
962        // the caller's full deadline, so retrying would multiply the wall-clock by
963        // FETCH_ATTEMPTS. The deadline is the patience budget; raise it, don't triple it.
964        let timeout = Error::Timeout {
965            program: "git".into(),
966            timeout: Duration::from_secs(10),
967            stdout: String::new(),
968            stderr: String::new(),
969        };
970        assert!(!is_transient_fetch_error(&timeout));
971    }
972
973    // R9: an io-level transient from the spawn (EINTR / EAGAIN / busy) is fetch-
974    // retryable too, via processkit's `Error::is_transient()`.
975    #[test]
976    fn classifies_io_transient_as_fetch_retryable() {
977        let interrupted = Error::Spawn {
978            program: "git".into(),
979            source: std::io::Error::from(std::io::ErrorKind::Interrupted),
980        };
981        assert!(
982            interrupted.is_transient(),
983            "processkit treats Interrupted as a transient io error"
984        );
985        assert!(is_transient_fetch_error(&interrupted));
986        // A non-transient io error (e.g. NotFound — the binary is missing) is not retried.
987        let missing = Error::Spawn {
988            program: "git".into(),
989            source: std::io::Error::from(std::io::ErrorKind::NotFound),
990        };
991        assert!(!is_transient_fetch_error(&missing));
992    }
993
994    // R2: regression for the processkit 0.9.1 untruncated-`Error::Exit` fix. A large
995    // output (well past the old 4 KiB cap) with the decisive marker near the END must
996    // still classify — proving the classifiers see the whole captured stream.
997    #[test]
998    fn classifies_on_large_output_past_the_old_4kib_cap() {
999        let padding = "noise line that says nothing\n".repeat(500); // ~14 KiB
1000        let conflict = Error::Exit {
1001            program: "git".into(),
1002            code: 1,
1003            stdout: format!("{padding}CONFLICT (content): Merge conflict in late.rs"),
1004            stderr: String::new(),
1005        };
1006        assert!(
1007            is_merge_conflict(&conflict),
1008            "a conflict marker past 4 KiB must still classify"
1009        );
1010
1011        let transient = Error::Exit {
1012            program: "git".into(),
1013            code: 128,
1014            stdout: String::new(),
1015            stderr: format!("{padding}fatal: unable to access: Could not resolve host: x"),
1016        };
1017        assert!(is_transient_fetch_error(&transient));
1018    }
1019
1020    // processkit's `Error` is `#[non_exhaustive]` and grows variants over time
1021    // (`NotReady`/`Unsupported`/`CassetteMiss`/`NotFound`/`Signalled`/`Cancelled`/
1022    // `ResourceLimit`). Unfamiliar variants must fall through every classifier to
1023    // "no" — a not-ready or unsupported run is neither a conflict, nor a clean
1024    // tree, nor worth a fetch retry.
1025    #[test]
1026    fn unfamiliar_error_variants_are_not_classified() {
1027        let not_ready = Error::NotReady {
1028            program: "git".into(),
1029            timeout: Duration::from_secs(5),
1030        };
1031        let unsupported = Error::Unsupported {
1032            operation: "suspend".into(),
1033        };
1034        for err in [&not_ready, &unsupported] {
1035            assert!(!is_merge_conflict(err));
1036            assert!(!is_nothing_to_commit(err));
1037            assert!(!is_transient_fetch_error(err));
1038        }
1039    }
1040
1041    // `Error::Cancelled` (a client-level `default_cancel_on` killing an in-flight
1042    // run; always available since cancellation became core in processkit 0.10) must
1043    // fall through every classifier to "no" — a cancelled fetch was *deliberately*
1044    // stopped, so replaying it would fight the cancellation. (Behaviour already held
1045    // via the `#[non_exhaustive]` fall-through above; this pins it as a first-class
1046    // assertion.)
1047    #[test]
1048    fn cancelled_is_not_transient_or_otherwise_classified() {
1049        let cancelled = Error::Cancelled {
1050            program: "git".into(),
1051        };
1052        assert!(!is_transient_fetch_error(&cancelled));
1053        assert!(!is_merge_conflict(&cancelled));
1054        assert!(!is_nothing_to_commit(&cancelled));
1055    }
1056
1057    // `Error::Signalled` (a process killed by a signal — e.g. an external SIGTERM/
1058    // SIGKILL, surfaced first-class since processkit 0.9.2 and carrying partial
1059    // `stdout`/`stderr` since 0.10) is *terminal*, not transient: a deliberate kill
1060    // should not be auto-retried, and a signal death is neither a merge conflict nor
1061    // a clean tree. processkit's own `is_transient()` agrees (false for `Signalled`),
1062    // so it falls through every classifier to "no" — pinned here, including the case
1063    // where the captured stderr happens to contain an otherwise-transient marker (a
1064    // killed fetch is still not ours to silently replay).
1065    #[test]
1066    fn signalled_is_terminal_not_transient() {
1067        let signalled = Error::Signalled {
1068            program: "git".into(),
1069            signal: Some(15),
1070            stdout: String::new(),
1071            stderr: "fatal: unable to access: Could not resolve host: x".into(),
1072        };
1073        assert!(!signalled.is_transient());
1074        assert!(!is_transient_fetch_error(&signalled));
1075        assert!(!is_merge_conflict(&signalled));
1076        assert!(!is_nothing_to_commit(&signalled));
1077    }
1078
1079    fn exit(program: &str, code: i32, stderr: &str) -> Error {
1080        Error::Exit {
1081            program: program.into(),
1082            code,
1083            stdout: String::new(),
1084            stderr: stderr.into(),
1085        }
1086    }
1087
1088    // `is_lock_contention` recognises ONLY the *whole-repo* / working-copy lock
1089    // failures (git index.lock, jj working-copy/op-heads lock) — the ones where the
1090    // command did nothing, so a retry is idempotent even on a mutation. Per-ref lock
1091    // failures and conflicts/timeouts are deliberately NOT classified (a multi-ref
1092    // op can fail a ref lock mid-way, where a retry would not be idempotent).
1093    #[test]
1094    fn classifies_lock_contention() {
1095        let lock_failures = [
1096            // git always names `index.lock` (locale-stable) in the lock-contention
1097            // message, even on a non-English runner where the surrounding prose is
1098            // translated.
1099            exit(
1100                "git",
1101                128,
1102                "fatal: Unable to create '/r/.git/index.lock': File exists.",
1103            ),
1104            // A German runner: the path fragment `index.lock` still matches.
1105            exit(
1106                "git",
1107                128,
1108                "fatal: Konnte '/r/.git/index.lock' nicht erstellen: Datei existiert bereits",
1109            ),
1110            // jj's *actual* wordings (verified against jj source) — note no "the".
1111            exit("jj", 1, "Error: Failed to lock working copy"),
1112            exit("jj", 1, "Error: Failed to lock operation heads store"),
1113        ];
1114        for e in &lock_failures {
1115            assert!(is_lock_contention(e), "should be lock contention: {e:?}");
1116            // A lock failure is NOT a transient *fetch* error — different class.
1117            assert!(!is_transient_fetch_error(e), "not a fetch error: {e:?}");
1118        }
1119        let not_locks = [
1120            exit("git", 1, "CONFLICT (content): Merge conflict in a.rs"),
1121            exit("git", 1, "error: pathspec 'x' did not match any file(s)"),
1122            exit("git", 128, "fatal: not a git repository"),
1123            // Per-ref locks are NOT classified — a multi-ref push/fetch can fail a
1124            // ref lock after earlier refs already moved (non-idempotent to replay).
1125            exit(
1126                "git",
1127                1,
1128                "error: cannot lock ref 'refs/heads/x': reference already exists",
1129            ),
1130            exit(
1131                "git",
1132                128,
1133                "Unable to create '/r/.git/packed-refs.lock': File exists.",
1134            ),
1135            // A per-ref lock for a branch literally named `index`: its
1136            // `…/refs/heads/index.lock` path contains the substring `index.lock`,
1137            // but the `refs/` mention correctly rules it out (not a whole-repo lock).
1138            exit(
1139                "git",
1140                128,
1141                "error: cannot lock ref 'refs/heads/index': Unable to create \
1142                 '/r/.git/refs/heads/index.lock': File exists.",
1143            ),
1144            Error::Timeout {
1145                program: "git".into(),
1146                timeout: Duration::from_secs(1),
1147                stdout: String::new(),
1148                stderr: String::new(),
1149            },
1150        ];
1151        for e in &not_locks {
1152            assert!(
1153                !is_lock_contention(e),
1154                "should NOT be lock contention: {e:?}"
1155            );
1156        }
1157    }
1158
1159    #[test]
1160    fn classifies_invalid_input_from_the_guards() {
1161        // What `reject_flag_like` / the newtypes actually produce.
1162        let rejected = reject_flag_like("git", "reference", "-x").unwrap_err();
1163        assert!(
1164            is_invalid_input(&rejected),
1165            "guard rejection is invalid input"
1166        );
1167        assert!(is_invalid_input(
1168            &reject_flag_like("git", "x", "").unwrap_err()
1169        ));
1170
1171        // A real spawn failure (missing binary), a non-zero exit, and a timeout are
1172        // NOT invalid input — they're environment/usage failures, not a bad argument.
1173        let not_input = [
1174            Error::Spawn {
1175                program: "git".into(),
1176                source: std::io::Error::from(std::io::ErrorKind::NotFound),
1177            },
1178            exit("git", 1, "fatal: not a git repository"),
1179            Error::Timeout {
1180                program: "git".into(),
1181                timeout: Duration::from_secs(1),
1182                stdout: String::new(),
1183                stderr: String::new(),
1184            },
1185        ];
1186        for e in &not_input {
1187            assert!(!is_invalid_input(e), "should NOT be invalid input: {e:?}");
1188        }
1189    }
1190
1191    // Backoff is exponential off the base, capped at `max_backoff`, and zero when
1192    // there's no base (immediate retry).
1193    #[test]
1194    fn backoff_is_exponential_capped_and_zero_without_base() {
1195        let p = RetryPolicy::none()
1196            .attempts(6)
1197            .base_backoff(Duration::from_millis(10))
1198            .max_backoff(Duration::from_millis(80));
1199        assert_eq!(backoff_for(&p, 0), Duration::from_millis(10));
1200        assert_eq!(backoff_for(&p, 1), Duration::from_millis(20));
1201        assert_eq!(backoff_for(&p, 2), Duration::from_millis(40));
1202        assert_eq!(backoff_for(&p, 3), Duration::from_millis(80));
1203        assert_eq!(
1204            backoff_for(&p, 4),
1205            Duration::from_millis(80),
1206            "capped at max"
1207        );
1208        assert_eq!(
1209            backoff_for(&RetryPolicy::none(), 3),
1210            Duration::ZERO,
1211            "no base → no wait"
1212        );
1213    }
1214
1215    // Full jitter (used by `RetryPolicy::lock_contention`): every sampled backoff
1216    // stays within `[0, exponential cap]`, and successive samples de-correlate
1217    // (more than one distinct value) so retries don't thunder together. Pins the
1218    // jitter path, which the exponential test above deliberately turns off.
1219    #[test]
1220    fn jitter_stays_within_cap_and_decorrelates() {
1221        let p = RetryPolicy::none()
1222            .attempts(8)
1223            .base_backoff(Duration::from_millis(10))
1224            .max_backoff(Duration::from_millis(80))
1225            .with_jitter(true);
1226        // The cap at retry_index 3 is the full 80ms exponential value.
1227        let cap = Duration::from_millis(80);
1228        let mut seen = std::collections::HashSet::new();
1229        for _ in 0..1000 {
1230            let d = backoff_for(&p, 3);
1231            assert!(
1232                d <= cap,
1233                "jittered backoff {d:?} must stay within the cap {cap:?}"
1234            );
1235            seen.insert(d.as_nanos());
1236        }
1237        assert!(
1238            seen.len() > 1,
1239            "full jitter must produce a spread of delays, not a constant"
1240        );
1241        // A zero base still short-circuits to zero even with jitter on.
1242        assert_eq!(
1243            backoff_for(&RetryPolicy::none().with_jitter(true), 2),
1244            Duration::ZERO
1245        );
1246    }
1247
1248    // The executor: retries while the predicate matches and attempts remain, returns
1249    // the first Ok, doesn't retry a non-matching error, and exhausts to the last Err.
1250    #[tokio::test]
1251    async fn retry_async_retries_then_succeeds_and_respects_the_predicate() {
1252        use std::sync::atomic::{AtomicU32, Ordering};
1253        // Zero backoff → no sleep, deterministic & fast.
1254        let policy = RetryPolicy::none().attempts(4);
1255        let lock = || {
1256            exit(
1257                "git",
1258                128,
1259                "Unable to create '/r/.git/index.lock': File exists.",
1260            )
1261        };
1262
1263        // Fails twice with a lock error, then succeeds — retried to success.
1264        let calls = AtomicU32::new(0);
1265        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
1266            let n = calls.fetch_add(1, Ordering::SeqCst);
1267            let lock = lock();
1268            async move { if n < 2 { Err(lock) } else { Ok(n) } }
1269        })
1270        .await;
1271        assert_eq!(out.unwrap(), 2);
1272        assert_eq!(calls.load(Ordering::SeqCst), 3, "1 try + 2 retries");
1273
1274        // A non-lock error is returned immediately (not retried).
1275        let calls = AtomicU32::new(0);
1276        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
1277            calls.fetch_add(1, Ordering::SeqCst);
1278            async { Err(exit("git", 1, "real, deterministic failure")) }
1279        })
1280        .await;
1281        assert!(out.is_err());
1282        assert_eq!(
1283            calls.load(Ordering::SeqCst),
1284            1,
1285            "non-retryable → single attempt"
1286        );
1287
1288        // Persistent lock contention exhausts the attempt budget.
1289        let calls = AtomicU32::new(0);
1290        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
1291            calls.fetch_add(1, Ordering::SeqCst);
1292            async { Err(exit("git", 128, "index.lock': File exists")) }
1293        })
1294        .await;
1295        assert!(out.is_err());
1296        assert_eq!(calls.load(Ordering::SeqCst), 4, "all attempts used");
1297    }
1298
1299    // `resolve_credential` returns `None` until a provider is attached, then the
1300    // provider's credential. (No process is spawned, so the real runner is fine.)
1301    #[tokio::test]
1302    async fn retrying_client_resolves_credential_opt_in() {
1303        let client = ManagedClient::new("git");
1304        assert!(!client.has_credentials());
1305        assert!(
1306            client
1307                .resolve_credential(CredentialService::Git, None)
1308                .await
1309                .unwrap()
1310                .is_none(),
1311            "no provider → ambient (None)"
1312        );
1313
1314        let client = client.with_credentials(Arc::new(StaticCredential::token("t0k")));
1315        assert!(client.has_credentials());
1316        let got = client
1317            .resolve_credential(CredentialService::Git, None)
1318            .await
1319            .unwrap()
1320            .expect("provider yields a credential");
1321        assert_eq!(got.secret().expose(), "t0k");
1322    }
1323
1324    // An empty (or whitespace-only) secret is treated as `None` (ambient):
1325    // injecting an empty token would override the ambient login with nothing
1326    // instead of deferring to it. Mirrors `EnvToken`'s whitespace-only ⇒ unset rule.
1327    #[tokio::test]
1328    async fn resolve_credential_treats_empty_secret_as_ambient() {
1329        // Service-agnostic: both the forge (token-env) and git (helper) paths route
1330        // through this chokepoint, so a blank secret is ambient for either.
1331        for blank in ["", "   ", "\t\n"] {
1332            let client = ManagedClient::new("git")
1333                .with_credentials(Arc::new(StaticCredential::token(blank)));
1334            for service in [CredentialService::GitHub, CredentialService::Git] {
1335                assert!(
1336                    client
1337                        .resolve_credential(service, None)
1338                        .await
1339                        .unwrap()
1340                        .is_none(),
1341                    "blank secret {blank:?} → ambient (None) for {service:?}"
1342                );
1343            }
1344        }
1345    }
1346}