vcs_cli_support/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(rustdoc::broken_intra_doc_links)]
3//! `vcs-cli-support` — the [`processkit`]-coupled plumbing the CLI wrappers reuse.
4//!
5//! `vcs-git` / `vcs-jj` / `vcs-github` all drive a CLI through [`processkit`], so
6//! they share three concerns that *touch* [`processkit::Error`]: an argv injection
7//! guard, a fetch-retry policy, and a set of [`Error`] classifiers. Extracting them
8//! here keeps the std-only `vcs-diff` clean of the `processkit` dependency, and —
9//! more to the point — keeps the marker lists and classifier logic from drifting
10//! between backends. The wrapper crates re-export these items (so you reach them
11//! as `vcs_git::is_merge_conflict`, not via this crate's name) and rarely name
12//! `vcs-cli-support` directly.
13//!
14//! # The surface
15//!
16//! - **[`reject_flag_like`]** — the injection guard for bare positional argv slots.
17//! A caller value that is empty/whitespace, or starts with `-`, is refused before
18//! spawning (the CLI would parse it as a flag); flag-*value* slots (`-m <msg>`)
19//! are consumed verbatim and skip the check. Wrappers call it with their own
20//! binary name so the surfaced [`Error::Spawn`] names the right `program`.
21//! - **[`FETCH_ATTEMPTS`] / [`FETCH_BACKOFF`]** — the shared transient-retry policy
22//! for `fetch` (one try plus two retries, fixed backoff between them).
23//! - **[`is_merge_conflict`] / [`is_nothing_to_commit`] / [`is_transient_fetch_error`]
24//! / [`is_lock_contention`]** — classify a returned [`Error`] so callers branch on
25//! *intent* ("conflict, resolve it"; "nothing to commit, no-op"; "transient,
26//! retry"; "another process holds the lock, retry") instead of matching on error
27//! internals. They inspect captured [`Error::Exit`] output against fixed marker
28//! lists; a [`processkit`] [`Error::Timeout`] is **not** treated as a transient
29//! fetch error (it already spent the full deadline — see
30//! [`is_transient_fetch_error`]); any unfamiliar `#[non_exhaustive]` variant falls
31//! through to "no".
32//! - **[`RetryPolicy`] / [`retry_async`] / [`ManagedClient`]** — an opt-in retry
33//! strategy (attempts + exponential, jittered backoff) for **lock-contention**
34//! failures. `ManagedClient` wraps a [`processkit`] `CliClient` and applies the
35//! policy to every command, so the `vcs-git`/`vcs-jj` clients gain retry via
36//! `with_retry(...)` without changing a call site. Lock-acquisition failures are
37//! pre-execution, so retrying is safe even for mutating commands.
38//! - **[`CredentialProvider`] / [`Credential`] / [`Secret`]** — an opt-in seam for
39//! supplying a secret *per operation* (a CI token, a vault lookup) instead of
40//! relying on ambient CLI auth. `ManagedClient` injects the resolved token into
41//! each command (the forge `GH_TOKEN`/`GITLAB_TOKEN` env); git uses
42//! [`git_credential_helper`] to keep the secret out of `argv`. Default is no
43//! provider → ambient auth, unchanged. See the [`credentials`](mod@credentials)
44//! module for the full picture.
45//!
46//! # Recipes
47//!
48//! Classify a failed `fetch` to drive a retry decision — branch on intent, not on
49//! the error's internals:
50//!
51//! ```no_run
52//! use vcs_cli_support::{is_transient_fetch_error, FETCH_ATTEMPTS, FETCH_BACKOFF};
53//! # fn run() -> Result<(), processkit::Error> { todo!() }
54//! # fn demo() -> Result<(), processkit::Error> {
55//! for attempt in 1..=FETCH_ATTEMPTS {
56//! match run() {
57//! Ok(()) => break,
58//! Err(e) if is_transient_fetch_error(&e) && attempt < FETCH_ATTEMPTS => {
59//! std::thread::sleep(FETCH_BACKOFF); // DNS / dropped connection — worth a retry
60//! }
61//! Err(e) => return Err(e), // anything else: give up
62//! }
63//! }
64//! # Ok(()) }
65//! ```
66
67use std::ffi::OsStr;
68use std::fmt;
69use std::future::Future;
70use std::path::Path;
71use std::sync::Arc;
72use std::time::Duration;
73
74use processkit::{
75 CliClient, Command, Error, IntoCommand, JobRunner, ProcessResult, ProcessRunner, Result,
76};
77
78pub mod credentials;
79pub use credentials::{
80 Credential, CredentialProvider, CredentialRequest, CredentialService, EnvToken, FnProvider,
81 GitCredentialHelper, Secret, StaticCredential, git_credential_helper, https_host, provider_fn,
82};
83
/// JSON helpers for the forge wrappers (compiled only with the `serde` feature).
/// Keeping them here means every forge parser applies the same `null -> ""`
/// rule and the same parse-error mapping.
#[cfg(feature = "serde")]
#[cfg_attr(docsrs, doc(cfg(feature = "serde")))]
pub mod json {
    use processkit::{Error, Result};
    use serde::Deserialize;
    use serde::de::DeserializeOwned;

    /// Field deserializer for a `String` that a forge CLI may emit as JSON `null`
    /// when the optional value is empty. A `null` becomes the empty string, the
    /// same result an absent key gives. `#[serde(default)]` on its own handles
    /// only the absent key; a present `null` would otherwise fail the parse of
    /// the whole object. Attach it with
    /// `#[serde(deserialize_with = "vcs_cli_support::json::null_to_empty")]`.
    pub fn null_to_empty<'de, D>(deserializer: D) -> ::core::result::Result<String, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let maybe_text: Option<String> = Option::deserialize(deserializer)?;
        Ok(maybe_text.unwrap_or_default())
    }

    /// Parse a forge CLI's `--json` output into `T`. A parse failure is reported
    /// as [`Error::Parse`], tagged with `program` (the CLI's binary name) and
    /// carrying the parser's message.
    pub fn from_json<T: DeserializeOwned>(program: &str, json: &str) -> Result<T> {
        match serde_json::from_str::<T>(json) {
            Ok(parsed) => Ok(parsed),
            Err(cause) => Err(Error::Parse {
                program: program.to_string(),
                message: cause.to_string(),
            }),
        }
    }
}
113
/// Generate the cwd-bound forwarding methods for a CLI wrapper's `…At` view.
///
/// Every CLI wrapper (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`,
/// `vcs-gitea`) offers a cwd-bound view — `GitAt`, `JjAt`, `GitHubAt`,
/// `GitLabAt`, `GiteaAt` — holding a reference to the client and a pre-bound
/// `dir`, and re-exposing the client's methods with `dir` already filled in.
/// The forwarder bodies differ between backends only in three names (plus the
/// two method lists), so they are defined once here rather than as a copied
/// `macro_rules!` in each crate.
///
/// Names:
///
/// - `$view` — the bound view type (e.g. `GitAt`). It must be generic over
///   `<'a, R: ProcessRunner>`, with a field named `$field` holding the client and
///   a `dir: &'a Path` field.
/// - `$field` — the field that holds the client (e.g. `git`, `gh`, `glab`, `tea`).
/// - `$client` — a **string literal** naming the client type; it appears in the
///   generated doc strings as an intra-doc link (e.g. `"Git"` → ``[`Git`]``).
///
/// Method lists:
///
/// - `bare { … }` — methods forwarded to `self.$field` unchanged.
/// - `dir { … }` — methods that receive `self.dir` as their first argument.
///
/// Argument and return types in the lists resolve in the **calling** crate, so
/// write them exactly as the wrapper's own methods do. The `ProcessRunner` bound
/// is spelled `::processkit::ProcessRunner`, so the expansion compiles whatever
/// the caller has imported.
///
/// ```ignore
/// vcs_cli_support::at_forwarders! {
///     GitAt, git, "Git",
///     bare { fn version() -> Result<String>; }
///     dir  { fn status() -> Result<Vec<StatusEntry>>; }
/// }
/// ```
#[macro_export]
macro_rules! at_forwarders {
    (
        $view:ident, $field:ident, $client:literal,
        bare { $( fn $bare_fn:ident( $($bare_arg:ident: $bare_ty:ty),* $(,)? ) -> $bare_ret:ty; )* }
        dir { $( fn $dir_fn:ident( $($dir_arg:ident: $dir_ty:ty),* $(,)? ) -> $dir_ret:ty; )* }
    ) => {
        impl<'a, R: ::processkit::ProcessRunner> $view<'a, R> {
            // Plain forwarders: same arguments, same return type.
            $(
                #[doc = concat!("Bound form of [`", $client, "`]'s `", stringify!($bare_fn), "`.")]
                pub async fn $bare_fn(&self, $($bare_arg: $bare_ty),*) -> $bare_ret {
                    self.$field.$bare_fn($($bare_arg),*).await
                }
            )*
            // Directory forwarders: `self.dir` is supplied as the first argument.
            $(
                #[doc = concat!("Bound form of [`", $client, "`]'s `", stringify!($dir_fn), "` (with `dir` pre-bound).")]
                pub async fn $dir_fn(&self, $($dir_arg: $dir_ty),*) -> $dir_ret {
                    self.$field.$dir_fn(self.dir, $($dir_arg),*).await
                }
            )*
        }
    };
}
169
/// Emit the client scaffold that every CLI wrapper would otherwise hand-write
/// around a [`ManagedClient`].
///
/// `vcs-git`, `vcs-jj`, `vcs-github`, and `vcs-gitlab` each wrap a
/// [`ManagedClient`] in a thin newtype exposing the same constructors and
/// default-applying builders — `new` / `Default` / `with_runner` /
/// `default_timeout` / `default_env` / `default_env_remove` /
/// `default_cancel_on` — with identical bodies and doc strings. Generating that
/// shared part here keeps it from drifting between backends. Each wrapper still
/// hand-writes its *capability* builders (`with_retry`, `with_credentials`, every
/// verb, the `…At` view, …) in a separate `impl` block.
///
/// The generated type is `struct $name<R: ProcessRunner = JobRunner>` with one
/// private field, `core: ManagedClient<R>`, reachable from the rest of the
/// wrapper crate (same module). Every path in the expansion is fully qualified,
/// so it compiles whatever the caller has imported.
///
/// - `$name` — the wrapper type (e.g. `Git`). The doc comment and any other
///   attributes written before `struct` are attached to it verbatim.
/// - `$binary` — the program the client drives (an expression, typically the
///   crate's `BINARY` const).
/// - `token_env = ($svc, $var)` — *optional*. When present, `new`/`with_runner`
///   chain [`ManagedClient::with_token_env`], so a resolved credential is injected
///   into environment variable `$var` for service `$svc` (the forge case:
///   `GH_TOKEN`, `GITLAB_TOKEN`). Leave it out for ambient-auth backends (git, jj).
/// - `scrub_env = [ $var, … ]` — *optional*. When present, `new`/`with_runner`
///   chain [`ManagedClient::default_env_remove`] once per listed variable, so
///   **every** generated client drops those inherited variables by default
///   (`vcs-git` scrubs the repo-redirector vars — `GIT_DIR`, … — so a value
///   leaking from the parent process can't retarget commands). It must come
///   *after* `token_env` when both are given.
///
/// ```ignore
/// vcs_cli_support::managed_client! {
///     /// The real GitHub client.
///     pub struct GitHub => BINARY, token_env = (CredentialService::GitHub, "GH_TOKEN")
/// }
/// vcs_cli_support::managed_client! {
///     /// The real Git client — scrubs the repo-redirector env vars by default.
///     pub struct Git => BINARY, scrub_env = ["GIT_DIR", "GIT_WORK_TREE"]
/// }
/// ```
#[macro_export]
macro_rules! managed_client {
    (
        $(#[$attr:meta])*
        $vis:vis struct $name:ident => $binary:expr
        $(, token_env = ($service:expr, $env_var:expr) )?
        $(, scrub_env = [ $($scrub_var:expr),* $(,)? ] )?
        $(,)?
    ) => {
        $(#[$attr])*
        $vis struct $name<R: ::processkit::ProcessRunner = ::processkit::JobRunner> {
            core: $crate::ManagedClient<R>,
        }

        impl $name<::processkit::JobRunner> {
            /// Create a client driving the real job-backed runner.
            pub fn new() -> Self {
                let inner = $crate::ManagedClient::new($binary)
                    $(.with_token_env($service, $env_var))?
                    $($(.default_env_remove($scrub_var))*)?;
                Self { core: inner }
            }
        }

        impl ::core::default::Default for $name<::processkit::JobRunner> {
            fn default() -> Self {
                Self::new()
            }
        }

        impl<R: ::processkit::ProcessRunner> $name<R> {
            /// Create a client driving `runner` — inject a fake in tests.
            pub fn with_runner(runner: R) -> Self {
                let inner = $crate::ManagedClient::with_runner($binary, runner)
                    $(.with_token_env($service, $env_var))?
                    $($(.default_env_remove($scrub_var))*)?;
                Self { core: inner }
            }

            /// Apply a default timeout to every command this client builds.
            pub fn default_timeout(self, timeout: ::core::time::Duration) -> Self {
                Self {
                    core: self.core.default_timeout(timeout),
                }
            }

            /// Set an environment variable on every command this client builds.
            pub fn default_env(
                self,
                key: impl ::core::convert::AsRef<::std::ffi::OsStr>,
                value: impl ::core::convert::AsRef<::std::ffi::OsStr>,
            ) -> Self {
                Self {
                    core: self.core.default_env(key, value),
                }
            }

            /// Remove an inherited environment variable on every command this client builds.
            pub fn default_env_remove(
                self,
                key: impl ::core::convert::AsRef<::std::ffi::OsStr>,
            ) -> Self {
                Self {
                    core: self.core.default_env_remove(key),
                }
            }

            /// Cancel every command this client builds when `token` fires.
            pub fn default_cancel_on(self, token: ::processkit::CancellationToken) -> Self {
                Self {
                    core: self.core.default_cancel_on(token),
                }
            }
        }
    };
}
285
286/// Injection guard for bare positional argv slots: a caller-supplied value with a
287/// leading `-` would be parsed by the CLI as a *flag* (verified: `git checkout
288/// -evil` → "unknown switch"; jj likewise), and an empty (or whitespace-only)
289/// value silently changes most commands' meaning. Refuse both before anything
290/// spawns, surfacing an [`Error::Spawn`] naming `program`. An interior NUL is
291/// refused too (it can't be passed in argv and otherwise surfaces as an opaque
292/// OS spawn error). Flag-VALUE positions (`-m <msg>`, `--branch <b>`) don't need
293/// this — the CLI consumes the next token verbatim there.
294///
295/// The leading-`-` test is applied to the **trimmed** value, so a value like
296/// `" --upload-pack=…"` (leading whitespace) is still refused — the empty-check
297/// and the flag-check now agree on what "the value" is.
298pub fn reject_flag_like(program: &str, what: &str, value: &str) -> Result<()> {
299 let trimmed = value.trim();
300 if trimmed.is_empty() || trimmed.starts_with('-') || value.contains('\0') {
301 return Err(Error::Spawn {
302 program: program.to_string(),
303 source: std::io::Error::new(
304 std::io::ErrorKind::InvalidInput,
305 format!(
306 "{what} {value:?} would be parsed as a flag (or is empty / contains NUL) — \
307 refusing to pass it as a positional argument"
308 ),
309 ),
310 });
311 }
312 Ok(())
313}
314
/// How many times a transient-retried `fetch` runs in total: the first try plus
/// two retries.
pub const FETCH_ATTEMPTS: u32 = 1 + 2;
/// The fixed pause between consecutive fetch retries.
pub const FETCH_BACKOFF: Duration = Duration::from_millis(500);
/// Grace period granted to a fetch that hit its deadline. processkit first
/// signals the process tree to terminate, then waits this long for a clean exit
/// (flush, close the connection, drop any lock) before hard-killing it. It only
/// matters when a per-client timeout is configured (`Git::default_timeout` /
/// `Jj::default_timeout`); a fetch without a deadline is unaffected.
pub const FETCH_TIMEOUT_GRACE: Duration = Duration::from_millis(2_000);
325
/// Lower-case fragments that identify a merge which stopped on conflicts.
const CONFLICT_MARKERS: &[&str] = &[
    "conflict (",
    "automatic merge failed",
];
/// Lower-case fragments that identify a commit with nothing to record.
const NOTHING_TO_COMMIT_MARKERS: &[&str] = &[
    "nothing to commit",
    "nothing added to commit",
];
/// Lower-case fragments that identify a transient (retryable) network/fetch
/// failure.
///
/// The timeout entries are deliberately *specific* (`connection timed out`,
/// `operation timed out`). A bare `timed out` would also match unrelated,
/// non-network messages (a lock wait, a hook) and cause a spurious fetch retry.
const TRANSIENT_FETCH_MARKERS: &[&str] = &[
    // Name resolution.
    "could not resolve host",
    "couldn't resolve host",
    "temporary failure in name resolution",
    // Connection setup.
    "connection timed out",
    "connection refused",
    "operation timed out",
    "network is unreachable",
    "failed to connect",
    // Connection dropped or transfer cut short.
    "could not read from remote repository",
    "the remote end hung up",
    "early eof",
    "rpc failed",
];
349
350/// Whether `err` is an [`Error::Exit`] whose captured output contains any marker.
351fn exit_output_matches(err: &Error, markers: &[&str]) -> bool {
352 let Error::Exit { stdout, stderr, .. } = err else {
353 return false;
354 };
355 let out = stdout.to_ascii_lowercase();
356 let errt = stderr.to_ascii_lowercase();
357 markers.iter().any(|m| out.contains(m) || errt.contains(m))
358}
359
360/// Whether a failed `merge`/`merge_commit` stopped on a merge conflict. (jj
361/// surfaces conflicts as state rather than as errors, so this only fires on git
362/// output — see `vcs_core::Error::is_merge_conflict`.)
363pub fn is_merge_conflict(err: &Error) -> bool {
364 exit_output_matches(err, CONFLICT_MARKERS)
365}
366
367/// Whether a failed `commit`/`commit_paths` reported nothing to commit (a clean
368/// tree), as opposed to a real error.
369pub fn is_nothing_to_commit(err: &Error) -> bool {
370 exit_output_matches(err, NOTHING_TO_COMMIT_MARKERS)
371}
372
373/// Whether a failed `fetch`/`fetch_branch`/`remote_branch_exists` looks
374/// transient (DNS, a dropped connection, a fast network blip) and is worth
375/// retrying.
376///
377/// A processkit-level **timeout** is deliberately **not** classified transient
378/// (R6). A `.timeout()`-bounded run that expired has already consumed the caller's
379/// full deadline — retrying it would multiply the wall-clock by [`FETCH_ATTEMPTS`]
380/// (e.g. a black-holed remote under a 120 s deadline would block ≈ 6 min, three
381/// times the advertised ceiling). The deadline *is* the patience budget; a caller
382/// who wants longer should raise the timeout, not have it silently tripled. Fast
383/// transient failures (the io-level and marker cases below) still retry, because
384/// they fail quickly and a retry is cheap.
385pub fn is_transient_fetch_error(err: &Error) -> bool {
386 // An io-level transient from the spawn itself (interrupted / would-block / busy),
387 // which processkit classifies via `Error::is_transient()` (it covers `Spawn`/`Io`,
388 // not `Exit`/`Timeout`, so it composes cleanly with the marker scan below).
389 err.is_transient() || exit_output_matches(err, TRANSIENT_FETCH_MARKERS)
390}
391
/// Lower-case fragments that identify a **whole-repository / working-copy lock**
/// contention failure: another process held the *one* repo-wide lock, so the
/// command **never started** (a clean, pre-execution failure) and touched nothing.
///
/// Only locks that guard the *entire* operation up front are listed, which is
/// what makes a retry safe even for a **mutating** command — the repository was
/// not modified at all. Per-ref lock messages (`cannot lock ref`,
/// `<ref>.lock`/`packed-refs.lock: File exists`) are intentionally left out: a
/// multi-ref `push`/`fetch` updates refs one after another, so a ref-lock
/// failure can arrive *after* earlier refs already moved, and replaying that is
/// not idempotent. Network markers ([`TRANSIENT_FETCH_MARKERS`]) and
/// conflict/exit failures are absent for the same reason.
const LOCK_CONTENTION_MARKERS: &[&str] = &[
    // git — the whole-repo index lock (taken before any write). The marker is the
    // **locale-stable path fragment** `index.lock`, not the translated
    // `': File exists'` suffix: git localizes its messages, so a `LANG=de_DE`
    // runner would never produce the full English phrase. `index.lock` names the
    // index lock specifically; per-ref locks (`<ref>.lock`, `packed-refs.lock`)
    // are excluded by the `refs/` guard in `is_lock_contention`. (Any `index.lock`
    // *create* failure matches — a held lock, or e.g. `Permission denied` — and
    // all of them are pre-write, so retrying is safe.)
    "index.lock",
    // jj — the working-copy lock and the operation-heads lock (both taken before
    // mutation). These are jj's exact wordings, lower-cased for the classifier.
    // NOTE: modern jj generally **blocks** on these locks until they are free
    // instead of failing, so contention usually shows up as a wait rather than a
    // classifiable error; the markers cover only the residual cases where jj does
    // report a lock error.
    "failed to lock working copy",
    "failed to lock operation heads store",
];
421
422/// Whether `err` is a **whole-repository lock-contention** failure — another
423/// process held git's `index.lock` or jj's working-copy / op-heads lock, so the
424/// command couldn't even start. Such a failure is *pre-execution* and therefore
425/// safe to retry even on a **mutating** operation (the repo was never modified).
426/// Per-ref lock failures (`cannot lock ref`, `<ref>.lock`) are deliberately **not**
427/// classified here — they can occur mid-way through a multi-ref `push`/`fetch`,
428/// where a retry would not be idempotent. Conflict, "nothing to commit", a real
429/// non-zero exit, a timeout, a signal, or a missing binary are also **not** lock
430/// contention and must not be retried this way.
431pub fn is_lock_contention(err: &Error) -> bool {
432 // Rule out a **per-ref** lock first: it is *not* safely retryable (a multi-ref
433 // push/fetch can fail one ref's lock after earlier refs already moved). git's
434 // per-ref lock lives under `refs/` (`…/refs/heads/<name>.lock`) and its message
435 // names `refs/…`, whereas the whole-repo `index.lock` (`<gitdir>/index.lock`)
436 // never does — so a `refs/` mention excludes it, locale-independently. This also
437 // stops a branch literally named `index`/`reindex` (whose `…/reindex.lock`
438 // contains the substring `index.lock`) from matching the bare `index.lock`
439 // marker. (A repo whose *path* contains `refs/` then misses the index-lock retry
440 // — a benign false-negative, safer than a wrong retry.)
441 if exit_output_matches(err, &["refs/"]) {
442 return false;
443 }
444 exit_output_matches(err, LOCK_CONTENTION_MARKERS)
445}
446
447/// Whether `err` is an **input rejection** — a bad caller argument, encoded as an
448/// [`Error::Spawn`] whose source is `io::ErrorKind::InvalidInput`. This is the
449/// pattern the toolkit's own argument guards raise ([`reject_flag_like`] and the
450/// validating newtypes `RefName`/`RevSpec`/`RevsetExpr`) for a value that would be
451/// misparsed as a flag, is empty, or contains a NUL — and it also covers the
452/// spawn-time `InvalidInput` the OS raises for an un-spawnable argument (an interior
453/// NUL in a flag-value, or Windows' batch-arg-escaping refusal). All are genuine
454/// bad input, distinct from a real spawn failure (missing binary → `NotFound`, no
455/// perms → `PermissionDenied`) or a non-zero exit. A binding maps this to a
456/// `ValueError`; the facades re-expose it as `Error::is_invalid_input()`.
457pub fn is_invalid_input(err: &Error) -> bool {
458 matches!(
459 err,
460 Error::Spawn { source, .. } if source.kind() == std::io::ErrorKind::InvalidInput
461 )
462}
463
/// A bounded retry strategy: how many attempts, the (exponential) backoff between
/// them, and whether to add full jitter. Used by [`ManagedClient`] to retry
/// [`is_lock_contention`] failures. The [`Default`] is [`none`](RetryPolicy::none)
/// (no retry) — retry is **opt-in**.
///
/// The builder methods take the policy by value and return the updated copy, so
/// they are marked `#[must_use]`: a call such as `policy.attempts(3);` whose
/// result is dropped changes nothing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct RetryPolicy {
    /// Total attempts including the first; `1` means no retry.
    pub attempts: u32,
    /// Delay before the first retry; doubles each subsequent retry (capped by
    /// [`max_backoff`](RetryPolicy::max_backoff)). `ZERO` means retry immediately.
    pub base_backoff: Duration,
    /// Upper bound on the (pre-jitter) backoff delay. `ZERO` means uncapped.
    pub max_backoff: Duration,
    /// Apply **full jitter** — the actual delay is uniform in `[0, computed]` — to
    /// avoid a thundering herd when many workers retry against one repository.
    pub jitter: bool,
}

impl RetryPolicy {
    /// No retry: a single attempt, no backoff, no jitter. The default.
    #[must_use]
    pub const fn none() -> Self {
        Self {
            attempts: 1,
            base_backoff: Duration::ZERO,
            max_backoff: Duration::ZERO,
            jitter: false,
        }
    }

    /// A sensible default for repository lock contention: a handful of attempts
    /// with short, jittered, exponential backoff (25 ms → 500 ms).
    #[must_use]
    pub const fn lock_contention() -> Self {
        Self {
            attempts: 5,
            base_backoff: Duration::from_millis(25),
            max_backoff: Duration::from_millis(500),
            jitter: true,
        }
    }

    /// Set the total number of attempts (clamped to at least 1, so `0` behaves
    /// like "no retry" rather than "never run").
    #[must_use = "this returns the updated policy; the original is not modified in place"]
    pub fn attempts(mut self, attempts: u32) -> Self {
        self.attempts = attempts.max(1);
        self
    }

    /// Set the base backoff (the delay before the first retry).
    #[must_use = "this returns the updated policy; the original is not modified in place"]
    pub fn base_backoff(mut self, backoff: Duration) -> Self {
        self.base_backoff = backoff;
        self
    }

    /// Cap the (pre-jitter) backoff delay; `ZERO` leaves it uncapped.
    #[must_use = "this returns the updated policy; the original is not modified in place"]
    pub fn max_backoff(mut self, max: Duration) -> Self {
        self.max_backoff = max;
        self
    }

    /// Toggle full jitter on the backoff delay.
    #[must_use = "this returns the updated policy; the original is not modified in place"]
    pub fn with_jitter(mut self, jitter: bool) -> Self {
        self.jitter = jitter;
        self
    }
}

impl Default for RetryPolicy {
    /// No retry — retry is opt-in.
    fn default() -> Self {
        Self::none()
    }
}
536
537/// The (possibly jittered) backoff before the `retry_index`-th retry (0 = first).
538fn backoff_for(policy: &RetryPolicy, retry_index: u32) -> Duration {
539 if policy.base_backoff.is_zero() {
540 return Duration::ZERO;
541 }
542 let base = policy.base_backoff.as_nanos();
543 let scaled = base.saturating_mul(1u128 << retry_index.min(20));
544 let capped = if policy.max_backoff.is_zero() {
545 scaled
546 } else {
547 scaled.min(policy.max_backoff.as_nanos())
548 };
549 let delay = Duration::from_nanos(capped.min(u64::MAX as u128) as u64);
550 if policy.jitter {
551 full_jitter(delay)
552 } else {
553 delay
554 }
555}
556
/// Full jitter: pick a delay in `[0, max]`. The randomness comes from the
/// OS-seeded [`RandomState`](std::collections::hash_map::RandomState), which
/// needs no extra dependency — good enough to de-correlate retries, not
/// cryptographic. A zero `max` returns `ZERO` without drawing anything.
fn full_jitter(max: Duration) -> Duration {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};

    let ceiling = max.as_nanos();
    if ceiling == 0 {
        return Duration::ZERO;
    }

    // A freshly keyed hasher fed the ceiling gives the pseudo-random draw.
    let mut state = RandomState::new().build_hasher();
    state.write_u64(ceiling as u64);
    let draw = u128::from(state.finish());

    // Reduce into `[0, ceiling]`, then saturate to the `u64` nanosecond range.
    let picked = draw % (ceiling + 1);
    Duration::from_nanos(picked.min(u128::from(u64::MAX)) as u64)
}
571
572/// Run `op`, retrying its result while `should_retry` says so and `policy` has
573/// attempts left, sleeping the (jittered, exponential) backoff between tries. The
574/// op is re-invoked from scratch each attempt, so it must be idempotent for the
575/// errors `should_retry` selects (lock-contention failures are — the command never
576/// ran). Returns the first `Ok`, or the last `Err`.
577pub async fn retry_async<T, Fut>(
578 policy: &RetryPolicy,
579 should_retry: impl Fn(&Error) -> bool,
580 mut op: impl FnMut() -> Fut,
581) -> Result<T>
582where
583 Fut: Future<Output = Result<T>>,
584{
585 let attempts = policy.attempts.max(1);
586 for attempt in 1..=attempts {
587 match op().await {
588 Ok(value) => return Ok(value),
589 Err(err) => {
590 if attempt == attempts || !should_retry(&err) {
591 return Err(err);
592 }
593 let delay = backoff_for(policy, attempt - 1);
594 if !delay.is_zero() {
595 tokio::time::sleep(delay).await;
596 }
597 }
598 }
599 }
600 unreachable!("the loop returns on the final attempt")
601}
602
603/// A [`CliClient`] wrapper that adds two opt-in concerns the CLI wrappers
604/// (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`) all share, without touching a
605/// single call site:
606///
607/// 1. **Lock-contention retry** ([`is_lock_contention`]) per a [`RetryPolicy`] —
608/// off by default ([`RetryPolicy::none`]); enable with
609/// [`with_retry`](ManagedClient::with_retry). Safe even for mutating commands,
610/// since lock contention is a clean pre-execution failure.
611/// 2. **Credential injection** from an opt-in [`CredentialProvider`] — off by
612/// default (no provider); attach one with
613/// [`with_credentials`](ManagedClient::with_credentials). When a forge
614/// *token-env* binding is configured
615/// ([`with_token_env`](ManagedClient::with_token_env)), every command run
616/// through this client gets the resolved token in that environment variable
617/// (e.g. `GH_TOKEN`). Backends that inject the secret differently (git's
618/// `credential.helper`) instead call
619/// [`resolve_credential`](ManagedClient::resolve_credential) at the command
620/// site. Resolution happens once per call, before the retry loop.
621///
622/// Both default to inert, so a client with neither configured behaves exactly
623/// like a bare `CliClient`.
624pub struct ManagedClient<R: ProcessRunner = JobRunner> {
625 inner: CliClient<R>,
626 retry: RetryPolicy,
627 credentials: Option<Arc<dyn CredentialProvider>>,
628 /// When set, the token is auto-injected into this env var on every command,
629 /// resolved for this service. Used by the forge clients (`GH_TOKEN`, …).
630 token_env: Option<(CredentialService, &'static str)>,
631}
632
633impl<R: ProcessRunner> fmt::Debug for ManagedClient<R> {
634 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
635 f.debug_struct("ManagedClient")
636 .field("inner", &self.inner)
637 .field("retry", &self.retry)
638 // Never render the provider itself (it may close over a secret); just
639 // whether one is configured, plus the token-env binding.
640 .field("credentials", &self.credentials.is_some())
641 .field("token_env", &self.token_env)
642 .finish()
643 }
644}
645
646impl ManagedClient<JobRunner> {
647 /// A retrying client driving `program` on the real job-backed runner (no retry
648 /// until [`with_retry`](ManagedClient::with_retry)).
649 pub fn new(program: impl AsRef<OsStr>) -> Self {
650 Self {
651 inner: CliClient::new(program),
652 retry: RetryPolicy::none(),
653 credentials: None,
654 token_env: None,
655 }
656 }
657}
658
659impl<R: ProcessRunner> ManagedClient<R> {
660 /// A retrying client driving `program` on `runner` — inject a fake in tests.
661 pub fn with_runner(program: impl AsRef<OsStr>, runner: R) -> Self {
662 Self {
663 inner: CliClient::with_runner(program, runner),
664 retry: RetryPolicy::none(),
665 credentials: None,
666 token_env: None,
667 }
668 }
669
670 /// Set the lock-contention retry policy (opt-in; default is no retry).
671 pub fn with_retry(mut self, policy: RetryPolicy) -> Self {
672 self.retry = policy;
673 self
674 }
675
676 /// The active retry policy.
677 pub fn retry_policy(&self) -> RetryPolicy {
678 self.retry
679 }
680
681 /// Attach a [`CredentialProvider`] (opt-in; default is none → ambient auth).
682 /// The provider is consulted per operation: automatically when a
683 /// [`with_token_env`](ManagedClient::with_token_env) binding is set, or
684 /// on demand via [`resolve_credential`](ManagedClient::resolve_credential).
685 ///
686 /// **Precedence:** a resolved token is injected *after* any
687 /// [`default_env`](ManagedClient::default_env), so the provider wins over a
688 /// static default and over the ambient CLI login. **Cancellation:** a
689 /// [`default_cancel_on`](ManagedClient::default_cancel_on) token bounds the
690 /// spawned *process*, not provider resolution — if your provider does slow I/O
691 /// (a vault lookup), bound it yourself.
692 #[must_use]
693 pub fn with_credentials(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
694 self.credentials = Some(provider);
695 self
696 }
697
698 /// Bind the resolved token to an environment variable injected on **every**
699 /// command this client runs (the forge case: `GH_TOKEN`, `GITLAB_TOKEN`). The
700 /// `service` tags the [`CredentialRequest`]. No effect without a provider.
701 #[must_use]
702 pub fn with_token_env(mut self, service: CredentialService, var: &'static str) -> Self {
703 self.token_env = Some((service, var));
704 self
705 }
706
707 /// Whether a credential provider is configured.
708 #[must_use]
709 pub fn has_credentials(&self) -> bool {
710 self.credentials.is_some()
711 }
712
713 /// Resolve a credential for `service`/`host` from the configured provider, or
714 /// `Ok(None)` if no provider is set or it defers to ambient auth. Backends
715 /// that inject the secret at the command site (git's `credential.helper`) call
716 /// this directly; the forge token-env path uses it internally.
717 pub async fn resolve_credential(
718 &self,
719 service: CredentialService,
720 host: Option<&str>,
721 ) -> Result<Option<Credential>> {
722 let Some(provider) = &self.credentials else {
723 return Ok(None);
724 };
725 let request = CredentialRequest { service, host };
726 // An empty (or whitespace-only) secret is not a usable credential —
727 // injecting an empty `GH_TOKEN`/`GITLAB_TOKEN` (or a `password=` line)
728 // would *override* the ambient login with nothing rather than defer to it.
729 // Treat it as `None` (ambient), keeping the "no usable credential ⇒
730 // ambient auth" contract consistent regardless of which adapter produced
731 // it (matching `EnvToken`'s own whitespace-only ⇒ unset rule).
732 Ok(provider
733 .credential(&request)
734 .await?
735 .filter(|cred| !cred.secret().expose().trim().is_empty()))
736 }
737
738 /// Materialize `call` into a [`Command`], injecting the forge token env if a
739 /// [`with_token_env`](ManagedClient::with_token_env) binding and a provider
740 /// are both configured. The single place the auto-injection happens, shared by
741 /// every retrying verb.
742 async fn prepare(&self, call: impl IntoCommand<R>) -> Result<Command> {
743 let cmd = call.into_command(&self.inner);
744 let Some((service, var)) = self.token_env else {
745 return Ok(cmd);
746 };
747 match self.resolve_credential(service, None).await? {
748 Some(cred) => Ok(cmd.env(var, cred.secret().expose())),
749 None => Ok(cmd),
750 }
751 }
752
753 /// Apply a default timeout to every command this client builds.
754 pub fn default_timeout(mut self, timeout: Duration) -> Self {
755 self.inner = self.inner.default_timeout(timeout);
756 self
757 }
758
759 /// Set an environment variable on every command this client builds.
760 pub fn default_env(mut self, key: impl AsRef<OsStr>, value: impl AsRef<OsStr>) -> Self {
761 self.inner = self.inner.default_env(key, value);
762 self
763 }
764
765 /// Remove an inherited environment variable on every command this client builds.
766 pub fn default_env_remove(mut self, key: impl AsRef<OsStr>) -> Self {
767 self.inner = self.inner.default_env_remove(key);
768 self
769 }
770
771 /// Cancel every command this client builds when `token` fires.
772 pub fn default_cancel_on(mut self, token: processkit::CancellationToken) -> Self {
773 self.inner = self.inner.default_cancel_on(token);
774 self
775 }
776
777 /// Build a [`Command`] for this client's program (passthrough).
778 pub fn command<I, S>(&self, args: I) -> Command
779 where
780 I: IntoIterator<Item = S>,
781 S: AsRef<OsStr>,
782 {
783 self.inner.command(args)
784 }
785
786 /// Build a [`Command`] bound to `dir` (passthrough).
787 pub fn command_in<I, S>(&self, dir: &Path, args: I) -> Command
788 where
789 I: IntoIterator<Item = S>,
790 S: AsRef<OsStr>,
791 {
792 self.inner.command_in(dir, args)
793 }
794
795 /// The underlying process runner (passthrough — e.g. for `output_all`).
796 pub fn runner(&self) -> &R {
797 self.inner.runner()
798 }
799
800 /// Like [`CliClient::run`], with credential injection and lock-retry.
801 pub async fn run(&self, call: impl IntoCommand<R>) -> Result<String> {
802 let cmd = self.prepare(call).await?;
803 retry_async(&self.retry, is_lock_contention, || {
804 self.inner.run(cmd.clone())
805 })
806 .await
807 }
808
809 /// Like [`CliClient::run_unit`], with credential injection and lock-retry.
810 pub async fn run_unit(&self, call: impl IntoCommand<R>) -> Result<()> {
811 let cmd = self.prepare(call).await?;
812 retry_async(&self.retry, is_lock_contention, || {
813 self.inner.run_unit(cmd.clone())
814 })
815 .await
816 }
817
818 /// Like [`CliClient::output_string`], with credential injection. **No lock-retry:**
819 /// `output_string` returns `Ok` on a non-zero exit (it captures the result), so a
820 /// lock failure surfaces as an `Ok` here, not an `Err` the retry predicate could
821 /// match — route mutations that need lock-retry through
822 /// [`run`](Self::run)/[`run_unit`](Self::run_unit) instead.
823 pub async fn output_string(&self, call: impl IntoCommand<R>) -> Result<ProcessResult<String>> {
824 let cmd = self.prepare(call).await?;
825 self.inner.output_string(cmd).await
826 }
827
828 /// Like [`run`](Self::run), but returns stdout **verbatim** — no `trim_end`.
829 /// For **content**-returning verbs (a file's bytes at a rev, a diff, a raw
830 /// template render) where the trailing newline(s) are part of the value, not
831 /// noise: trimming them corrupts a read-modify-write round-trip and desyncs a
832 /// diff's last hunk from its `@@` line count. Exit-checked like `run`; no
833 /// lock-retry (a content read is not a mutation).
834 pub async fn run_untrimmed(&self, call: impl IntoCommand<R>) -> Result<String> {
835 Ok(self
836 .output_string(call)
837 .await?
838 .ensure_success()?
839 .into_stdout())
840 }
841
842 /// Like [`CliClient::probe`] (zero-or-nonzero exit → `bool`), with credential
843 /// injection and lock-retry.
844 pub async fn probe(&self, call: impl IntoCommand<R>) -> Result<bool> {
845 let cmd = self.prepare(call).await?;
846 retry_async(&self.retry, is_lock_contention, || {
847 self.inner.probe(cmd.clone())
848 })
849 .await
850 }
851
852 /// Like [`CliClient::exit_code`] (the raw exit code; a spawn failure or timeout
853 /// still errors), with credential injection and lock-retry.
854 pub async fn exit_code(&self, call: impl IntoCommand<R>) -> Result<i32> {
855 let cmd = self.prepare(call).await?;
856 retry_async(&self.retry, is_lock_contention, || {
857 self.inner.exit_code(cmd.clone())
858 })
859 .await
860 }
861
862 /// Like [`CliClient::parse`] (credential injection applied; the `FnOnce` parser
863 /// can't be re-run, so lock-retry does not — parsing is a read, where lock
864 /// contention is not a concern anyway).
865 pub async fn parse<T>(
866 &self,
867 call: impl IntoCommand<R>,
868 parser: impl FnOnce(&str) -> T + Send,
869 ) -> Result<T>
870 where
871 T: Send,
872 {
873 let cmd = self.prepare(call).await?;
874 self.inner.parse(cmd, parser).await
875 }
876
877 /// Like [`CliClient::try_parse`] (credential injection applied; `FnOnce` parser,
878 /// and a read, so no lock-retry).
879 pub async fn try_parse<T>(
880 &self,
881 call: impl IntoCommand<R>,
882 parser: impl FnOnce(&str) -> Result<T> + Send,
883 ) -> Result<T>
884 where
885 T: Send,
886 {
887 let cmd = self.prepare(call).await?;
888 self.inner.try_parse(cmd, parser).await
889 }
890}
891
// Unit tests for the argv guard, the error classifiers, the retry/backoff
// machinery, and `ManagedClient` credential resolution. None of them spawn a
// process: errors are constructed by hand and the retry executor is driven by
// in-memory closures.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn rejects_empty_and_leading_dash() {
        assert!(reject_flag_like("git", "branch name", "-evil").is_err());
        assert!(reject_flag_like("git", "branch name", "").is_err());
        // Whitespace-only is as meaning-changing as empty — refuse it too.
        assert!(reject_flag_like("git", "branch name", " ").is_err());
        assert!(reject_flag_like("git", "branch name", "\t").is_err());
        assert!(reject_flag_like("git", "branch name", "feature").is_ok());
        // Leading whitespace before a dash is still refused (the flag-check trims).
        assert!(reject_flag_like("git", "remote", " --upload-pack=evil").is_err());
        assert!(reject_flag_like("git", "remote", "\t-x").is_err());
        // An interior NUL is refused (can't go in argv; opaque OS error otherwise).
        assert!(reject_flag_like("git", "path", "a\0b").is_err());
        // A leading-whitespace non-flag value is still accepted (not flag-like).
        assert!(reject_flag_like("git", "branch name", " feature").is_ok());
        // The error names the program and surfaces as a spawn-side refusal.
        let err = reject_flag_like("jj", "revset", "--remote").unwrap_err();
        assert!(matches!(err, Error::Spawn { program, .. } if program == "jj"));
    }

    #[test]
    fn classifies_merge_conflict() {
        let on_stdout = Error::Exit {
            program: "git".into(),
            code: 1,
            stdout: "CONFLICT (content): Merge conflict in a.rs".into(),
            stderr: String::new(),
        };
        let on_stderr = Error::Exit {
            program: "git".into(),
            code: 1,
            stdout: String::new(),
            stderr: "Automatic merge failed; fix conflicts and then commit".into(),
        };
        let unrelated = Error::Exit {
            program: "git".into(),
            code: 128,
            stdout: String::new(),
            stderr: "fatal: not a git repository".into(),
        };
        assert!(is_merge_conflict(&on_stdout));
        assert!(is_merge_conflict(&on_stderr));
        assert!(!is_merge_conflict(&unrelated));
        assert!(!is_nothing_to_commit(&on_stdout));
    }

    #[test]
    fn classifies_nothing_to_commit_and_transient_fetch() {
        let nothing = Error::Exit {
            program: "git".into(),
            code: 1,
            stdout: "nothing to commit, working tree clean".into(),
            stderr: String::new(),
        };
        assert!(is_nothing_to_commit(&nothing));

        let dns = Error::Exit {
            program: "git".into(),
            code: 128,
            stdout: String::new(),
            stderr: "fatal: unable to access 'https://x/': Could not resolve host: x".into(),
        };
        assert!(is_transient_fetch_error(&dns));
        assert!(!is_transient_fetch_error(&nothing));

        // A processkit timeout is deliberately NOT retried (R6): it already consumed
        // the caller's full deadline, so retrying would multiply the wall-clock by
        // FETCH_ATTEMPTS. The deadline is the patience budget; raise it, don't triple it.
        let timeout = Error::Timeout {
            program: "git".into(),
            timeout: Duration::from_secs(10),
            stdout: String::new(),
            stderr: String::new(),
        };
        assert!(!is_transient_fetch_error(&timeout));
    }

    // R9: an io-level transient from the spawn (EINTR / EAGAIN / busy) is fetch-
    // retryable too, via processkit's `Error::is_transient()`.
    #[test]
    fn classifies_io_transient_as_fetch_retryable() {
        let interrupted = Error::Spawn {
            program: "git".into(),
            source: std::io::Error::from(std::io::ErrorKind::Interrupted),
        };
        assert!(
            interrupted.is_transient(),
            "processkit treats Interrupted as a transient io error"
        );
        assert!(is_transient_fetch_error(&interrupted));
        // A non-transient io error (e.g. NotFound — the binary is missing) is not retried.
        let missing = Error::Spawn {
            program: "git".into(),
            source: std::io::Error::from(std::io::ErrorKind::NotFound),
        };
        assert!(!is_transient_fetch_error(&missing));
    }

    // R2: regression for the processkit 0.9.1 untruncated-`Error::Exit` fix. A large
    // output (well past the old 4 KiB cap) with the decisive marker near the END must
    // still classify — proving the classifiers see the whole captured stream.
    #[test]
    fn classifies_on_large_output_past_the_old_4kib_cap() {
        let padding = "noise line that says nothing\n".repeat(500); // ~14 KiB
        let conflict = Error::Exit {
            program: "git".into(),
            code: 1,
            stdout: format!("{padding}CONFLICT (content): Merge conflict in late.rs"),
            stderr: String::new(),
        };
        assert!(
            is_merge_conflict(&conflict),
            "a conflict marker past 4 KiB must still classify"
        );

        let transient = Error::Exit {
            program: "git".into(),
            code: 128,
            stdout: String::new(),
            stderr: format!("{padding}fatal: unable to access: Could not resolve host: x"),
        };
        assert!(is_transient_fetch_error(&transient));
    }

    // processkit's `Error` is `#[non_exhaustive]` and grows variants over time
    // (`NotReady`/`Unsupported`/`CassetteMiss`/`NotFound`/`Signalled`/`Cancelled`/
    // `ResourceLimit`). Unfamiliar variants must fall through every classifier to
    // "no" — a not-ready or unsupported run is neither a conflict, nor a clean
    // tree, nor worth a fetch retry.
    #[test]
    fn unfamiliar_error_variants_are_not_classified() {
        let not_ready = Error::NotReady {
            program: "git".into(),
            timeout: Duration::from_secs(5),
        };
        let unsupported = Error::Unsupported {
            operation: "suspend".into(),
        };
        for err in [&not_ready, &unsupported] {
            assert!(!is_merge_conflict(err));
            assert!(!is_nothing_to_commit(err));
            assert!(!is_transient_fetch_error(err));
        }
    }

    // `Error::Cancelled` (a client-level `default_cancel_on` killing an in-flight
    // run; always available since cancellation became core in processkit 0.10) must
    // fall through every classifier to "no" — a cancelled fetch was *deliberately*
    // stopped, so replaying it would fight the cancellation. (Behaviour already held
    // via the `#[non_exhaustive]` fall-through above; this pins it as a first-class
    // assertion.)
    #[test]
    fn cancelled_is_not_transient_or_otherwise_classified() {
        let cancelled = Error::Cancelled {
            program: "git".into(),
        };
        assert!(!is_transient_fetch_error(&cancelled));
        assert!(!is_merge_conflict(&cancelled));
        assert!(!is_nothing_to_commit(&cancelled));
    }

    // `Error::Signalled` (a process killed by a signal — e.g. an external SIGTERM/
    // SIGKILL, surfaced first-class since processkit 0.9.2 and carrying partial
    // `stdout`/`stderr` since 0.10) is *terminal*, not transient: a deliberate kill
    // should not be auto-retried, and a signal death is neither a merge conflict nor
    // a clean tree. processkit's own `is_transient()` agrees (false for `Signalled`),
    // so it falls through every classifier to "no" — pinned here, including the case
    // where the captured stderr happens to contain an otherwise-transient marker (a
    // killed fetch is still not ours to silently replay).
    #[test]
    fn signalled_is_terminal_not_transient() {
        let signalled = Error::Signalled {
            program: "git".into(),
            signal: Some(15),
            stdout: String::new(),
            stderr: "fatal: unable to access: Could not resolve host: x".into(),
        };
        assert!(!signalled.is_transient());
        assert!(!is_transient_fetch_error(&signalled));
        assert!(!is_merge_conflict(&signalled));
        assert!(!is_nothing_to_commit(&signalled));
    }

    // Fixture: an `Error::Exit` with empty stdout and the given stderr.
    fn exit(program: &str, code: i32, stderr: &str) -> Error {
        Error::Exit {
            program: program.into(),
            code,
            stdout: String::new(),
            stderr: stderr.into(),
        }
    }

    // `is_lock_contention` recognises ONLY the *whole-repo* / working-copy lock
    // failures (git index.lock, jj working-copy/op-heads lock) — the ones where the
    // command did nothing, so a retry is idempotent even on a mutation. Per-ref lock
    // failures and conflicts/timeouts are deliberately NOT classified (a multi-ref
    // op can fail a ref lock mid-way, where a retry would not be idempotent).
    #[test]
    fn classifies_lock_contention() {
        let lock_failures = [
            // git always names `index.lock` (locale-stable) in the lock-contention
            // message, even on a non-English runner where the surrounding prose is
            // translated.
            exit(
                "git",
                128,
                "fatal: Unable to create '/r/.git/index.lock': File exists.",
            ),
            // A German runner: the path fragment `index.lock` still matches.
            exit(
                "git",
                128,
                "fatal: Konnte '/r/.git/index.lock' nicht erstellen: Datei existiert bereits",
            ),
            // jj's *actual* wordings (verified against jj source) — note no "the".
            exit("jj", 1, "Error: Failed to lock working copy"),
            exit("jj", 1, "Error: Failed to lock operation heads store"),
        ];
        for e in &lock_failures {
            assert!(is_lock_contention(e), "should be lock contention: {e:?}");
            // A lock failure is NOT a transient *fetch* error — different class.
            assert!(!is_transient_fetch_error(e), "not a fetch error: {e:?}");
        }
        let not_locks = [
            exit("git", 1, "CONFLICT (content): Merge conflict in a.rs"),
            exit("git", 1, "error: pathspec 'x' did not match any file(s)"),
            exit("git", 128, "fatal: not a git repository"),
            // Per-ref locks are NOT classified — a multi-ref push/fetch can fail a
            // ref lock after earlier refs already moved (non-idempotent to replay).
            exit(
                "git",
                1,
                "error: cannot lock ref 'refs/heads/x': reference already exists",
            ),
            exit(
                "git",
                128,
                "Unable to create '/r/.git/packed-refs.lock': File exists.",
            ),
            // A per-ref lock for a branch literally named `index`: its
            // `…/refs/heads/index.lock` path contains the substring `index.lock`,
            // but the `refs/` mention correctly rules it out (not a whole-repo lock).
            exit(
                "git",
                128,
                "error: cannot lock ref 'refs/heads/index': Unable to create \
                 '/r/.git/refs/heads/index.lock': File exists.",
            ),
            Error::Timeout {
                program: "git".into(),
                timeout: Duration::from_secs(1),
                stdout: String::new(),
                stderr: String::new(),
            },
        ];
        for e in &not_locks {
            assert!(
                !is_lock_contention(e),
                "should NOT be lock contention: {e:?}"
            );
        }
    }

    #[test]
    fn classifies_invalid_input_from_the_guards() {
        // What `reject_flag_like` / the newtypes actually produce.
        let rejected = reject_flag_like("git", "reference", "-x").unwrap_err();
        assert!(
            is_invalid_input(&rejected),
            "guard rejection is invalid input"
        );
        assert!(is_invalid_input(
            &reject_flag_like("git", "x", "").unwrap_err()
        ));

        // A real spawn failure (missing binary), a non-zero exit, and a timeout are
        // NOT invalid input — they're environment/usage failures, not a bad argument.
        let not_input = [
            Error::Spawn {
                program: "git".into(),
                source: std::io::Error::from(std::io::ErrorKind::NotFound),
            },
            exit("git", 1, "fatal: not a git repository"),
            Error::Timeout {
                program: "git".into(),
                timeout: Duration::from_secs(1),
                stdout: String::new(),
                stderr: String::new(),
            },
        ];
        for e in &not_input {
            assert!(!is_invalid_input(e), "should NOT be invalid input: {e:?}");
        }
    }

    // Backoff is exponential off the base, capped at `max_backoff`, and zero when
    // there's no base (immediate retry).
    #[test]
    fn backoff_is_exponential_capped_and_zero_without_base() {
        let p = RetryPolicy::none()
            .attempts(6)
            .base_backoff(Duration::from_millis(10))
            .max_backoff(Duration::from_millis(80));
        assert_eq!(backoff_for(&p, 0), Duration::from_millis(10));
        assert_eq!(backoff_for(&p, 1), Duration::from_millis(20));
        assert_eq!(backoff_for(&p, 2), Duration::from_millis(40));
        assert_eq!(backoff_for(&p, 3), Duration::from_millis(80));
        assert_eq!(
            backoff_for(&p, 4),
            Duration::from_millis(80),
            "capped at max"
        );
        assert_eq!(
            backoff_for(&RetryPolicy::none(), 3),
            Duration::ZERO,
            "no base → no wait"
        );
    }

    // Full jitter (used by `RetryPolicy::lock_contention`): every sampled backoff
    // stays within `[0, exponential cap]`, and successive samples de-correlate
    // (more than one distinct value) so retries don't thunder together. Pins the
    // jitter path, which the exponential test above deliberately turns off.
    #[test]
    fn jitter_stays_within_cap_and_decorrelates() {
        let p = RetryPolicy::none()
            .attempts(8)
            .base_backoff(Duration::from_millis(10))
            .max_backoff(Duration::from_millis(80))
            .with_jitter(true);
        // The cap at retry_index 3 is the full 80ms exponential value.
        let cap = Duration::from_millis(80);
        let mut seen = std::collections::HashSet::new();
        for _ in 0..1000 {
            let d = backoff_for(&p, 3);
            assert!(
                d <= cap,
                "jittered backoff {d:?} must stay within the cap {cap:?}"
            );
            seen.insert(d.as_nanos());
        }
        assert!(
            seen.len() > 1,
            "full jitter must produce a spread of delays, not a constant"
        );
        // A zero base still short-circuits to zero even with jitter on.
        assert_eq!(
            backoff_for(&RetryPolicy::none().with_jitter(true), 2),
            Duration::ZERO
        );
    }

    // The executor: retries while the predicate matches and attempts remain, returns
    // the first Ok, doesn't retry a non-matching error, and exhausts to the last Err.
    #[tokio::test]
    async fn retry_async_retries_then_succeeds_and_respects_the_predicate() {
        use std::sync::atomic::{AtomicU32, Ordering};
        // Zero backoff → no sleep, deterministic & fast.
        let policy = RetryPolicy::none().attempts(4);
        let lock = || {
            exit(
                "git",
                128,
                "Unable to create '/r/.git/index.lock': File exists.",
            )
        };

        // Fails twice with a lock error, then succeeds — retried to success.
        let calls = AtomicU32::new(0);
        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
            let n = calls.fetch_add(1, Ordering::SeqCst);
            let lock = lock();
            async move { if n < 2 { Err(lock) } else { Ok(n) } }
        })
        .await;
        assert_eq!(out.unwrap(), 2);
        assert_eq!(calls.load(Ordering::SeqCst), 3, "1 try + 2 retries");

        // A non-lock error is returned immediately (not retried).
        let calls = AtomicU32::new(0);
        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
            calls.fetch_add(1, Ordering::SeqCst);
            async { Err(exit("git", 1, "real, deterministic failure")) }
        })
        .await;
        assert!(out.is_err());
        assert_eq!(
            calls.load(Ordering::SeqCst),
            1,
            "non-retryable → single attempt"
        );

        // Persistent lock contention exhausts the attempt budget.
        let calls = AtomicU32::new(0);
        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
            calls.fetch_add(1, Ordering::SeqCst);
            async { Err(exit("git", 128, "index.lock': File exists")) }
        })
        .await;
        assert!(out.is_err());
        assert_eq!(calls.load(Ordering::SeqCst), 4, "all attempts used");
    }

    // `resolve_credential` returns `None` until a provider is attached, then the
    // provider's credential. (No process is spawned, so the real runner is fine.)
    #[tokio::test]
    async fn retrying_client_resolves_credential_opt_in() {
        let client = ManagedClient::new("git");
        assert!(!client.has_credentials());
        assert!(
            client
                .resolve_credential(CredentialService::Git, None)
                .await
                .unwrap()
                .is_none(),
            "no provider → ambient (None)"
        );

        let client = client.with_credentials(Arc::new(StaticCredential::token("t0k")));
        assert!(client.has_credentials());
        let got = client
            .resolve_credential(CredentialService::Git, None)
            .await
            .unwrap()
            .expect("provider yields a credential");
        assert_eq!(got.secret().expose(), "t0k");
    }

    // An empty (or whitespace-only) secret is treated as `None` (ambient):
    // injecting an empty token would override the ambient login with nothing
    // instead of deferring to it. Mirrors `EnvToken`'s whitespace-only ⇒ unset rule.
    #[tokio::test]
    async fn resolve_credential_treats_empty_secret_as_ambient() {
        // Service-agnostic: both the forge (token-env) and git (helper) paths route
        // through this chokepoint, so a blank secret is ambient for either.
        for blank in ["", " ", "\t\n"] {
            let client = ManagedClient::new("git")
                .with_credentials(Arc::new(StaticCredential::token(blank)));
            for service in [CredentialService::GitHub, CredentialService::Git] {
                assert!(
                    client
                        .resolve_credential(service, None)
                        .await
                        .unwrap()
                        .is_none(),
                    "blank secret {blank:?} → ambient (None) for {service:?}"
                );
            }
        }
    }
}