Skip to main content

vcs_cli_support/
lib.rs

1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(rustdoc::broken_intra_doc_links)]
3//! `vcs-cli-support` — the [`processkit`]-coupled plumbing the CLI wrappers reuse.
4//!
5//! `vcs-git` / `vcs-jj` / `vcs-github` all drive a CLI through [`processkit`], so
6//! they share three concerns that *touch* [`processkit::Error`]: an argv injection
7//! guard, a fetch-retry policy, and a set of [`Error`] classifiers. Extracting them
8//! here keeps the std-only `vcs-diff` clean of the `processkit` dependency, and —
9//! more to the point — keeps the marker lists and classifier logic from drifting
10//! between backends. The wrapper crates re-export these items (so you reach them
11//! as `vcs_git::is_merge_conflict`, not via this crate's name) and rarely name
12//! `vcs-cli-support` directly.
13//!
14//! # The surface
15//!
16//! - **[`reject_flag_like`]** — the injection guard for bare positional argv slots.
17//!   A caller value that is empty/whitespace, or starts with `-`, is refused before
18//!   spawning (the CLI would parse it as a flag); flag-*value* slots (`-m <msg>`)
19//!   are consumed verbatim and skip the check. Wrappers call it with their own
20//!   binary name so the surfaced [`Error::Spawn`] names the right `program`.
21//! - **[`FETCH_ATTEMPTS`] / [`FETCH_BACKOFF`]** — the shared transient-retry policy
22//!   for `fetch` (one try plus two retries, fixed backoff between them).
23//! - **[`is_merge_conflict`] / [`is_nothing_to_commit`] / [`is_transient_fetch_error`]
24//!   / [`is_lock_contention`]** — classify a returned [`Error`] so callers branch on
25//!   *intent* ("conflict, resolve it"; "nothing to commit, no-op"; "transient,
26//!   retry"; "another process holds the lock, retry") instead of matching on error
27//!   internals. They inspect captured [`Error::Exit`] output against fixed marker
28//!   lists (and treat a [`processkit`] [`Error::Timeout`] as transient); any
29//!   unfamiliar `#[non_exhaustive]` variant falls through to "no".
30//! - **[`RetryPolicy`] / [`retry_async`] / [`ManagedClient`]** — an opt-in retry
31//!   strategy (attempts + exponential, jittered backoff) for **lock-contention**
32//!   failures. `ManagedClient` wraps a [`processkit`] `CliClient` and applies the
33//!   policy to every command, so the `vcs-git`/`vcs-jj` clients gain retry via
34//!   `with_retry(...)` without changing a call site. Lock-acquisition failures are
35//!   pre-execution, so retrying is safe even for mutating commands.
36//! - **[`CredentialProvider`] / [`Credential`] / [`Secret`]** — an opt-in seam for
37//!   supplying a secret *per operation* (a CI token, a vault lookup) instead of
38//!   relying on ambient CLI auth. `ManagedClient` injects the resolved token into
39//!   each command (the forge `GH_TOKEN`/`GITLAB_TOKEN` env); git uses
40//!   [`git_credential_helper`] to keep the secret out of `argv`. Default is no
41//!   provider → ambient auth, unchanged. See the [`credentials`](mod@credentials)
42//!   module for the full picture.
43//!
44//! # Recipes
45//!
46//! Classify a failed `fetch` to drive a retry decision — branch on intent, not on
47//! the error's internals:
48//!
49//! ```no_run
50//! use vcs_cli_support::{is_transient_fetch_error, FETCH_ATTEMPTS, FETCH_BACKOFF};
51//! # fn run() -> Result<(), processkit::Error> { todo!() }
52//! # fn demo() -> Result<(), processkit::Error> {
53//! for attempt in 1..=FETCH_ATTEMPTS {
54//!     match run() {
55//!         Ok(()) => break,
56//!         Err(e) if is_transient_fetch_error(&e) && attempt < FETCH_ATTEMPTS => {
57//!             std::thread::sleep(FETCH_BACKOFF); // DNS/timeout — worth a retry
58//!         }
59//!         Err(e) => return Err(e),               // anything else: give up
60//!     }
61//! }
62//! # Ok(()) }
63//! ```
64
65use std::ffi::OsStr;
66use std::fmt;
67use std::future::Future;
68use std::path::Path;
69use std::sync::Arc;
70use std::time::Duration;
71
72use processkit::{
73    CliClient, Command, Error, IntoCommand, JobRunner, ProcessResult, ProcessRunner, Result,
74};
75
76pub mod credentials;
77pub use credentials::{
78    Credential, CredentialProvider, CredentialRequest, CredentialService, EnvToken, FnProvider,
79    GitCredentialHelper, Secret, StaticCredential, git_credential_helper, provider_fn,
80};
81
82/// Injection guard for bare positional argv slots: a caller-supplied value with a
83/// leading `-` would be parsed by the CLI as a *flag* (verified: `git checkout
84/// -evil` → "unknown switch"; jj likewise), and an empty (or whitespace-only)
85/// value silently changes most commands' meaning. Refuse both before anything
86/// spawns, surfacing an [`Error::Spawn`] naming `program`. An interior NUL is
87/// refused too (it can't be passed in argv and otherwise surfaces as an opaque
88/// OS spawn error). Flag-VALUE positions (`-m <msg>`, `--branch <b>`) don't need
89/// this — the CLI consumes the next token verbatim there.
90///
91/// The leading-`-` test is applied to the **trimmed** value, so a value like
92/// `" --upload-pack=…"` (leading whitespace) is still refused — the empty-check
93/// and the flag-check now agree on what "the value" is.
94pub fn reject_flag_like(program: &str, what: &str, value: &str) -> Result<()> {
95    let trimmed = value.trim();
96    if trimmed.is_empty() || trimmed.starts_with('-') || value.contains('\0') {
97        return Err(Error::Spawn {
98            program: program.to_string(),
99            source: std::io::Error::new(
100                std::io::ErrorKind::InvalidInput,
101                format!(
102                    "{what} {value:?} would be parsed as a flag (or is empty / contains NUL) — \
103                     refusing to pass it as a positional argument"
104                ),
105            ),
106        });
107    }
108    Ok(())
109}
110
/// How many times a transient-retried `fetch` runs in total: the first try plus
/// two retries.
pub const FETCH_ATTEMPTS: u32 = 3;

/// The fixed pause between two consecutive fetch attempts.
pub const FETCH_BACKOFF: Duration = Duration::from_millis(500);

/// How long a timed-out fetch gets to shut down cleanly. At the deadline
/// processkit signals the process tree (terminate), waits this long for it to
/// exit on its own — flush, close the connection, drop any lock — and then
/// hard-kills it. Only relevant when a per-client timeout is configured
/// (`Git::default_timeout` / `Jj::default_timeout`); a fetch without a deadline
/// is unaffected.
pub const FETCH_TIMEOUT_GRACE: Duration = Duration::from_secs(2);
121
/// Substrings (lower-case) that identify a merge which stopped on conflicts.
const CONFLICT_MARKERS: &[&str] = &[
    "conflict (",
    "automatic merge failed",
];

/// Substrings (lower-case) that identify a commit with nothing to record.
const NOTHING_TO_COMMIT_MARKERS: &[&str] = &[
    "nothing to commit",
    "nothing added to commit",
];

/// Substrings (lower-case) that identify a transient, retryable network/fetch
/// failure.
///
/// The timeout entries are deliberately *specific* (`connection timed out`,
/// `operation timed out`): a bare `timed out` would also match unrelated,
/// non-network messages (a lock wait, a hook) and cause a spurious fetch retry.
const TRANSIENT_FETCH_MARKERS: &[&str] = &[
    // Name resolution.
    "could not resolve host",
    "couldn't resolve host",
    "temporary failure in name resolution",
    // Establishing the connection.
    "connection timed out",
    "connection refused",
    "operation timed out",
    "network is unreachable",
    "failed to connect",
    // The remote went away or the transfer was cut short.
    "could not read from remote repository",
    "the remote end hung up",
    "early eof",
    "rpc failed",
];
145
146/// Whether `err` is an [`Error::Exit`] whose captured output contains any marker.
147fn exit_output_matches(err: &Error, markers: &[&str]) -> bool {
148    let Error::Exit { stdout, stderr, .. } = err else {
149        return false;
150    };
151    let out = stdout.to_ascii_lowercase();
152    let errt = stderr.to_ascii_lowercase();
153    markers.iter().any(|m| out.contains(m) || errt.contains(m))
154}
155
156/// Whether a failed `merge`/`merge_commit` stopped on a merge conflict. (jj
157/// surfaces conflicts as state rather than as errors, so this only fires on git
158/// output — see `vcs_core::Error::is_merge_conflict`.)
159pub fn is_merge_conflict(err: &Error) -> bool {
160    exit_output_matches(err, CONFLICT_MARKERS)
161}
162
163/// Whether a failed `commit`/`commit_paths` reported nothing to commit (a clean
164/// tree), as opposed to a real error.
165pub fn is_nothing_to_commit(err: &Error) -> bool {
166    exit_output_matches(err, NOTHING_TO_COMMIT_MARKERS)
167}
168
169/// Whether a failed `fetch`/`fetch_remote_branch`/`remote_branch_exists` looks
170/// transient (DNS, timeout, dropped connection) and is worth retrying.
171pub fn is_transient_fetch_error(err: &Error) -> bool {
172    // A processkit-level timeout (a `.timeout()`-bounded run that expired) is
173    // inherently transient; treat it as retryable too, regardless of any partial
174    // output it captured before the deadline (as of processkit 0.10 a `Timeout`
175    // carries the partial `stdout`/`stderr`, but the retry decision doesn't depend
176    // on it). So is an io-level transient from the spawn itself (interrupted /
177    // would-block / busy), which processkit classifies via `Error::is_transient()`
178    // (it covers `Spawn`/`Io`, not `Exit`, so it composes cleanly with the marker
179    // scan below).
180    matches!(err, Error::Timeout { .. })
181        || err.is_transient()
182        || exit_output_matches(err, TRANSIENT_FETCH_MARKERS)
183}
184
/// Substrings (lower-case) that identify a **whole-repository / working-copy
/// lock** contention failure: another process held the *one* repo-wide lock, so
/// the command **never started** (a clean, pre-execution failure) and touched
/// nothing.
///
/// The list is deliberately restricted to locks that guard the *entire*
/// operation up front, which is what makes a retry safe even for a **mutating**
/// command — the repository was not modified at all. Per-ref lock messages
/// (`cannot lock ref`, `<ref>.lock`/`packed-refs.lock: File exists`) are
/// intentionally **excluded**: a multi-ref `push`/`fetch` updates refs
/// sequentially, so a ref-lock failure can arrive *after* earlier refs already
/// moved, and replaying that is not idempotent. Network markers
/// ([`TRANSIENT_FETCH_MARKERS`]) and conflict/exit failures are absent for the
/// same reason.
const LOCK_CONTENTION_MARKERS: &[&str] = &[
    // git: the whole-repo index lock (pre-write).
    "index.lock': file exists",
    // git: the hint printed alongside an index-lock failure.
    "another git process seems to be running",
    // jj: the working-copy lock (pre-snapshot).
    "failed to lock the working copy",
    // jj: the operation-log lock (pre-commit of the op).
    "failed to lock op heads",
];
203
204/// Whether `err` is a **whole-repository lock-contention** failure — another
205/// process held git's `index.lock` or jj's working-copy / op-heads lock, so the
206/// command couldn't even start. Such a failure is *pre-execution* and therefore
207/// safe to retry even on a **mutating** operation (the repo was never modified).
208/// Per-ref lock failures (`cannot lock ref`, `<ref>.lock`) are deliberately **not**
209/// classified here — they can occur mid-way through a multi-ref `push`/`fetch`,
210/// where a retry would not be idempotent. Conflict, "nothing to commit", a real
211/// non-zero exit, a timeout, a signal, or a missing binary are also **not** lock
212/// contention and must not be retried this way.
213pub fn is_lock_contention(err: &Error) -> bool {
214    exit_output_matches(err, LOCK_CONTENTION_MARKERS)
215}
216
/// A bounded retry strategy: how many attempts, the (exponential) backoff between
/// them, and whether to add full jitter. Used by [`ManagedClient`] to retry
/// [`is_lock_contention`] failures. The [`Default`] is [`none`](RetryPolicy::none)
/// (no retry) — retry is **opt-in**.
///
/// The builder methods take `self` by value and return the updated policy, so
/// their result must be used; they are marked `#[must_use]` to make a dropped
/// `policy.attempts(3);` a compiler warning instead of a silent no-op.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct RetryPolicy {
    /// Total attempts including the first; `1` means no retry.
    pub attempts: u32,
    /// Delay before the first retry; doubles each subsequent retry (capped by
    /// [`max_backoff`](RetryPolicy::max_backoff)). `ZERO` means retry immediately.
    pub base_backoff: Duration,
    /// Upper bound on the (pre-jitter) backoff delay. `ZERO` means uncapped.
    pub max_backoff: Duration,
    /// Apply **full jitter** — the actual delay is uniform in `[0, computed]` — to
    /// avoid a thundering herd when many workers retry against one repository.
    pub jitter: bool,
}

impl RetryPolicy {
    /// No retry: a single attempt, no backoff, no jitter. The default.
    #[must_use]
    pub const fn none() -> Self {
        Self {
            attempts: 1,
            base_backoff: Duration::ZERO,
            max_backoff: Duration::ZERO,
            jitter: false,
        }
    }

    /// A sensible default for repository lock contention: five attempts with a
    /// jittered, exponential backoff that starts at 25 ms, doubles per retry and
    /// is capped at 500 ms.
    #[must_use]
    pub const fn lock_contention() -> Self {
        Self {
            attempts: 5,
            base_backoff: Duration::from_millis(25),
            max_backoff: Duration::from_millis(500),
            jitter: true,
        }
    }

    /// Set the total number of attempts. `0` is clamped up to `1`, since a policy
    /// always makes at least the initial try.
    #[must_use]
    pub fn attempts(mut self, attempts: u32) -> Self {
        self.attempts = attempts.max(1);
        self
    }

    /// Set the base backoff (the delay before the first retry).
    #[must_use]
    pub fn base_backoff(mut self, backoff: Duration) -> Self {
        self.base_backoff = backoff;
        self
    }

    /// Cap the (pre-jitter) backoff delay; `ZERO` leaves it uncapped.
    #[must_use]
    pub fn max_backoff(mut self, max: Duration) -> Self {
        self.max_backoff = max;
        self
    }

    /// Toggle full jitter on the backoff delay.
    #[must_use]
    pub fn with_jitter(mut self, jitter: bool) -> Self {
        self.jitter = jitter;
        self
    }
}

impl Default for RetryPolicy {
    /// No retry — retry is opt-in.
    fn default() -> Self {
        Self::none()
    }
}
289
290/// The (possibly jittered) backoff before the `retry_index`-th retry (0 = first).
291fn backoff_for(policy: &RetryPolicy, retry_index: u32) -> Duration {
292    if policy.base_backoff.is_zero() {
293        return Duration::ZERO;
294    }
295    let base = policy.base_backoff.as_nanos();
296    let scaled = base.saturating_mul(1u128 << retry_index.min(20));
297    let capped = if policy.max_backoff.is_zero() {
298        scaled
299    } else {
300        scaled.min(policy.max_backoff.as_nanos())
301    };
302    let delay = Duration::from_nanos(capped.min(u64::MAX as u128) as u64);
303    if policy.jitter {
304        full_jitter(delay)
305    } else {
306        delay
307    }
308}
309
/// Full jitter: picks a delay in `[0, max]`.
///
/// The randomness is dependency-free: a freshly constructed, OS-seeded
/// [`RandomState`](std::collections::hash_map::RandomState) hashes the bound and
/// the digest is reduced modulo `max + 1` nanoseconds. That is good enough to
/// de-correlate retries; it is not cryptographic. A `ZERO` bound returns `ZERO`.
fn full_jitter(max: Duration) -> Duration {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};

    let ceiling = max.as_nanos();
    if ceiling == 0 {
        return Duration::ZERO;
    }

    let mut state = RandomState::new().build_hasher();
    // Only the low 64 bits of the bound feed the hasher; it is entropy input,
    // not the bound itself, so truncation is harmless here.
    state.write_u64(ceiling as u64);
    let sample = u128::from(state.finish());

    let picked = sample % (ceiling + 1);
    Duration::from_nanos(u64::try_from(picked).unwrap_or(u64::MAX))
}
324
325/// Run `op`, retrying its result while `should_retry` says so and `policy` has
326/// attempts left, sleeping the (jittered, exponential) backoff between tries. The
327/// op is re-invoked from scratch each attempt, so it must be idempotent for the
328/// errors `should_retry` selects (lock-contention failures are — the command never
329/// ran). Returns the first `Ok`, or the last `Err`.
330pub async fn retry_async<T, Fut>(
331    policy: &RetryPolicy,
332    should_retry: impl Fn(&Error) -> bool,
333    mut op: impl FnMut() -> Fut,
334) -> Result<T>
335where
336    Fut: Future<Output = Result<T>>,
337{
338    let attempts = policy.attempts.max(1);
339    for attempt in 1..=attempts {
340        match op().await {
341            Ok(value) => return Ok(value),
342            Err(err) => {
343                if attempt == attempts || !should_retry(&err) {
344                    return Err(err);
345                }
346                let delay = backoff_for(policy, attempt - 1);
347                if !delay.is_zero() {
348                    tokio::time::sleep(delay).await;
349                }
350            }
351        }
352    }
353    unreachable!("the loop returns on the final attempt")
354}
355
356/// A [`CliClient`] wrapper that adds two opt-in concerns the CLI wrappers
357/// (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`) all share, without touching a
358/// single call site:
359///
360/// 1. **Lock-contention retry** ([`is_lock_contention`]) per a [`RetryPolicy`] —
361///    off by default ([`RetryPolicy::none`]); enable with
362///    [`with_retry`](ManagedClient::with_retry). Safe even for mutating commands,
363///    since lock contention is a clean pre-execution failure.
364/// 2. **Credential injection** from an opt-in [`CredentialProvider`] — off by
365///    default (no provider); attach one with
366///    [`with_credentials`](ManagedClient::with_credentials). When a forge
367///    *token-env* binding is configured
368///    ([`with_token_env`](ManagedClient::with_token_env)), every command run
369///    through this client gets the resolved token in that environment variable
370///    (e.g. `GH_TOKEN`). Backends that inject the secret differently (git's
371///    `credential.helper`) instead call
372///    [`resolve_credential`](ManagedClient::resolve_credential) at the command
373///    site. Resolution happens once per call, before the retry loop.
374///
375/// Both default to inert, so a client with neither configured behaves exactly
376/// like a bare `CliClient`.
377pub struct ManagedClient<R: ProcessRunner = JobRunner> {
378    inner: CliClient<R>,
379    retry: RetryPolicy,
380    credentials: Option<Arc<dyn CredentialProvider>>,
381    /// When set, the token is auto-injected into this env var on every command,
382    /// resolved for this service. Used by the forge clients (`GH_TOKEN`, …).
383    token_env: Option<(CredentialService, &'static str)>,
384}
385
386impl<R: ProcessRunner> fmt::Debug for ManagedClient<R> {
387    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
388        f.debug_struct("ManagedClient")
389            .field("inner", &self.inner)
390            .field("retry", &self.retry)
391            // Never render the provider itself (it may close over a secret); just
392            // whether one is configured, plus the token-env binding.
393            .field("credentials", &self.credentials.is_some())
394            .field("token_env", &self.token_env)
395            .finish()
396    }
397}
398
399impl ManagedClient<JobRunner> {
400    /// A retrying client driving `program` on the real job-backed runner (no retry
401    /// until [`with_retry`](ManagedClient::with_retry)).
402    pub fn new(program: impl AsRef<OsStr>) -> Self {
403        Self {
404            inner: CliClient::new(program),
405            retry: RetryPolicy::none(),
406            credentials: None,
407            token_env: None,
408        }
409    }
410}
411
412impl<R: ProcessRunner> ManagedClient<R> {
413    /// A retrying client driving `program` on `runner` — inject a fake in tests.
414    pub fn with_runner(program: impl AsRef<OsStr>, runner: R) -> Self {
415        Self {
416            inner: CliClient::with_runner(program, runner),
417            retry: RetryPolicy::none(),
418            credentials: None,
419            token_env: None,
420        }
421    }
422
423    /// Set the lock-contention retry policy (opt-in; default is no retry).
424    pub fn with_retry(mut self, policy: RetryPolicy) -> Self {
425        self.retry = policy;
426        self
427    }
428
429    /// The active retry policy.
430    pub fn retry_policy(&self) -> RetryPolicy {
431        self.retry
432    }
433
434    /// Attach a [`CredentialProvider`] (opt-in; default is none → ambient auth).
435    /// The provider is consulted per operation: automatically when a
436    /// [`with_token_env`](ManagedClient::with_token_env) binding is set, or
437    /// on demand via [`resolve_credential`](ManagedClient::resolve_credential).
438    ///
439    /// **Precedence:** a resolved token is injected *after* any
440    /// [`default_env`](ManagedClient::default_env), so the provider wins over a
441    /// static default and over the ambient CLI login. **Cancellation:** a
442    /// [`default_cancel_on`](ManagedClient::default_cancel_on) token bounds the
443    /// spawned *process*, not provider resolution — if your provider does slow I/O
444    /// (a vault lookup), bound it yourself.
445    #[must_use]
446    pub fn with_credentials(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
447        self.credentials = Some(provider);
448        self
449    }
450
451    /// Bind the resolved token to an environment variable injected on **every**
452    /// command this client runs (the forge case: `GH_TOKEN`, `GITLAB_TOKEN`). The
453    /// `service` tags the [`CredentialRequest`]. No effect without a provider.
454    #[must_use]
455    pub fn with_token_env(mut self, service: CredentialService, var: &'static str) -> Self {
456        self.token_env = Some((service, var));
457        self
458    }
459
460    /// Whether a credential provider is configured.
461    #[must_use]
462    pub fn has_credentials(&self) -> bool {
463        self.credentials.is_some()
464    }
465
466    /// Resolve a credential for `service`/`host` from the configured provider, or
467    /// `Ok(None)` if no provider is set or it defers to ambient auth. Backends
468    /// that inject the secret at the command site (git's `credential.helper`) call
469    /// this directly; the forge token-env path uses it internally.
470    pub async fn resolve_credential(
471        &self,
472        service: CredentialService,
473        host: Option<&str>,
474    ) -> Result<Option<Credential>> {
475        let Some(provider) = &self.credentials else {
476            return Ok(None);
477        };
478        let request = CredentialRequest { service, host };
479        // An empty (or whitespace-only) secret is not a usable credential —
480        // injecting an empty `GH_TOKEN`/`GITLAB_TOKEN` (or a `password=` line)
481        // would *override* the ambient login with nothing rather than defer to it.
482        // Treat it as `None` (ambient), keeping the "no usable credential ⇒
483        // ambient auth" contract consistent regardless of which adapter produced
484        // it (matching `EnvToken`'s own whitespace-only ⇒ unset rule).
485        Ok(provider
486            .credential(&request)
487            .await?
488            .filter(|cred| !cred.secret().expose().trim().is_empty()))
489    }
490
491    /// Materialize `call` into a [`Command`], injecting the forge token env if a
492    /// [`with_token_env`](ManagedClient::with_token_env) binding and a provider
493    /// are both configured. The single place the auto-injection happens, shared by
494    /// every retrying verb.
495    async fn prepare(&self, call: impl IntoCommand<R>) -> Result<Command> {
496        let cmd = call.into_command(&self.inner);
497        let Some((service, var)) = self.token_env else {
498            return Ok(cmd);
499        };
500        match self.resolve_credential(service, None).await? {
501            Some(cred) => Ok(cmd.env(var, cred.secret().expose())),
502            None => Ok(cmd),
503        }
504    }
505
506    /// Apply a default timeout to every command this client builds.
507    pub fn default_timeout(mut self, timeout: Duration) -> Self {
508        self.inner = self.inner.default_timeout(timeout);
509        self
510    }
511
512    /// Set an environment variable on every command this client builds.
513    pub fn default_env(mut self, key: impl AsRef<OsStr>, value: impl AsRef<OsStr>) -> Self {
514        self.inner = self.inner.default_env(key, value);
515        self
516    }
517
518    /// Remove an inherited environment variable on every command this client builds.
519    pub fn default_env_remove(mut self, key: impl AsRef<OsStr>) -> Self {
520        self.inner = self.inner.default_env_remove(key);
521        self
522    }
523
524    /// Cancel every command this client builds when `token` fires.
525    pub fn default_cancel_on(mut self, token: processkit::CancellationToken) -> Self {
526        self.inner = self.inner.default_cancel_on(token);
527        self
528    }
529
530    /// Build a [`Command`] for this client's program (passthrough).
531    pub fn command<I, S>(&self, args: I) -> Command
532    where
533        I: IntoIterator<Item = S>,
534        S: AsRef<OsStr>,
535    {
536        self.inner.command(args)
537    }
538
539    /// Build a [`Command`] bound to `dir` (passthrough).
540    pub fn command_in<I, S>(&self, dir: &Path, args: I) -> Command
541    where
542        I: IntoIterator<Item = S>,
543        S: AsRef<OsStr>,
544    {
545        self.inner.command_in(dir, args)
546    }
547
548    /// The underlying process runner (passthrough — e.g. for `output_all`).
549    pub fn runner(&self) -> &R {
550        self.inner.runner()
551    }
552
553    /// Like [`CliClient::run`], with credential injection and lock-retry.
554    pub async fn run(&self, call: impl IntoCommand<R>) -> Result<String> {
555        let cmd = self.prepare(call).await?;
556        retry_async(&self.retry, is_lock_contention, || {
557            self.inner.run(cmd.clone())
558        })
559        .await
560    }
561
562    /// Like [`CliClient::run_unit`], with credential injection and lock-retry.
563    pub async fn run_unit(&self, call: impl IntoCommand<R>) -> Result<()> {
564        let cmd = self.prepare(call).await?;
565        retry_async(&self.retry, is_lock_contention, || {
566            self.inner.run_unit(cmd.clone())
567        })
568        .await
569    }
570
571    /// Like [`CliClient::output`], with credential injection. **No lock-retry:**
572    /// `output` returns `Ok` on a non-zero exit (it captures the result), so a lock
573    /// failure surfaces as an `Ok` here, not an `Err` the retry predicate could
574    /// match — route mutations that need lock-retry through
575    /// [`run`](Self::run)/[`run_unit`](Self::run_unit) instead.
576    pub async fn output(&self, call: impl IntoCommand<R>) -> Result<ProcessResult<String>> {
577        let cmd = self.prepare(call).await?;
578        self.inner.output(cmd).await
579    }
580
581    /// Like [`CliClient::probe`] (zero-or-nonzero exit → `bool`), with credential
582    /// injection and lock-retry.
583    pub async fn probe(&self, call: impl IntoCommand<R>) -> Result<bool> {
584        let cmd = self.prepare(call).await?;
585        retry_async(&self.retry, is_lock_contention, || {
586            self.inner.probe(cmd.clone())
587        })
588        .await
589    }
590
591    /// Like [`CliClient::exit_code`] (the raw exit code; a spawn failure or timeout
592    /// still errors), with credential injection and lock-retry.
593    pub async fn exit_code(&self, call: impl IntoCommand<R>) -> Result<i32> {
594        let cmd = self.prepare(call).await?;
595        retry_async(&self.retry, is_lock_contention, || {
596            self.inner.exit_code(cmd.clone())
597        })
598        .await
599    }
600
601    /// Like [`CliClient::parse`] (credential injection applied; the `FnOnce` parser
602    /// can't be re-run, so lock-retry does not — parsing is a read, where lock
603    /// contention is not a concern anyway).
604    pub async fn parse<T>(
605        &self,
606        call: impl IntoCommand<R>,
607        parser: impl FnOnce(&str) -> T,
608    ) -> Result<T> {
609        let cmd = self.prepare(call).await?;
610        self.inner.parse(cmd, parser).await
611    }
612
613    /// Like [`CliClient::try_parse`] (credential injection applied; `FnOnce` parser,
614    /// and a read, so no lock-retry).
615    pub async fn try_parse<T>(
616        &self,
617        call: impl IntoCommand<R>,
618        parser: impl FnOnce(&str) -> Result<T>,
619    ) -> Result<T> {
620        let cmd = self.prepare(call).await?;
621        self.inner.try_parse(cmd, parser).await
622    }
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628
629    #[test]
630    fn rejects_empty_and_leading_dash() {
631        assert!(reject_flag_like("git", "branch name", "-evil").is_err());
632        assert!(reject_flag_like("git", "branch name", "").is_err());
633        // Whitespace-only is as meaning-changing as empty — refuse it too.
634        assert!(reject_flag_like("git", "branch name", "  ").is_err());
635        assert!(reject_flag_like("git", "branch name", "\t").is_err());
636        assert!(reject_flag_like("git", "branch name", "feature").is_ok());
637        // Leading whitespace before a dash is still refused (the flag-check trims).
638        assert!(reject_flag_like("git", "remote", " --upload-pack=evil").is_err());
639        assert!(reject_flag_like("git", "remote", "\t-x").is_err());
640        // An interior NUL is refused (can't go in argv; opaque OS error otherwise).
641        assert!(reject_flag_like("git", "path", "a\0b").is_err());
642        // A leading-whitespace non-flag value is still accepted (not flag-like).
643        assert!(reject_flag_like("git", "branch name", "  feature").is_ok());
644        // The error names the program and surfaces as a spawn-side refusal.
645        let err = reject_flag_like("jj", "revset", "--remote").unwrap_err();
646        assert!(matches!(err, Error::Spawn { program, .. } if program == "jj"));
647    }
648
649    #[test]
650    fn classifies_merge_conflict() {
651        let on_stdout = Error::Exit {
652            program: "git".into(),
653            code: 1,
654            stdout: "CONFLICT (content): Merge conflict in a.rs".into(),
655            stderr: String::new(),
656        };
657        let on_stderr = Error::Exit {
658            program: "git".into(),
659            code: 1,
660            stdout: String::new(),
661            stderr: "Automatic merge failed; fix conflicts and then commit".into(),
662        };
663        let unrelated = Error::Exit {
664            program: "git".into(),
665            code: 128,
666            stdout: String::new(),
667            stderr: "fatal: not a git repository".into(),
668        };
669        assert!(is_merge_conflict(&on_stdout));
670        assert!(is_merge_conflict(&on_stderr));
671        assert!(!is_merge_conflict(&unrelated));
672        assert!(!is_nothing_to_commit(&on_stdout));
673    }
674
675    #[test]
676    fn classifies_nothing_to_commit_and_transient_fetch() {
677        let nothing = Error::Exit {
678            program: "git".into(),
679            code: 1,
680            stdout: "nothing to commit, working tree clean".into(),
681            stderr: String::new(),
682        };
683        assert!(is_nothing_to_commit(&nothing));
684
685        let dns = Error::Exit {
686            program: "git".into(),
687            code: 128,
688            stdout: String::new(),
689            stderr: "fatal: unable to access 'https://x/': Could not resolve host: x".into(),
690        };
691        assert!(is_transient_fetch_error(&dns));
692        assert!(!is_transient_fetch_error(&nothing));
693
694        // A processkit timeout is transient too. (As of processkit 0.10 a `Timeout`
695        // carries whatever partial `stdout`/`stderr` was captured before the
696        // deadline; we still treat it as unconditionally retryable regardless.)
697        let timeout = Error::Timeout {
698            program: "git".into(),
699            timeout: Duration::from_secs(10),
700            stdout: String::new(),
701            stderr: String::new(),
702        };
703        assert!(is_transient_fetch_error(&timeout));
704    }
705
706    // R9: an io-level transient from the spawn (EINTR / EAGAIN / busy) is fetch-
707    // retryable too, via processkit's `Error::is_transient()`.
708    #[test]
709    fn classifies_io_transient_as_fetch_retryable() {
710        let interrupted = Error::Spawn {
711            program: "git".into(),
712            source: std::io::Error::from(std::io::ErrorKind::Interrupted),
713        };
714        assert!(
715            interrupted.is_transient(),
716            "processkit treats Interrupted as a transient io error"
717        );
718        assert!(is_transient_fetch_error(&interrupted));
719        // A non-transient io error (e.g. NotFound — the binary is missing) is not retried.
720        let missing = Error::Spawn {
721            program: "git".into(),
722            source: std::io::Error::from(std::io::ErrorKind::NotFound),
723        };
724        assert!(!is_transient_fetch_error(&missing));
725    }
726
727    // R2: regression for the processkit 0.9.1 untruncated-`Error::Exit` fix. A large
728    // output (well past the old 4 KiB cap) with the decisive marker near the END must
729    // still classify — proving the classifiers see the whole captured stream.
730    #[test]
731    fn classifies_on_large_output_past_the_old_4kib_cap() {
732        let padding = "noise line that says nothing\n".repeat(500); // ~14 KiB
733        let conflict = Error::Exit {
734            program: "git".into(),
735            code: 1,
736            stdout: format!("{padding}CONFLICT (content): Merge conflict in late.rs"),
737            stderr: String::new(),
738        };
739        assert!(
740            is_merge_conflict(&conflict),
741            "a conflict marker past 4 KiB must still classify"
742        );
743
744        let transient = Error::Exit {
745            program: "git".into(),
746            code: 128,
747            stdout: String::new(),
748            stderr: format!("{padding}fatal: unable to access: Could not resolve host: x"),
749        };
750        assert!(is_transient_fetch_error(&transient));
751    }
752
753    // processkit's `Error` is `#[non_exhaustive]` and grows variants over time
754    // (`NotReady`/`Unsupported`/`CassetteMiss`/`NotFound`/`Signalled`/`Cancelled`/
755    // `ResourceLimit`). Unfamiliar variants must fall through every classifier to
756    // "no" — a not-ready or unsupported run is neither a conflict, nor a clean
757    // tree, nor worth a fetch retry.
758    #[test]
759    fn unfamiliar_error_variants_are_not_classified() {
760        let not_ready = Error::NotReady {
761            program: "git".into(),
762            timeout: Duration::from_secs(5),
763        };
764        let unsupported = Error::Unsupported {
765            operation: "suspend".into(),
766        };
767        for err in [&not_ready, &unsupported] {
768            assert!(!is_merge_conflict(err));
769            assert!(!is_nothing_to_commit(err));
770            assert!(!is_transient_fetch_error(err));
771        }
772    }
773
774    // `Error::Cancelled` (a client-level `default_cancel_on` killing an in-flight
775    // run; always available since cancellation became core in processkit 0.10) must
776    // fall through every classifier to "no" — a cancelled fetch was *deliberately*
777    // stopped, so replaying it would fight the cancellation. (Behaviour already held
778    // via the `#[non_exhaustive]` fall-through above; this pins it as a first-class
779    // assertion.)
780    #[test]
781    fn cancelled_is_not_transient_or_otherwise_classified() {
782        let cancelled = Error::Cancelled {
783            program: "git".into(),
784        };
785        assert!(!is_transient_fetch_error(&cancelled));
786        assert!(!is_merge_conflict(&cancelled));
787        assert!(!is_nothing_to_commit(&cancelled));
788    }
789
790    // `Error::Signalled` (a process killed by a signal — e.g. an external SIGTERM/
791    // SIGKILL, surfaced first-class since processkit 0.9.2 and carrying partial
792    // `stdout`/`stderr` since 0.10) is *terminal*, not transient: a deliberate kill
793    // should not be auto-retried, and a signal death is neither a merge conflict nor
794    // a clean tree. processkit's own `is_transient()` agrees (false for `Signalled`),
795    // so it falls through every classifier to "no" — pinned here, including the case
796    // where the captured stderr happens to contain an otherwise-transient marker (a
797    // killed fetch is still not ours to silently replay).
798    #[test]
799    fn signalled_is_terminal_not_transient() {
800        let signalled = Error::Signalled {
801            program: "git".into(),
802            signal: Some(15),
803            stdout: String::new(),
804            stderr: "fatal: unable to access: Could not resolve host: x".into(),
805        };
806        assert!(!signalled.is_transient());
807        assert!(!is_transient_fetch_error(&signalled));
808        assert!(!is_merge_conflict(&signalled));
809        assert!(!is_nothing_to_commit(&signalled));
810    }
811
812    fn exit(program: &str, code: i32, stderr: &str) -> Error {
813        Error::Exit {
814            program: program.into(),
815            code,
816            stdout: String::new(),
817            stderr: stderr.into(),
818        }
819    }
820
821    // `is_lock_contention` recognises ONLY the *whole-repo* / working-copy lock
822    // failures (git index.lock, jj working-copy/op-heads lock) — the ones where the
823    // command did nothing, so a retry is idempotent even on a mutation. Per-ref lock
824    // failures and conflicts/timeouts are deliberately NOT classified (a multi-ref
825    // op can fail a ref lock mid-way, where a retry would not be idempotent).
826    #[test]
827    fn classifies_lock_contention() {
828        let lock_failures = [
829            exit(
830                "git",
831                128,
832                "fatal: Unable to create '/r/.git/index.lock': File exists.",
833            ),
834            exit(
835                "git",
836                128,
837                "Another git process seems to be running in this repository",
838            ),
839            exit("jj", 1, "Error: Failed to lock the working copy"),
840            exit("jj", 1, "Error: Failed to lock op heads"),
841        ];
842        for e in &lock_failures {
843            assert!(is_lock_contention(e), "should be lock contention: {e:?}");
844            // A lock failure is NOT a transient *fetch* error — different class.
845            assert!(!is_transient_fetch_error(e), "not a fetch error: {e:?}");
846        }
847        let not_locks = [
848            exit("git", 1, "CONFLICT (content): Merge conflict in a.rs"),
849            exit("git", 1, "error: pathspec 'x' did not match any file(s)"),
850            exit("git", 128, "fatal: not a git repository"),
851            // Per-ref locks are NOT classified — a multi-ref push/fetch can fail a
852            // ref lock after earlier refs already moved (non-idempotent to replay).
853            exit(
854                "git",
855                1,
856                "error: cannot lock ref 'refs/heads/x': reference already exists",
857            ),
858            exit(
859                "git",
860                128,
861                "Unable to create '/r/.git/packed-refs.lock': File exists.",
862            ),
863            Error::Timeout {
864                program: "git".into(),
865                timeout: Duration::from_secs(1),
866                stdout: String::new(),
867                stderr: String::new(),
868            },
869        ];
870        for e in &not_locks {
871            assert!(
872                !is_lock_contention(e),
873                "should NOT be lock contention: {e:?}"
874            );
875        }
876    }
877
878    // Backoff is exponential off the base, capped at `max_backoff`, and zero when
879    // there's no base (immediate retry).
880    #[test]
881    fn backoff_is_exponential_capped_and_zero_without_base() {
882        let p = RetryPolicy::none()
883            .attempts(6)
884            .base_backoff(Duration::from_millis(10))
885            .max_backoff(Duration::from_millis(80));
886        assert_eq!(backoff_for(&p, 0), Duration::from_millis(10));
887        assert_eq!(backoff_for(&p, 1), Duration::from_millis(20));
888        assert_eq!(backoff_for(&p, 2), Duration::from_millis(40));
889        assert_eq!(backoff_for(&p, 3), Duration::from_millis(80));
890        assert_eq!(
891            backoff_for(&p, 4),
892            Duration::from_millis(80),
893            "capped at max"
894        );
895        assert_eq!(
896            backoff_for(&RetryPolicy::none(), 3),
897            Duration::ZERO,
898            "no base → no wait"
899        );
900    }
901
902    // Full jitter (used by `RetryPolicy::lock_contention`): every sampled backoff
903    // stays within `[0, exponential cap]`, and successive samples de-correlate
904    // (more than one distinct value) so retries don't thunder together. Pins the
905    // jitter path, which the exponential test above deliberately turns off.
906    #[test]
907    fn jitter_stays_within_cap_and_decorrelates() {
908        let p = RetryPolicy::none()
909            .attempts(8)
910            .base_backoff(Duration::from_millis(10))
911            .max_backoff(Duration::from_millis(80))
912            .with_jitter(true);
913        // The cap at retry_index 3 is the full 80ms exponential value.
914        let cap = Duration::from_millis(80);
915        let mut seen = std::collections::HashSet::new();
916        for _ in 0..1000 {
917            let d = backoff_for(&p, 3);
918            assert!(
919                d <= cap,
920                "jittered backoff {d:?} must stay within the cap {cap:?}"
921            );
922            seen.insert(d.as_nanos());
923        }
924        assert!(
925            seen.len() > 1,
926            "full jitter must produce a spread of delays, not a constant"
927        );
928        // A zero base still short-circuits to zero even with jitter on.
929        assert_eq!(
930            backoff_for(&RetryPolicy::none().with_jitter(true), 2),
931            Duration::ZERO
932        );
933    }
934
935    // The executor: retries while the predicate matches and attempts remain, returns
936    // the first Ok, doesn't retry a non-matching error, and exhausts to the last Err.
937    #[tokio::test]
938    async fn retry_async_retries_then_succeeds_and_respects_the_predicate() {
939        use std::sync::atomic::{AtomicU32, Ordering};
940        // Zero backoff → no sleep, deterministic & fast.
941        let policy = RetryPolicy::none().attempts(4);
942        let lock = || {
943            exit(
944                "git",
945                128,
946                "Unable to create '/r/.git/index.lock': File exists.",
947            )
948        };
949
950        // Fails twice with a lock error, then succeeds — retried to success.
951        let calls = AtomicU32::new(0);
952        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
953            let n = calls.fetch_add(1, Ordering::SeqCst);
954            let lock = lock();
955            async move { if n < 2 { Err(lock) } else { Ok(n) } }
956        })
957        .await;
958        assert_eq!(out.unwrap(), 2);
959        assert_eq!(calls.load(Ordering::SeqCst), 3, "1 try + 2 retries");
960
961        // A non-lock error is returned immediately (not retried).
962        let calls = AtomicU32::new(0);
963        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
964            calls.fetch_add(1, Ordering::SeqCst);
965            async { Err(exit("git", 1, "real, deterministic failure")) }
966        })
967        .await;
968        assert!(out.is_err());
969        assert_eq!(
970            calls.load(Ordering::SeqCst),
971            1,
972            "non-retryable → single attempt"
973        );
974
975        // Persistent lock contention exhausts the attempt budget.
976        let calls = AtomicU32::new(0);
977        let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
978            calls.fetch_add(1, Ordering::SeqCst);
979            async { Err(exit("git", 128, "index.lock': File exists")) }
980        })
981        .await;
982        assert!(out.is_err());
983        assert_eq!(calls.load(Ordering::SeqCst), 4, "all attempts used");
984    }
985
986    // `resolve_credential` returns `None` until a provider is attached, then the
987    // provider's credential. (No process is spawned, so the real runner is fine.)
988    #[tokio::test]
989    async fn retrying_client_resolves_credential_opt_in() {
990        let client = ManagedClient::new("git");
991        assert!(!client.has_credentials());
992        assert!(
993            client
994                .resolve_credential(CredentialService::Git, None)
995                .await
996                .unwrap()
997                .is_none(),
998            "no provider → ambient (None)"
999        );
1000
1001        let client = client.with_credentials(Arc::new(StaticCredential::token("t0k")));
1002        assert!(client.has_credentials());
1003        let got = client
1004            .resolve_credential(CredentialService::Git, None)
1005            .await
1006            .unwrap()
1007            .expect("provider yields a credential");
1008        assert_eq!(got.secret().expose(), "t0k");
1009    }
1010
1011    // An empty (or whitespace-only) secret is treated as `None` (ambient):
1012    // injecting an empty token would override the ambient login with nothing
1013    // instead of deferring to it. Mirrors `EnvToken`'s whitespace-only ⇒ unset rule.
1014    #[tokio::test]
1015    async fn resolve_credential_treats_empty_secret_as_ambient() {
1016        // Service-agnostic: both the forge (token-env) and git (helper) paths route
1017        // through this chokepoint, so a blank secret is ambient for either.
1018        for blank in ["", "   ", "\t\n"] {
1019            let client = ManagedClient::new("git")
1020                .with_credentials(Arc::new(StaticCredential::token(blank)));
1021            for service in [CredentialService::GitHub, CredentialService::Git] {
1022                assert!(
1023                    client
1024                        .resolve_credential(service, None)
1025                        .await
1026                        .unwrap()
1027                        .is_none(),
1028                    "blank secret {blank:?} → ambient (None) for {service:?}"
1029                );
1030            }
1031        }
1032    }
1033}