vcs_cli_support/lib.rs
1#![cfg_attr(docsrs, feature(doc_cfg))]
2#![deny(rustdoc::broken_intra_doc_links)]
3//! `vcs-cli-support` — the [`processkit`]-coupled plumbing the CLI wrappers reuse.
4//!
5//! `vcs-git` / `vcs-jj` / `vcs-github` all drive a CLI through [`processkit`], so
6//! they share three concerns that *touch* [`processkit::Error`]: an argv injection
7//! guard, a fetch-retry policy, and a set of [`Error`] classifiers. Extracting them
8//! here keeps the std-only `vcs-diff` clean of the `processkit` dependency, and —
9//! more to the point — keeps the marker lists and classifier logic from drifting
10//! between backends. The wrapper crates re-export these items (so you reach them
11//! as `vcs_git::is_merge_conflict`, not via this crate's name) and rarely name
12//! `vcs-cli-support` directly.
13//!
14//! # The surface
15//!
16//! - **[`reject_flag_like`]** — the injection guard for bare positional argv slots.
17//! A caller value that is empty/whitespace, or starts with `-`, is refused before
18//! spawning (the CLI would parse it as a flag); flag-*value* slots (`-m <msg>`)
19//! are consumed verbatim and skip the check. Wrappers call it with their own
20//! binary name so the surfaced [`Error::Spawn`] names the right `program`.
21//! - **[`FETCH_ATTEMPTS`] / [`FETCH_BACKOFF`]** — the shared transient-retry policy
22//! for `fetch` (one try plus two retries, fixed backoff between them).
23//! - **[`is_merge_conflict`] / [`is_nothing_to_commit`] / [`is_transient_fetch_error`]
24//! / [`is_lock_contention`]** — classify a returned [`Error`] so callers branch on
25//! *intent* ("conflict, resolve it"; "nothing to commit, no-op"; "transient,
26//! retry"; "another process holds the lock, retry") instead of matching on error
27//! internals. They inspect captured [`Error::Exit`] output against fixed marker
28//! lists (and treat a [`processkit`] [`Error::Timeout`] as transient); any
29//! unfamiliar `#[non_exhaustive]` variant falls through to "no".
30//! - **[`RetryPolicy`] / [`retry_async`] / [`ManagedClient`]** — an opt-in retry
31//! strategy (attempts + exponential, jittered backoff) for **lock-contention**
32//! failures. `ManagedClient` wraps a [`processkit`] `CliClient` and applies the
33//! policy to every command, so the `vcs-git`/`vcs-jj` clients gain retry via
34//! `with_retry(...)` without changing a call site. Lock-acquisition failures are
35//! pre-execution, so retrying is safe even for mutating commands.
36//! - **[`CredentialProvider`] / [`Credential`] / [`Secret`]** — an opt-in seam for
37//! supplying a secret *per operation* (a CI token, a vault lookup) instead of
38//! relying on ambient CLI auth. `ManagedClient` injects the resolved token into
39//! each command (the forge `GH_TOKEN`/`GITLAB_TOKEN` env); git uses
40//! [`git_credential_helper`] to keep the secret out of `argv`. Default is no
41//! provider → ambient auth, unchanged. See the [`credentials`](mod@credentials)
42//! module for the full picture.
43//!
44//! # Recipes
45//!
46//! Classify a failed `fetch` to drive a retry decision — branch on intent, not on
47//! the error's internals:
48//!
49//! ```no_run
50//! use vcs_cli_support::{is_transient_fetch_error, FETCH_ATTEMPTS, FETCH_BACKOFF};
51//! # fn run() -> Result<(), processkit::Error> { todo!() }
52//! # fn demo() -> Result<(), processkit::Error> {
53//! for attempt in 1..=FETCH_ATTEMPTS {
54//! match run() {
55//! Ok(()) => break,
56//! Err(e) if is_transient_fetch_error(&e) && attempt < FETCH_ATTEMPTS => {
57//! std::thread::sleep(FETCH_BACKOFF); // DNS/timeout — worth a retry
58//! }
59//! Err(e) => return Err(e), // anything else: give up
60//! }
61//! }
62//! # Ok(()) }
63//! ```
64
65use std::ffi::OsStr;
66use std::fmt;
67use std::future::Future;
68use std::path::Path;
69use std::sync::Arc;
70use std::time::Duration;
71
72use processkit::{
73 CliClient, Command, Error, IntoCommand, JobRunner, ProcessResult, ProcessRunner, Result,
74};
75
76pub mod credentials;
77pub use credentials::{
78 Credential, CredentialProvider, CredentialRequest, CredentialService, EnvToken, FnProvider,
79 GitCredentialHelper, Secret, StaticCredential, git_credential_helper, provider_fn,
80};
81
82/// Injection guard for bare positional argv slots: a caller-supplied value with a
83/// leading `-` would be parsed by the CLI as a *flag* (verified: `git checkout
84/// -evil` → "unknown switch"; jj likewise), and an empty (or whitespace-only)
85/// value silently changes most commands' meaning. Refuse both before anything
86/// spawns, surfacing an [`Error::Spawn`] naming `program`. An interior NUL is
87/// refused too (it can't be passed in argv and otherwise surfaces as an opaque
88/// OS spawn error). Flag-VALUE positions (`-m <msg>`, `--branch <b>`) don't need
89/// this — the CLI consumes the next token verbatim there.
90///
91/// The leading-`-` test is applied to the **trimmed** value, so a value like
92/// `" --upload-pack=…"` (leading whitespace) is still refused — the empty-check
93/// and the flag-check now agree on what "the value" is.
94pub fn reject_flag_like(program: &str, what: &str, value: &str) -> Result<()> {
95 let trimmed = value.trim();
96 if trimmed.is_empty() || trimmed.starts_with('-') || value.contains('\0') {
97 return Err(Error::Spawn {
98 program: program.to_string(),
99 source: std::io::Error::new(
100 std::io::ErrorKind::InvalidInput,
101 format!(
102 "{what} {value:?} would be parsed as a flag (or is empty / contains NUL) — \
103 refusing to pass it as a positional argument"
104 ),
105 ),
106 });
107 }
108 Ok(())
109}
110
/// How many times a transient-retried `fetch` is attempted in total: the first
/// try plus two retries.
pub const FETCH_ATTEMPTS: u32 = 3;
/// The fixed pause between one fetch attempt and the next retry.
pub const FETCH_BACKOFF: Duration = Duration::from_millis(500);
/// How long a timed-out fetch gets to shut down cleanly. When the deadline
/// passes, processkit signals the process tree (terminate), waits this long for
/// it to exit on its own — flush, close the connection, drop any lock — and only
/// then hard-kills. It applies solely when a per-client timeout is configured
/// (`Git::default_timeout` / `Jj::default_timeout`); a fetch without a deadline
/// is unaffected.
pub const FETCH_TIMEOUT_GRACE: Duration = Duration::from_secs(2);
121
/// Markers (lower-case substrings) of a merge that stopped on conflicts.
const CONFLICT_MARKERS: &[&str] = &["conflict (", "automatic merge failed"];

/// Markers (lower-case substrings) of a commit that had nothing to record.
const NOTHING_TO_COMMIT_MARKERS: &[&str] = &["nothing to commit", "nothing added to commit"];

/// Markers (lower-case substrings) of a transient, retryable network/fetch
/// failure.
///
/// The timeout entries are deliberately *specific* (`connection timed out`,
/// `operation timed out`). A bare `timed out` would also match unrelated,
/// non-network messages (a lock wait, a hook) and cause a spurious fetch retry.
const TRANSIENT_FETCH_MARKERS: &[&str] = &[
    // Name resolution.
    "could not resolve host",
    "couldn't resolve host",
    "temporary failure in name resolution",
    // Connection establishment.
    "connection timed out",
    "connection refused",
    "operation timed out",
    "network is unreachable",
    "failed to connect",
    // Connection dropped mid-transfer.
    "could not read from remote repository",
    "the remote end hung up",
    "early eof",
    "rpc failed",
];
145
146/// Whether `err` is an [`Error::Exit`] whose captured output contains any marker.
147fn exit_output_matches(err: &Error, markers: &[&str]) -> bool {
148 let Error::Exit { stdout, stderr, .. } = err else {
149 return false;
150 };
151 let out = stdout.to_ascii_lowercase();
152 let errt = stderr.to_ascii_lowercase();
153 markers.iter().any(|m| out.contains(m) || errt.contains(m))
154}
155
156/// Whether a failed `merge`/`merge_commit` stopped on a merge conflict. (jj
157/// surfaces conflicts as state rather than as errors, so this only fires on git
158/// output — see `vcs_core::Error::is_merge_conflict`.)
159pub fn is_merge_conflict(err: &Error) -> bool {
160 exit_output_matches(err, CONFLICT_MARKERS)
161}
162
163/// Whether a failed `commit`/`commit_paths` reported nothing to commit (a clean
164/// tree), as opposed to a real error.
165pub fn is_nothing_to_commit(err: &Error) -> bool {
166 exit_output_matches(err, NOTHING_TO_COMMIT_MARKERS)
167}
168
169/// Whether a failed `fetch`/`fetch_remote_branch`/`remote_branch_exists` looks
170/// transient (DNS, timeout, dropped connection) and is worth retrying.
171pub fn is_transient_fetch_error(err: &Error) -> bool {
172 // A processkit-level timeout (a `.timeout()`-bounded run that expired) is
173 // inherently transient; treat it as retryable too, regardless of any partial
174 // output it captured before the deadline (as of processkit 0.10 a `Timeout`
175 // carries the partial `stdout`/`stderr`, but the retry decision doesn't depend
176 // on it). So is an io-level transient from the spawn itself (interrupted /
177 // would-block / busy), which processkit classifies via `Error::is_transient()`
178 // (it covers `Spawn`/`Io`, not `Exit`, so it composes cleanly with the marker
179 // scan below).
180 matches!(err, Error::Timeout { .. })
181 || err.is_transient()
182 || exit_output_matches(err, TRANSIENT_FETCH_MARKERS)
183}
184
/// Markers (lower-case substrings) of a **whole-repository / working-copy lock**
/// contention failure: another process held the *one* repo-wide lock, so the
/// command **never started** (a clean, pre-execution failure) and touched
/// nothing.
///
/// The list is deliberately restricted to locks that guard the *entire*
/// operation up front, which is what makes a retry safe even for a **mutating**
/// command — the repo was not modified at all. Per-ref lock messages
/// (`cannot lock ref`, `<ref>.lock`/`packed-refs.lock: File exists`) are
/// intentionally **not** listed: a multi-ref `push`/`fetch` updates refs
/// sequentially, so a ref-lock failure can arrive *after* earlier refs already
/// moved, and replaying that is not idempotent. Network markers
/// ([`TRANSIENT_FETCH_MARKERS`]) and conflict/exit failures are likewise absent.
const LOCK_CONTENTION_MARKERS: &[&str] = &[
    // git: the whole-repo index lock (pre-write).
    "index.lock': file exists",
    // git's index-lock hint.
    "another git process seems to be running",
    // jj: the working-copy lock (pre-snapshot).
    "failed to lock the working copy",
    // jj: the operation-log lock (pre-commit of the op).
    "failed to lock op heads",
];
203
204/// Whether `err` is a **whole-repository lock-contention** failure — another
205/// process held git's `index.lock` or jj's working-copy / op-heads lock, so the
206/// command couldn't even start. Such a failure is *pre-execution* and therefore
207/// safe to retry even on a **mutating** operation (the repo was never modified).
208/// Per-ref lock failures (`cannot lock ref`, `<ref>.lock`) are deliberately **not**
209/// classified here — they can occur mid-way through a multi-ref `push`/`fetch`,
210/// where a retry would not be idempotent. Conflict, "nothing to commit", a real
211/// non-zero exit, a timeout, a signal, or a missing binary are also **not** lock
212/// contention and must not be retried this way.
213pub fn is_lock_contention(err: &Error) -> bool {
214 exit_output_matches(err, LOCK_CONTENTION_MARKERS)
215}
216
/// A bounded retry strategy: how many attempts, the (exponential) backoff between
/// them, and whether to add full jitter. Used by [`ManagedClient`] to retry
/// [`is_lock_contention`] failures. The [`Default`] is [`none`](RetryPolicy::none)
/// (no retry) — retry is **opt-in**.
///
/// The builder methods consume and return the policy, so their result must be
/// used; they are marked `#[must_use]` to catch a dropped builder call.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct RetryPolicy {
    /// Total attempts including the first; `1` means no retry.
    pub attempts: u32,
    /// Delay before the first retry; doubles each subsequent retry (capped by
    /// [`max_backoff`](RetryPolicy::max_backoff)). `ZERO` means retry immediately.
    pub base_backoff: Duration,
    /// Upper bound on the (pre-jitter) backoff delay. `ZERO` means uncapped.
    pub max_backoff: Duration,
    /// Apply **full jitter** — the actual delay is uniform in `[0, computed]` — to
    /// avoid a thundering herd when many workers retry against one repository.
    pub jitter: bool,
}

impl RetryPolicy {
    /// No retry: a single attempt, no backoff, no jitter. The default.
    #[must_use]
    pub const fn none() -> Self {
        Self {
            attempts: 1,
            base_backoff: Duration::ZERO,
            max_backoff: Duration::ZERO,
            jitter: false,
        }
    }

    /// A sensible default for repository lock contention: a handful of attempts
    /// with short, jittered, exponential backoff (25 ms → 500 ms).
    #[must_use]
    pub const fn lock_contention() -> Self {
        Self {
            attempts: 5,
            base_backoff: Duration::from_millis(25),
            max_backoff: Duration::from_millis(500),
            jitter: true,
        }
    }

    /// Set the total number of attempts (clamped to at least 1, so `0` still
    /// runs the operation once).
    #[must_use]
    pub fn attempts(mut self, attempts: u32) -> Self {
        self.attempts = attempts.max(1);
        self
    }

    /// Set the base backoff (the delay before the first retry).
    #[must_use]
    pub fn base_backoff(mut self, backoff: Duration) -> Self {
        self.base_backoff = backoff;
        self
    }

    /// Cap the (pre-jitter) backoff delay; `ZERO` leaves it uncapped.
    #[must_use]
    pub fn max_backoff(mut self, max: Duration) -> Self {
        self.max_backoff = max;
        self
    }

    /// Toggle full jitter on the backoff delay.
    #[must_use]
    pub fn with_jitter(mut self, jitter: bool) -> Self {
        self.jitter = jitter;
        self
    }
}

impl Default for RetryPolicy {
    /// No retry — retry is opt-in.
    fn default() -> Self {
        Self::none()
    }
}
289
290/// The (possibly jittered) backoff before the `retry_index`-th retry (0 = first).
291fn backoff_for(policy: &RetryPolicy, retry_index: u32) -> Duration {
292 if policy.base_backoff.is_zero() {
293 return Duration::ZERO;
294 }
295 let base = policy.base_backoff.as_nanos();
296 let scaled = base.saturating_mul(1u128 << retry_index.min(20));
297 let capped = if policy.max_backoff.is_zero() {
298 scaled
299 } else {
300 scaled.min(policy.max_backoff.as_nanos())
301 };
302 let delay = Duration::from_nanos(capped.min(u64::MAX as u128) as u64);
303 if policy.jitter {
304 full_jitter(delay)
305 } else {
306 delay
307 }
308}
309
/// Full jitter: picks a delay uniformly from `[0, max]`.
///
/// The randomness comes from a freshly built, OS-seeded
/// [`RandomState`](std::collections::hash_map::RandomState) hasher, which needs
/// no extra dependency. That is good enough to de-correlate retries; it is not
/// cryptographic. A zero `max` returns zero without touching the hasher.
fn full_jitter(max: Duration) -> Duration {
    use std::collections::hash_map::RandomState;
    use std::hash::{BuildHasher, Hasher};

    let ceiling = max.as_nanos();
    if ceiling == 0 {
        return Duration::ZERO;
    }

    // Each `RandomState::new()` carries different keys, so hashing the same
    // input gives a different sample on every call.
    let mut sampler = RandomState::new().build_hasher();
    sampler.write_u64(ceiling as u64);
    let sample = u128::from(sampler.finish());

    // Fold the sample into the inclusive range `0..=ceiling`.
    let picked = sample % (ceiling + 1);
    Duration::from_nanos(u64::try_from(picked).unwrap_or(u64::MAX))
}
324
325/// Run `op`, retrying its result while `should_retry` says so and `policy` has
326/// attempts left, sleeping the (jittered, exponential) backoff between tries. The
327/// op is re-invoked from scratch each attempt, so it must be idempotent for the
328/// errors `should_retry` selects (lock-contention failures are — the command never
329/// ran). Returns the first `Ok`, or the last `Err`.
330pub async fn retry_async<T, Fut>(
331 policy: &RetryPolicy,
332 should_retry: impl Fn(&Error) -> bool,
333 mut op: impl FnMut() -> Fut,
334) -> Result<T>
335where
336 Fut: Future<Output = Result<T>>,
337{
338 let attempts = policy.attempts.max(1);
339 for attempt in 1..=attempts {
340 match op().await {
341 Ok(value) => return Ok(value),
342 Err(err) => {
343 if attempt == attempts || !should_retry(&err) {
344 return Err(err);
345 }
346 let delay = backoff_for(policy, attempt - 1);
347 if !delay.is_zero() {
348 tokio::time::sleep(delay).await;
349 }
350 }
351 }
352 }
353 unreachable!("the loop returns on the final attempt")
354}
355
356/// A [`CliClient`] wrapper that adds two opt-in concerns the CLI wrappers
357/// (`vcs-git`, `vcs-jj`, `vcs-github`, `vcs-gitlab`) all share, without touching a
358/// single call site:
359///
360/// 1. **Lock-contention retry** ([`is_lock_contention`]) per a [`RetryPolicy`] —
361/// off by default ([`RetryPolicy::none`]); enable with
362/// [`with_retry`](ManagedClient::with_retry). Safe even for mutating commands,
363/// since lock contention is a clean pre-execution failure.
364/// 2. **Credential injection** from an opt-in [`CredentialProvider`] — off by
365/// default (no provider); attach one with
366/// [`with_credentials`](ManagedClient::with_credentials). When a forge
367/// *token-env* binding is configured
368/// ([`with_token_env`](ManagedClient::with_token_env)), every command run
369/// through this client gets the resolved token in that environment variable
370/// (e.g. `GH_TOKEN`). Backends that inject the secret differently (git's
371/// `credential.helper`) instead call
372/// [`resolve_credential`](ManagedClient::resolve_credential) at the command
373/// site. Resolution happens once per call, before the retry loop.
374///
375/// Both default to inert, so a client with neither configured behaves exactly
376/// like a bare `CliClient`.
377pub struct ManagedClient<R: ProcessRunner = JobRunner> {
378 inner: CliClient<R>,
379 retry: RetryPolicy,
380 credentials: Option<Arc<dyn CredentialProvider>>,
381 /// When set, the token is auto-injected into this env var on every command,
382 /// resolved for this service. Used by the forge clients (`GH_TOKEN`, …).
383 token_env: Option<(CredentialService, &'static str)>,
384}
385
386impl<R: ProcessRunner> fmt::Debug for ManagedClient<R> {
387 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
388 f.debug_struct("ManagedClient")
389 .field("inner", &self.inner)
390 .field("retry", &self.retry)
391 // Never render the provider itself (it may close over a secret); just
392 // whether one is configured, plus the token-env binding.
393 .field("credentials", &self.credentials.is_some())
394 .field("token_env", &self.token_env)
395 .finish()
396 }
397}
398
399impl ManagedClient<JobRunner> {
400 /// A retrying client driving `program` on the real job-backed runner (no retry
401 /// until [`with_retry`](ManagedClient::with_retry)).
402 pub fn new(program: impl AsRef<OsStr>) -> Self {
403 Self {
404 inner: CliClient::new(program),
405 retry: RetryPolicy::none(),
406 credentials: None,
407 token_env: None,
408 }
409 }
410}
411
412impl<R: ProcessRunner> ManagedClient<R> {
413 /// A retrying client driving `program` on `runner` — inject a fake in tests.
414 pub fn with_runner(program: impl AsRef<OsStr>, runner: R) -> Self {
415 Self {
416 inner: CliClient::with_runner(program, runner),
417 retry: RetryPolicy::none(),
418 credentials: None,
419 token_env: None,
420 }
421 }
422
423 /// Set the lock-contention retry policy (opt-in; default is no retry).
424 pub fn with_retry(mut self, policy: RetryPolicy) -> Self {
425 self.retry = policy;
426 self
427 }
428
429 /// The active retry policy.
430 pub fn retry_policy(&self) -> RetryPolicy {
431 self.retry
432 }
433
434 /// Attach a [`CredentialProvider`] (opt-in; default is none → ambient auth).
435 /// The provider is consulted per operation: automatically when a
436 /// [`with_token_env`](ManagedClient::with_token_env) binding is set, or
437 /// on demand via [`resolve_credential`](ManagedClient::resolve_credential).
438 ///
439 /// **Precedence:** a resolved token is injected *after* any
440 /// [`default_env`](ManagedClient::default_env), so the provider wins over a
441 /// static default and over the ambient CLI login. **Cancellation:** a
442 /// [`default_cancel_on`](ManagedClient::default_cancel_on) token bounds the
443 /// spawned *process*, not provider resolution — if your provider does slow I/O
444 /// (a vault lookup), bound it yourself.
445 #[must_use]
446 pub fn with_credentials(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
447 self.credentials = Some(provider);
448 self
449 }
450
451 /// Bind the resolved token to an environment variable injected on **every**
452 /// command this client runs (the forge case: `GH_TOKEN`, `GITLAB_TOKEN`). The
453 /// `service` tags the [`CredentialRequest`]. No effect without a provider.
454 #[must_use]
455 pub fn with_token_env(mut self, service: CredentialService, var: &'static str) -> Self {
456 self.token_env = Some((service, var));
457 self
458 }
459
460 /// Whether a credential provider is configured.
461 #[must_use]
462 pub fn has_credentials(&self) -> bool {
463 self.credentials.is_some()
464 }
465
466 /// Resolve a credential for `service`/`host` from the configured provider, or
467 /// `Ok(None)` if no provider is set or it defers to ambient auth. Backends
468 /// that inject the secret at the command site (git's `credential.helper`) call
469 /// this directly; the forge token-env path uses it internally.
470 pub async fn resolve_credential(
471 &self,
472 service: CredentialService,
473 host: Option<&str>,
474 ) -> Result<Option<Credential>> {
475 let Some(provider) = &self.credentials else {
476 return Ok(None);
477 };
478 let request = CredentialRequest { service, host };
479 // An empty (or whitespace-only) secret is not a usable credential —
480 // injecting an empty `GH_TOKEN`/`GITLAB_TOKEN` (or a `password=` line)
481 // would *override* the ambient login with nothing rather than defer to it.
482 // Treat it as `None` (ambient), keeping the "no usable credential ⇒
483 // ambient auth" contract consistent regardless of which adapter produced
484 // it (matching `EnvToken`'s own whitespace-only ⇒ unset rule).
485 Ok(provider
486 .credential(&request)
487 .await?
488 .filter(|cred| !cred.secret().expose().trim().is_empty()))
489 }
490
491 /// Materialize `call` into a [`Command`], injecting the forge token env if a
492 /// [`with_token_env`](ManagedClient::with_token_env) binding and a provider
493 /// are both configured. The single place the auto-injection happens, shared by
494 /// every retrying verb.
495 async fn prepare(&self, call: impl IntoCommand<R>) -> Result<Command> {
496 let cmd = call.into_command(&self.inner);
497 let Some((service, var)) = self.token_env else {
498 return Ok(cmd);
499 };
500 match self.resolve_credential(service, None).await? {
501 Some(cred) => Ok(cmd.env(var, cred.secret().expose())),
502 None => Ok(cmd),
503 }
504 }
505
506 /// Apply a default timeout to every command this client builds.
507 pub fn default_timeout(mut self, timeout: Duration) -> Self {
508 self.inner = self.inner.default_timeout(timeout);
509 self
510 }
511
512 /// Set an environment variable on every command this client builds.
513 pub fn default_env(mut self, key: impl AsRef<OsStr>, value: impl AsRef<OsStr>) -> Self {
514 self.inner = self.inner.default_env(key, value);
515 self
516 }
517
518 /// Remove an inherited environment variable on every command this client builds.
519 pub fn default_env_remove(mut self, key: impl AsRef<OsStr>) -> Self {
520 self.inner = self.inner.default_env_remove(key);
521 self
522 }
523
524 /// Cancel every command this client builds when `token` fires.
525 pub fn default_cancel_on(mut self, token: processkit::CancellationToken) -> Self {
526 self.inner = self.inner.default_cancel_on(token);
527 self
528 }
529
530 /// Build a [`Command`] for this client's program (passthrough).
531 pub fn command<I, S>(&self, args: I) -> Command
532 where
533 I: IntoIterator<Item = S>,
534 S: AsRef<OsStr>,
535 {
536 self.inner.command(args)
537 }
538
539 /// Build a [`Command`] bound to `dir` (passthrough).
540 pub fn command_in<I, S>(&self, dir: &Path, args: I) -> Command
541 where
542 I: IntoIterator<Item = S>,
543 S: AsRef<OsStr>,
544 {
545 self.inner.command_in(dir, args)
546 }
547
548 /// The underlying process runner (passthrough — e.g. for `output_all`).
549 pub fn runner(&self) -> &R {
550 self.inner.runner()
551 }
552
553 /// Like [`CliClient::run`], with credential injection and lock-retry.
554 pub async fn run(&self, call: impl IntoCommand<R>) -> Result<String> {
555 let cmd = self.prepare(call).await?;
556 retry_async(&self.retry, is_lock_contention, || {
557 self.inner.run(cmd.clone())
558 })
559 .await
560 }
561
562 /// Like [`CliClient::run_unit`], with credential injection and lock-retry.
563 pub async fn run_unit(&self, call: impl IntoCommand<R>) -> Result<()> {
564 let cmd = self.prepare(call).await?;
565 retry_async(&self.retry, is_lock_contention, || {
566 self.inner.run_unit(cmd.clone())
567 })
568 .await
569 }
570
571 /// Like [`CliClient::output`], with credential injection. **No lock-retry:**
572 /// `output` returns `Ok` on a non-zero exit (it captures the result), so a lock
573 /// failure surfaces as an `Ok` here, not an `Err` the retry predicate could
574 /// match — route mutations that need lock-retry through
575 /// [`run`](Self::run)/[`run_unit`](Self::run_unit) instead.
576 pub async fn output(&self, call: impl IntoCommand<R>) -> Result<ProcessResult<String>> {
577 let cmd = self.prepare(call).await?;
578 self.inner.output(cmd).await
579 }
580
581 /// Like [`CliClient::probe`] (zero-or-nonzero exit → `bool`), with credential
582 /// injection and lock-retry.
583 pub async fn probe(&self, call: impl IntoCommand<R>) -> Result<bool> {
584 let cmd = self.prepare(call).await?;
585 retry_async(&self.retry, is_lock_contention, || {
586 self.inner.probe(cmd.clone())
587 })
588 .await
589 }
590
591 /// Like [`CliClient::exit_code`] (the raw exit code; a spawn failure or timeout
592 /// still errors), with credential injection and lock-retry.
593 pub async fn exit_code(&self, call: impl IntoCommand<R>) -> Result<i32> {
594 let cmd = self.prepare(call).await?;
595 retry_async(&self.retry, is_lock_contention, || {
596 self.inner.exit_code(cmd.clone())
597 })
598 .await
599 }
600
601 /// Like [`CliClient::parse`] (credential injection applied; the `FnOnce` parser
602 /// can't be re-run, so lock-retry does not — parsing is a read, where lock
603 /// contention is not a concern anyway).
604 pub async fn parse<T>(
605 &self,
606 call: impl IntoCommand<R>,
607 parser: impl FnOnce(&str) -> T,
608 ) -> Result<T> {
609 let cmd = self.prepare(call).await?;
610 self.inner.parse(cmd, parser).await
611 }
612
613 /// Like [`CliClient::try_parse`] (credential injection applied; `FnOnce` parser,
614 /// and a read, so no lock-retry).
615 pub async fn try_parse<T>(
616 &self,
617 call: impl IntoCommand<R>,
618 parser: impl FnOnce(&str) -> Result<T>,
619 ) -> Result<T> {
620 let cmd = self.prepare(call).await?;
621 self.inner.try_parse(cmd, parser).await
622 }
623}
624
625#[cfg(test)]
626mod tests {
627 use super::*;
628
#[test]
fn rejects_empty_and_leading_dash() {
    // (slot description, value) pairs that must be refused.
    let refused = [
        ("branch name", "-evil"),
        ("branch name", ""),
        // Whitespace-only is as meaning-changing as empty — refuse it too.
        ("branch name", " "),
        ("branch name", "\t"),
        // Leading whitespace before a dash is still refused (the flag-check trims).
        ("remote", " --upload-pack=evil"),
        ("remote", "\t-x"),
        // An interior NUL is refused (can't go in argv; opaque OS error otherwise).
        ("path", "a\0b"),
    ];
    for (what, value) in refused {
        assert!(
            reject_flag_like("git", what, value).is_err(),
            "{what} {value:?} should be refused"
        );
    }

    // A plain value, and a leading-whitespace non-flag value, are both accepted.
    for value in ["feature", " feature"] {
        assert!(
            reject_flag_like("git", "branch name", value).is_ok(),
            "{value:?} should be accepted"
        );
    }

    // The error names the program and surfaces as a spawn-side refusal.
    let err = reject_flag_like("jj", "revset", "--remote").unwrap_err();
    assert!(matches!(err, Error::Spawn { program, .. } if program == "jj"));
}
648
#[test]
fn classifies_merge_conflict() {
    // Builds a failed `git` exit carrying the given code and captured streams.
    let git_exit = |code, stdout: &str, stderr: &str| Error::Exit {
        program: "git".into(),
        code,
        stdout: stdout.into(),
        stderr: stderr.into(),
    };

    let on_stdout = git_exit(1, "CONFLICT (content): Merge conflict in a.rs", "");
    let on_stderr = git_exit(
        1,
        "",
        "Automatic merge failed; fix conflicts and then commit",
    );
    let unrelated = git_exit(128, "", "fatal: not a git repository");

    // The marker is found on either stream, regardless of case.
    assert!(is_merge_conflict(&on_stdout));
    assert!(is_merge_conflict(&on_stderr));
    assert!(!is_merge_conflict(&unrelated));
    // A conflict is not mistaken for a clean tree.
    assert!(!is_nothing_to_commit(&on_stdout));
}
674
#[test]
fn classifies_nothing_to_commit_and_transient_fetch() {
    // A clean-tree commit failure: classified as "nothing to commit".
    let nothing = Error::Exit {
        program: "git".into(),
        code: 1,
        stdout: "nothing to commit, working tree clean".into(),
        stderr: String::new(),
    };
    assert!(is_nothing_to_commit(&nothing));

    // A DNS failure on stderr: classified as a transient fetch error, while the
    // clean-tree failure above is not.
    let dns = Error::Exit {
        program: "git".into(),
        code: 128,
        stdout: String::new(),
        stderr: "fatal: unable to access 'https://x/': Could not resolve host: x".into(),
    };
    assert!(is_transient_fetch_error(&dns));
    assert!(!is_transient_fetch_error(&nothing));

    // A processkit timeout is transient too. (As of processkit 0.10 a `Timeout`
    // carries whatever partial `stdout`/`stderr` was captured before the
    // deadline; we still treat it as unconditionally retryable regardless.)
    let timeout = Error::Timeout {
        program: "git".into(),
        timeout: Duration::from_secs(10),
        stdout: String::new(),
        stderr: String::new(),
    };
    assert!(is_transient_fetch_error(&timeout));
}
705
// R9: an io-level transient from the spawn (EINTR / EAGAIN / busy) counts as
// fetch-retryable as well, by way of processkit's `Error::is_transient()`.
#[test]
fn classifies_io_transient_as_fetch_retryable() {
    // Builds a `git` spawn failure wrapping an io error of the given kind.
    let spawn_failure = |kind: std::io::ErrorKind| Error::Spawn {
        program: "git".into(),
        source: std::io::Error::from(kind),
    };

    let interrupted = spawn_failure(std::io::ErrorKind::Interrupted);
    assert!(
        interrupted.is_transient(),
        "processkit treats Interrupted as a transient io error"
    );
    assert!(is_transient_fetch_error(&interrupted));

    // A non-transient io error (e.g. NotFound — the binary is missing) is not retried.
    let missing = spawn_failure(std::io::ErrorKind::NotFound);
    assert!(!is_transient_fetch_error(&missing));
}
726
// R2: regression for the processkit 0.9.1 untruncated-`Error::Exit` fix. With a
// large output (well past the old 4 KiB cap) and the decisive marker near the
// END, classification must still succeed — proving the classifiers see the whole
// captured stream.
#[test]
fn classifies_on_large_output_past_the_old_4kib_cap() {
    // ~14 KiB of filler, with the interesting line appended at the very end.
    let filler = "noise line that says nothing\n".repeat(500);
    let late = |marker: &str| format!("{filler}{marker}");

    let conflict = Error::Exit {
        program: "git".into(),
        code: 1,
        stdout: late("CONFLICT (content): Merge conflict in late.rs"),
        stderr: String::new(),
    };
    assert!(
        is_merge_conflict(&conflict),
        "a conflict marker past 4 KiB must still classify"
    );

    let transient = Error::Exit {
        program: "git".into(),
        code: 128,
        stdout: String::new(),
        stderr: late("fatal: unable to access: Could not resolve host: x"),
    };
    assert!(is_transient_fetch_error(&transient));
}
752
// processkit's `Error` is `#[non_exhaustive]` and grows variants over time
// (`NotReady`/`Unsupported`/`CassetteMiss`/`NotFound`/`Signalled`/`Cancelled`/
// `ResourceLimit`). Unfamiliar variants must fall through every classifier to
// "no" — a not-ready or unsupported run is neither a conflict, nor a clean
// tree, nor worth a fetch retry.
#[test]
fn unfamiliar_error_variants_are_not_classified() {
    let not_ready = Error::NotReady {
        program: "git".into(),
        timeout: Duration::from_secs(5),
    };
    let unsupported = Error::Unsupported {
        operation: "suspend".into(),
    };
    for err in [&not_ready, &unsupported] {
        assert!(!is_merge_conflict(err));
        assert!(!is_nothing_to_commit(err));
        assert!(!is_transient_fetch_error(err));
    }
}
773
// `Error::Cancelled` is what a client-level `default_cancel_on` produces when it
// kills an in-flight run (always available since cancellation became core in
// processkit 0.10). Every classifier must answer "no" for it: the fetch was
// stopped *deliberately*, so replaying it would fight the cancellation. The
// `#[non_exhaustive]` fall-through already gives this behaviour; the test pins
// it as a first-class assertion.
#[test]
fn cancelled_is_not_transient_or_otherwise_classified() {
    let cancelled = Error::Cancelled {
        program: "git".into(),
    };
    let err = &cancelled;

    // Not retryable …
    assert!(!is_transient_fetch_error(err));
    // … and not any of the other recognised outcomes either.
    assert!(!is_merge_conflict(err));
    assert!(!is_nothing_to_commit(err));
}
789
// `Error::Signalled` — a process killed by a signal (e.g. an external SIGTERM/
// SIGKILL), first-class since processkit 0.9.2 and carrying partial
// `stdout`/`stderr` since 0.10 — is *terminal*, not transient. A deliberate kill
// must not be auto-retried, and a signal death is neither a merge conflict nor a
// clean tree. processkit's own `is_transient()` agrees (false for `Signalled`).
// The fixture's stderr deliberately contains an otherwise-transient marker: a
// killed fetch is still not ours to silently replay.
#[test]
fn signalled_is_terminal_not_transient() {
    let killed = Error::Signalled {
        program: "git".into(),
        signal: Some(15),
        stdout: String::new(),
        stderr: "fatal: unable to access: Could not resolve host: x".into(),
    };
    let err = &killed;

    // processkit's own verdict first, then each of our classifiers.
    assert!(!err.is_transient());
    assert!(!is_transient_fetch_error(err));
    assert!(!is_merge_conflict(err));
    assert!(!is_nothing_to_commit(err));
}
811
812 fn exit(program: &str, code: i32, stderr: &str) -> Error {
813 Error::Exit {
814 program: program.into(),
815 code,
816 stdout: String::new(),
817 stderr: stderr.into(),
818 }
819 }
820
// `is_lock_contention` recognises ONLY the *whole-repo* / working-copy lock
// failures (git index.lock, jj working-copy/op-heads lock) — the ones where the
// command did nothing, so a retry is idempotent even on a mutation. Per-ref lock
// failures and conflicts/timeouts are deliberately NOT classified (a multi-ref
// op can fail a ref lock mid-way, where a retry would not be idempotent).
#[test]
fn classifies_lock_contention() {
    // Positive cases: every one of these must classify as lock contention.
    let lock_failures = [
        exit(
            "git",
            128,
            "fatal: Unable to create '/r/.git/index.lock': File exists.",
        ),
        exit(
            "git",
            128,
            "Another git process seems to be running in this repository",
        ),
        exit("jj", 1, "Error: Failed to lock the working copy"),
        exit("jj", 1, "Error: Failed to lock op heads"),
    ];
    for e in &lock_failures {
        assert!(is_lock_contention(e), "should be lock contention: {e:?}");
        // A lock failure is NOT a transient *fetch* error — different class.
        assert!(!is_transient_fetch_error(e), "not a fetch error: {e:?}");
    }

    // Negative cases: none of these may classify as lock contention.
    let not_locks = [
        exit("git", 1, "CONFLICT (content): Merge conflict in a.rs"),
        exit("git", 1, "error: pathspec 'x' did not match any file(s)"),
        exit("git", 128, "fatal: not a git repository"),
        // Per-ref locks are NOT classified — a multi-ref push/fetch can fail a
        // ref lock after earlier refs already moved (non-idempotent to replay).
        exit(
            "git",
            1,
            "error: cannot lock ref 'refs/heads/x': reference already exists",
        ),
        exit(
            "git",
            128,
            "Unable to create '/r/.git/packed-refs.lock': File exists.",
        ),
        Error::Timeout {
            program: "git".into(),
            timeout: Duration::from_secs(1),
            stdout: String::new(),
            stderr: String::new(),
        },
    ];
    for e in &not_locks {
        assert!(
            !is_lock_contention(e),
            "should NOT be lock contention: {e:?}"
        );
    }
}
877
// The wait doubles from `base_backoff` with each retry index until it reaches
// `max_backoff`, where it stays; a policy with no base backoff never waits at
// all (immediate retry).
#[test]
fn backoff_is_exponential_capped_and_zero_without_base() {
    let policy = RetryPolicy::none()
        .attempts(6)
        .base_backoff(Duration::from_millis(10))
        .max_backoff(Duration::from_millis(80));

    // Doubling phase: 10ms → 20ms → 40ms → 80ms.
    for (retry_index, expected_ms) in [(0, 10), (1, 20), (2, 40), (3, 80)] {
        assert_eq!(
            backoff_for(&policy, retry_index),
            Duration::from_millis(expected_ms)
        );
    }

    // One step further would be 160ms uncapped; the ceiling holds it at 80ms.
    let past_the_cap = backoff_for(&policy, 4);
    assert_eq!(past_the_cap, Duration::from_millis(80), "capped at max");

    // No base configured → nothing to scale, so the wait is zero.
    let baseless = RetryPolicy::none();
    assert_eq!(backoff_for(&baseless, 3), Duration::ZERO, "no base → no wait");
}
901
// Full jitter (used by `RetryPolicy::lock_contention`): each sampled backoff must
// land within `[0, exponential cap]`, and repeated samples must not all be the
// same value — otherwise retries would thunder together. This pins the jitter
// path, which the plain exponential test deliberately leaves off.
#[test]
fn jitter_stays_within_cap_and_decorrelates() {
    let policy = RetryPolicy::none()
        .attempts(8)
        .base_backoff(Duration::from_millis(10))
        .max_backoff(Duration::from_millis(80))
        .with_jitter(true);
    // The cap at retry_index 3 is the full 80ms exponential value.
    let cap = Duration::from_millis(80);

    // Draw 1000 samples, bounds-checking each one as it is produced and keeping
    // only the distinct values (as nanoseconds).
    let seen: std::collections::HashSet<_> = (0..1000)
        .map(|_| {
            let d = backoff_for(&policy, 3);
            assert!(
                d <= cap,
                "jittered backoff {d:?} must stay within the cap {cap:?}"
            );
            d.as_nanos()
        })
        .collect();
    assert!(
        seen.len() > 1,
        "full jitter must produce a spread of delays, not a constant"
    );

    // A zero base still short-circuits to zero even with jitter on.
    let baseless_jittered = RetryPolicy::none().with_jitter(true);
    assert_eq!(backoff_for(&baseless_jittered, 2), Duration::ZERO);
}
934
935 // The executor: retries while the predicate matches and attempts remain, returns
936 // the first Ok, doesn't retry a non-matching error, and exhausts to the last Err.
937 #[tokio::test]
938 async fn retry_async_retries_then_succeeds_and_respects_the_predicate() {
939 use std::sync::atomic::{AtomicU32, Ordering};
940 // Zero backoff → no sleep, deterministic & fast.
941 let policy = RetryPolicy::none().attempts(4);
942 let lock = || {
943 exit(
944 "git",
945 128,
946 "Unable to create '/r/.git/index.lock': File exists.",
947 )
948 };
949
950 // Fails twice with a lock error, then succeeds — retried to success.
951 let calls = AtomicU32::new(0);
952 let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
953 let n = calls.fetch_add(1, Ordering::SeqCst);
954 let lock = lock();
955 async move { if n < 2 { Err(lock) } else { Ok(n) } }
956 })
957 .await;
958 assert_eq!(out.unwrap(), 2);
959 assert_eq!(calls.load(Ordering::SeqCst), 3, "1 try + 2 retries");
960
961 // A non-lock error is returned immediately (not retried).
962 let calls = AtomicU32::new(0);
963 let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
964 calls.fetch_add(1, Ordering::SeqCst);
965 async { Err(exit("git", 1, "real, deterministic failure")) }
966 })
967 .await;
968 assert!(out.is_err());
969 assert_eq!(
970 calls.load(Ordering::SeqCst),
971 1,
972 "non-retryable → single attempt"
973 );
974
975 // Persistent lock contention exhausts the attempt budget.
976 let calls = AtomicU32::new(0);
977 let out: Result<u32> = retry_async(&policy, is_lock_contention, || {
978 calls.fetch_add(1, Ordering::SeqCst);
979 async { Err(exit("git", 128, "index.lock': File exists")) }
980 })
981 .await;
982 assert!(out.is_err());
983 assert_eq!(calls.load(Ordering::SeqCst), 4, "all attempts used");
984 }
985
986 // `resolve_credential` returns `None` until a provider is attached, then the
987 // provider's credential. (No process is spawned, so the real runner is fine.)
988 #[tokio::test]
989 async fn retrying_client_resolves_credential_opt_in() {
990 let client = ManagedClient::new("git");
991 assert!(!client.has_credentials());
992 assert!(
993 client
994 .resolve_credential(CredentialService::Git, None)
995 .await
996 .unwrap()
997 .is_none(),
998 "no provider → ambient (None)"
999 );
1000
1001 let client = client.with_credentials(Arc::new(StaticCredential::token("t0k")));
1002 assert!(client.has_credentials());
1003 let got = client
1004 .resolve_credential(CredentialService::Git, None)
1005 .await
1006 .unwrap()
1007 .expect("provider yields a credential");
1008 assert_eq!(got.secret().expose(), "t0k");
1009 }
1010
1011 // An empty (or whitespace-only) secret is treated as `None` (ambient):
1012 // injecting an empty token would override the ambient login with nothing
1013 // instead of deferring to it. Mirrors `EnvToken`'s whitespace-only ⇒ unset rule.
1014 #[tokio::test]
1015 async fn resolve_credential_treats_empty_secret_as_ambient() {
1016 // Service-agnostic: both the forge (token-env) and git (helper) paths route
1017 // through this chokepoint, so a blank secret is ambient for either.
1018 for blank in ["", " ", "\t\n"] {
1019 let client = ManagedClient::new("git")
1020 .with_credentials(Arc::new(StaticCredential::token(blank)));
1021 for service in [CredentialService::GitHub, CredentialService::Git] {
1022 assert!(
1023 client
1024 .resolve_credential(service, None)
1025 .await
1026 .unwrap()
1027 .is_none(),
1028 "blank secret {blank:?} → ambient (None) for {service:?}"
1029 );
1030 }
1031 }
1032 }
1033}