Skip to main content

entelix_core/
error.rs

//! Top-level error type for `entelix-core` and the public API surface of the
//! facade crate.
//!
//! Conventions (see CLAUDE.md §"Error conventions"):
//! - Public crate APIs surface `entelix_core::Error`. Module-internal errors
//!   are typed enums (e.g. `CodecError`) that bubble up via `From` chains.
//! - Provider failures carry a typed `kind: ProviderErrorKind` (Network /
//!   Tls / Dns / Http(status)) — retry classifiers branch on the typed
//!   signal, not on parsed strings.
//! - `Result<T> = std::result::Result<T, Error>`.
11
12use std::borrow::Cow;
13use std::time::Duration;
14
15use crate::auth::AuthError;
16
17/// Convenience alias used across `entelix-core` and re-exported by the facade.
18pub type Result<T> = core::result::Result<T, Error>;
19
20/// Aggregate error returned from public entelix-core APIs.
21#[derive(Debug, thiserror::Error)]
22#[non_exhaustive]
23pub enum Error {
24    /// Caller supplied an invalid request before any provider was contacted —
25    /// e.g. empty message list, missing required field, schema mismatch.
26    #[error("invalid request: {0}")]
27    InvalidRequest(Cow<'static, str>),
28
29    /// Configuration error detected at construction time (builders, factories,
30    /// crate-init code).
31    #[error("config error: {0}")]
32    Config(Cow<'static, str>),
33
34    /// Provider failure. `kind` distinguishes transport-class
35    /// failures (network / TLS / DNS) from HTTP-class failures
36    /// (4xx / 5xx) so retry classifiers can act on the typed signal
37    /// rather than parsing strings or reading a `status: 0`
38    /// sentinel. `retry_after` carries the vendor's `Retry-After`
39    /// hint when present — the retry layer honours it ahead of its
40    /// own backoff (invariant #17 — vendor authoritative signal
41    /// beats self-jitter).
42    #[error("provider {kind}: {message}")]
43    Provider {
44        /// Failure category — `Network`, `Tls`, `Dns`, or
45        /// `Http(status)`.
46        kind: ProviderErrorKind,
47        /// Provider-supplied message, normalized to a string.
48        message: String,
49        /// Vendor `Retry-After` hint when present. Capped at the
50        /// transport's parsing limit so a malicious vendor cannot
51        /// pin a retry loop forever.
52        #[allow(dead_code)]
53        retry_after: Option<Duration>,
54        /// Underlying error (transport / parser / signer) preserved
55        /// for operator diagnostics. Walk it via [`std::error::Error::source`]
56        /// or `{:?}`; the LLM-facing channel never sees it
57        /// (invariant 16 — `LlmRenderable::render_for_llm` strips
58        /// source chains).
59        #[source]
60        source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>,
61    },
62
63    /// The operation was cancelled via the `ExecutionContext` cancellation token.
64    #[error("operation cancelled")]
65    Cancelled,
66
67    /// The operation hit the deadline carried by `ExecutionContext`.
68    #[error("deadline exceeded")]
69    DeadlineExceeded,
70
71    /// A dispatch (tool body, graph node, or middleware layer) requested
72    /// human-in-the-loop intervention. The runtime catches this,
73    /// persists a checkpoint at the pre-dispatch state, and returns
74    /// `kind` + `payload` to the caller. Resume with
75    /// `entelix_graph::CompiledGraph::resume_with`.
76    ///
77    /// See [`crate::interruption::InterruptionKind`] for the typed
78    /// reason taxonomy and [`crate::interrupt`] /
79    /// [`crate::interrupt_with`] for the canonical raise sites.
80    #[error("dispatch interrupted for human review")]
81    Interrupted {
82        /// Typed reason — `Custom` for operator-defined pauses,
83        /// `ApprovalPending { tool_use_id }` for tool-approval
84        /// pauses raised by `ApprovalLayer`, or any future SDK
85        /// variant. Operator match sites should carry a fall-through
86        /// `_` arm.
87        kind: crate::interruption::InterruptionKind,
88        /// Operator free-form data describing what the resumer needs
89        /// to know. For typed kinds this is often `Value::Null`; for
90        /// `Custom` it carries whatever `interrupt(payload)` passed.
91        payload: serde_json::Value,
92    },
93
94    /// A validator (typed-output `OutputValidator`, tool body, hook)
95    /// requested the model retry the current turn with corrective
96    /// feedback. Distinct from [`Self::Provider`] (transport
97    /// retries — wire-level failure) and [`Self::InvalidRequest`]
98    /// (operator misuse) so retry classifiers, OTel dashboards, and
99    /// budget meters all branch on a typed signal.
100    ///
101    /// Catch-and-resume semantics: the surrounding agent or
102    /// `complete_typed<O>` loop catches this variant, appends a
103    /// `RetryPromptPart` to the conversation carrying `hint`, and
104    /// re-invokes the model — counting one increment against
105    /// `ChatModelConfig::validation_retries`. Operators that want to
106    /// raise this variant build it via [`Error::model_retry`] so
107    /// the `RenderedForLlm` funnel (invariant 16) cannot be
108    /// bypassed.
109    #[error("model retry requested (attempt {attempt})")]
110    ModelRetry {
111        /// Corrective text the loop will surface to the model on the
112        /// retried turn. The `RenderedForLlm` carrier ensures the
113        /// payload was filtered through the operator-controlled
114        /// rendering funnel rather than copied raw from a
115        /// vendor-side error string.
116        hint: crate::llm_facing::RenderedForLlm<String>,
117        /// Per-call attempt counter. The retry loop stamps this on
118        /// emit so the variant is self-describing without callers
119        /// tracking attempt state externally. The first retry sees
120        /// `attempt = 1`.
121        attempt: u32,
122    },
123
124    /// JSON serialization or deserialization failed at an entelix-managed
125    /// boundary (codec, tool I/O, persistence write/read).
126    #[error(transparent)]
127    Serde(#[from] serde_json::Error),
128
129    /// Credential resolution or use failed. Distinct from
130    /// [`Self::Provider`] so retry policies and dashboards can
131    /// distinguish "the model is down" from "our key is bad" without
132    /// pattern-matching on error messages.
133    #[error(transparent)]
134    Auth(AuthError),
135
136    /// A `RunBudget` axis was exceeded — request count, token
137    /// totals, or tool calls hit the configured limit. The
138    /// `axis` field identifies which axis fired; `limit` is the
139    /// configured cap; `observed` is the value that breached it.
140    /// Distinct from [`Self::Provider`] so retry classifiers can
141    /// short-circuit (a budget breach does not retry) and from
142    /// [`Self::InvalidRequest`] so dashboards see the budget
143    /// signal as a first-class category.
144    /// A [`crate::RunBudget`] axis was exceeded. The typed
145    /// [`crate::run_budget::UsageLimitBreach`] enum carries both
146    /// the breaching axis and its magnitude in one variant — axis
147    /// and magnitude are paired by construction so consumers
148    /// pattern-match a single value rather than checking the axis
149    /// to know which numeric type to read.
150    #[error("{0}")]
151    UsageLimitExceeded(crate::run_budget::UsageLimitBreach),
152
153    /// A classified tool-dispatch failure was escalated to terminal
154    /// by [`crate::tools::ToolErrorPolicyLayer`]. The reasoning loop
155    /// catches this variant on the same code path as
156    /// [`Self::Interrupted`] — propagate to the caller without
157    /// re-prompting the model. The terminal routing decision is
158    /// orthogonal to wire identity: [`Self::envelope`] delegates to
159    /// the inner source, so dashboards keying off `wire_code` see
160    /// the underlying category (`"upstream_unauthorized"` for
161    /// [`crate::tools::ToolErrorKind::Auth`], etc.) and the
162    /// "terminal" classification surfaces on a separate axis
163    /// (`AgentEvent::Failed::kind`).
164    #[error("tool {tool_name} terminal ({kind}): {source}")]
165    ToolErrorTerminal {
166        /// Classified category of the underlying failure
167        /// (`ToolErrorKind::classify(&source)` at the wrap site).
168        kind: crate::tools::ToolErrorKind,
169        /// Name of the dispatched tool that produced the failure.
170        tool_name: String,
171        /// The original error preserved for operator diagnostics.
172        /// Walked via [`std::error::Error::source`] / `{:?}`. The
173        /// LLM-facing channel never sees this — `for_llm` strips
174        /// source chains per invariant 16.
175        #[source]
176        source: Box<Self>,
177    },
178}
179
180impl Error {
181    /// Build an `InvalidRequest` from a static or owned string.
182    pub fn invalid_request(msg: impl Into<Cow<'static, str>>) -> Self {
183        Self::InvalidRequest(msg.into())
184    }
185
186    /// Build a `Config` error from a static or owned string.
187    pub fn config(msg: impl Into<Cow<'static, str>>) -> Self {
188        Self::Config(msg.into())
189    }
190
191    /// Build a [`Self::ModelRetry`] from an LLM-rendered hint. The
192    /// `attempt` counter starts at zero and is incremented by the
193    /// surrounding retry loop on each emit; validators / tools
194    /// raising this variant from a fresh call site pass `0` and
195    /// trust the loop to stamp the running counter.
196    ///
197    /// Construction goes through [`crate::llm_facing::RenderedForLlm`] so the
198    /// hint is not a free-form `String` — the typed carrier ensures
199    /// the message has been routed through the operator's rendering
200    /// funnel (invariant 16). Consumers raising this variant from a
201    /// validator typically obtain the rendered hint via
202    /// `LlmRenderable::for_llm`.
203    pub const fn model_retry(
204        hint: crate::llm_facing::RenderedForLlm<String>,
205        attempt: u32,
206    ) -> Self {
207        Self::ModelRetry { hint, attempt }
208    }
209
210    /// Build an HTTP-class provider error. Use the `_network` /
211    /// `_tls` / `_dns` variants for transport-class failures so
212    /// retry classifiers see the typed signal rather than a
213    /// stringly-typed status code.
214    ///
215    /// Status `0` / 1xx / 2xx / 3xx / ≥600 do **not** represent a
216    /// terminal vendor response. The constructor coerces them to
217    /// [`ProviderErrorKind::Network`] so retry classifiers, wire
218    /// codes, and dashboards see "we never received a terminal
219    /// response" rather than a plausible-looking `upstream_error`
220    /// (invariant 15).
221    ///
222    /// Synthetic-message form: use when the message is composed
223    /// from vendor body fields (no source error). For
224    /// [`std::error::Error`]-bearing failures, prefer
225    /// [`Self::provider_http_from`] which preserves the source
226    /// chain.
227    pub fn provider_http(status: u16, message: impl Into<String>) -> Self {
228        Self::Provider {
229            kind: http_or_network(status),
230            message: message.into(),
231            retry_after: None,
232            source: None,
233        }
234    }
235
236    /// Build an HTTP-class provider error from any
237    /// [`std::error::Error`]. Message is `err.to_string()`; the
238    /// original error is preserved as `#[source]`. Status coercion
239    /// follows [`Self::provider_http`] — non-4xx/5xx statuses
240    /// surface as [`ProviderErrorKind::Network`].
241    pub fn provider_http_from<E>(status: u16, err: E) -> Self
242    where
243        E: std::error::Error + Send + Sync + 'static,
244    {
245        Self::Provider {
246            kind: http_or_network(status),
247            message: err.to_string(),
248            retry_after: None,
249            source: Some(Box::new(err)),
250        }
251    }
252
253    /// Build a network-class provider error (connect refused, read
254    /// reset, peer hangup before HTTP framing). Distinguishes
255    /// "vendor returned a 5xx" from "we never spoke to vendor".
256    ///
257    /// Synthetic-message form: use when no source error exists
258    /// (e.g. vendor wire-format prose lifted from a JSON body).
259    /// Source-bearing form: [`Self::provider_network_from`] derives
260    /// the message from the source's `Display` and stores the source
261    /// for `{:?}` walks (preferred for `map_err` chains).
262    pub fn provider_network(message: impl Into<String>) -> Self {
263        Self::Provider {
264            kind: ProviderErrorKind::Network,
265            message: message.into(),
266            retry_after: None,
267            source: None,
268        }
269    }
270
271    /// Build a network-class provider error from any
272    /// [`std::error::Error`]. Message is `err.to_string()`; the
273    /// original error is preserved as `#[source]` so operator
274    /// diagnostics walk the full chain. Pairs with `.map_err`:
275    ///
276    /// ```ignore
277    /// http_req.send().await.map_err(Error::provider_network_from)?;
278    /// ```
279    pub fn provider_network_from<E>(err: E) -> Self
280    where
281        E: std::error::Error + Send + Sync + 'static,
282    {
283        Self::Provider {
284            kind: ProviderErrorKind::Network,
285            message: err.to_string(),
286            retry_after: None,
287            source: Some(Box::new(err)),
288        }
289    }
290
291    /// Build a TLS-class provider error (handshake failure,
292    /// certificate validation, protocol mismatch).
293    pub fn provider_tls(message: impl Into<String>) -> Self {
294        Self::Provider {
295            kind: ProviderErrorKind::Tls,
296            message: message.into(),
297            retry_after: None,
298            source: None,
299        }
300    }
301
302    /// Build a TLS-class provider error from any
303    /// [`std::error::Error`]. Message is `err.to_string()`; the
304    /// original error is preserved as `#[source]`.
305    pub fn provider_tls_from<E>(err: E) -> Self
306    where
307        E: std::error::Error + Send + Sync + 'static,
308    {
309        Self::Provider {
310            kind: ProviderErrorKind::Tls,
311            message: err.to_string(),
312            retry_after: None,
313            source: Some(Box::new(err)),
314        }
315    }
316
317    /// Build a DNS-class provider error (name resolution failure,
318    /// SSRF allowlist rejection at the resolver).
319    pub fn provider_dns(message: impl Into<String>) -> Self {
320        Self::Provider {
321            kind: ProviderErrorKind::Dns,
322            message: message.into(),
323            retry_after: None,
324            source: None,
325        }
326    }
327
328    /// Build a DNS-class provider error from any
329    /// [`std::error::Error`]. Message is `err.to_string()`; the
330    /// original error is preserved as `#[source]`.
331    pub fn provider_dns_from<E>(err: E) -> Self
332    where
333        E: std::error::Error + Send + Sync + 'static,
334    {
335        Self::Provider {
336            kind: ProviderErrorKind::Dns,
337            message: err.to_string(),
338            retry_after: None,
339            source: Some(Box::new(err)),
340        }
341    }
342
343    /// Attach a `Retry-After` duration to a provider error. The
344    /// duration arrives from the vendor's `Retry-After` response
345    /// header (or equivalent body field). Returns `self` unchanged
346    /// for non-`Provider` variants — callers know the variant they
347    /// constructed, so this is `Self -> Self` rather than a typed
348    /// projection.
349    #[must_use]
350    pub fn with_retry_after(mut self, duration: Duration) -> Self {
351        if let Self::Provider {
352            ref mut retry_after,
353            ..
354        } = self
355        {
356            *retry_after = Some(duration);
357        }
358        self
359    }
360
361    /// Attach the underlying error as the `Provider` variant's source
362    /// chain, preserving root-cause context for operator diagnostics
363    /// (`{:?}` / [`std::error::Error::source`] walk). Returns `self`
364    /// unchanged for non-`Provider` variants.
365    ///
366    /// Channel-separation guarantee (invariant 16): the source chain
367    /// is operator-only. [`crate::LlmRenderable::render_for_llm`]
368    /// strips it for LLM-facing renderings; sinks / OTel / logs keep
369    /// the full diagnostic.
370    #[must_use]
371    pub fn with_source<E>(mut self, err: E) -> Self
372    where
373        E: std::error::Error + Send + Sync + 'static,
374    {
375        if let Self::Provider { ref mut source, .. } = self {
376            *source = Some(Box::new(err));
377        }
378        self
379    }
380
381    /// Typed wire shape for this error — the **single canonical
382    /// inspector** integrators read at sink / SSE / audit boundaries
383    /// instead of parsing `Display` output. `ErrorEnvelope` is `Copy`,
384    /// so call sites cache or pass-by-value without ceremony.
385    ///
386    /// Guarantees (patch-version stable, mirrored on `ErrorEnvelope`'s
387    /// own doc-comment):
388    /// - `wire_code` is a snake-case ASCII `&'static str` suitable as
389    ///   an i18n key, metric label, or typed-wire-envelope key. Adding
390    ///   a new [`Error`] variant adds a new code; existing codes are
391    ///   forever-stable.
392    /// - `wire_class` is the coarse responsibility split (`Client` for
393    ///   caller-actionable failures, `Server` for SDK/vendor-side
394    ///   failures). Orthogonal to retryability.
395    /// - `retry_after_secs` carries the vendor's `Retry-After` hint
396    ///   converted to whole seconds when the originating
397    ///   [`Self::Provider`] error captured one; `None` for every other
398    ///   variant or Provider error without a hint.
399    /// - `provider_status` carries the raw HTTP status when the error
400    ///   is `Provider` with [`ProviderErrorKind::Http`]; `None`
401    ///   otherwise. Lets sinks/audit retain `429 vs 503` granularity
402    ///   even though `wire_code` collapses them onto coarse buckets.
403    ///
404    /// HTTP provider failures bucket on the status family for
405    /// `wire_code` so vendor drift (a new 4xx) absorbs into the right
406    /// class without an SDK release; the raw status is still observable
407    /// through `provider_status` for operators that want the exact
408    /// signal.
409    pub fn envelope(&self) -> ErrorEnvelope {
410        // ToolErrorTerminal is a routing-decision wrapper, not a
411        // distinct wire category — the inner source's envelope is
412        // the authoritative wire shape. Dashboards keying off
413        // `wire_code` see `"upstream_unauthorized"` for an Auth
414        // failure regardless of whether it terminated the loop;
415        // the terminal flag rides on `AgentEvent::Failed::kind`.
416        if let Self::ToolErrorTerminal { source, .. } = self {
417            return source.envelope();
418        }
419        let (wire_code, wire_class) = self.wire_signal();
420        let (retry_after_secs, provider_status) = match self {
421            Self::Provider {
422                kind, retry_after, ..
423            } => (
424                retry_after.map(|d| d.as_secs()),
425                match kind {
426                    ProviderErrorKind::Http(status) => Some(*status),
427                    _ => None,
428                },
429            ),
430            _ => (None, None),
431        };
432        ErrorEnvelope {
433            wire_code,
434            wire_class,
435            retry_after_secs,
436            provider_status,
437        }
438    }
439
440    /// Wrap a classified tool failure into [`Self::ToolErrorTerminal`].
441    /// `kind` is the operator-policy decision input
442    /// (`ToolErrorKind::classify(&source)` at the wrap site);
443    /// `tool_name` echoes [`crate::ToolInvocation::name`] for
444    /// downstream diagnostics. The source is moved into a `Box` so
445    /// the variant stays small on the stack.
446    pub fn tool_error_terminal(
447        kind: crate::tools::ToolErrorKind,
448        tool_name: impl Into<String>,
449        source: Self,
450    ) -> Self {
451        Self::ToolErrorTerminal {
452            kind,
453            tool_name: tool_name.into(),
454            source: Box::new(source),
455        }
456    }
457
458    /// Internal matcher producing the `(wire_code, wire_class)` pair.
459    /// Single match arm per [`Error`] variant keeps the two signals
460    /// from drifting apart on future variant additions — they're
461    /// chosen together, not from two parallel `match` expressions.
462    fn wire_signal(&self) -> (&'static str, ErrorClass) {
463        match self {
464            Self::InvalidRequest(_) => ("invalid_request", ErrorClass::Client),
465            Self::Config(_) => ("config_error", ErrorClass::Server),
466            Self::Provider { kind, .. } => match kind {
467                ProviderErrorKind::Network => ("transport_failure", ErrorClass::Server),
468                ProviderErrorKind::Tls => ("tls_failure", ErrorClass::Server),
469                ProviderErrorKind::Dns => ("dns_failure", ErrorClass::Server),
470                ProviderErrorKind::Http(status) => match *status {
471                    429 => ("rate_limited", ErrorClass::Client),
472                    401 | 403 => ("upstream_unauthorized", ErrorClass::Client),
473                    s if (400..500).contains(&s) => ("upstream_invalid", ErrorClass::Client),
474                    s if (500..600).contains(&s) => ("upstream_unavailable", ErrorClass::Server),
475                    _ => ("upstream_error", ErrorClass::Server),
476                },
477            },
478            Self::Auth(_) => ("auth_failed", ErrorClass::Client),
479            Self::Cancelled => ("cancelled", ErrorClass::Client),
480            Self::DeadlineExceeded => ("deadline_exceeded", ErrorClass::Server),
481            Self::Interrupted { .. } => ("interrupted", ErrorClass::Client),
482            Self::ModelRetry { .. } => ("model_retry_exhausted", ErrorClass::Client),
483            Self::Serde(_) => ("serde", ErrorClass::Server),
484            Self::UsageLimitExceeded(_) => ("quota_exceeded", ErrorClass::Client),
485            // Unreachable in practice — `envelope` short-circuits to
486            // the inner source's envelope before reaching here. Kept
487            // for exhaustiveness so future direct callers of
488            // `wire_signal` (none today) cannot miss the delegation
489            // intent.
490            Self::ToolErrorTerminal { source, .. } => source.wire_signal(),
491        }
492    }
493}
494
495/// Typed wire shape of an [`Error`] — the canonical inspector at
496/// sink / SSE / audit boundaries. Built by [`Error::envelope`]; never
497/// constructed externally so the field set evolves under the same
498/// patch-version-stability guarantee as `wire_code` itself.
499///
500/// `Copy` is intentional: every field is 16 bytes or smaller. Carry
501/// the envelope by value through sink fan-out, OTel attribute
502/// stamping, and SSE serialisation without `.clone()` ceremony.
503///
504/// ## Field semantics
505///
506/// - `wire_code` — patch-version-stable snake-case `&'static str`
507///   bucketing the error onto an i18n / metric / typed-wire key. HTTP
508///   provider failures bucket on the status family so vendor drift
509///   does not require an SDK release.
510/// - `wire_class` — coarse responsibility split. `Client` for
511///   caller-actionable failures, `Server` for SDK/vendor-side
512///   failures. Orthogonal to retry semantics.
513/// - `retry_after_secs` — vendor `Retry-After` hint converted to
514///   whole seconds when the originating [`Error::Provider`] captured
515///   one. `None` for every other variant or for Provider errors that
516///   arrived without a hint. Sinks key SSE rate-limit timers /
517///   FE retry indicators off this field.
518/// - `provider_status` — raw HTTP status when the error is
519///   `Provider` with [`ProviderErrorKind::Http`]; `None` otherwise.
520///   Lets audit / replay retain `429 vs 503` granularity even though
521///   `wire_code` collapses them onto coarse buckets.
522#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, serde::Serialize)]
523#[non_exhaustive]
524pub struct ErrorEnvelope {
525    /// Patch-version-stable wire code. See type-level doc.
526    pub wire_code: &'static str,
527    /// Responsibility class. See type-level doc.
528    pub wire_class: ErrorClass,
529    /// Vendor `Retry-After` hint in seconds. See type-level doc.
530    #[serde(skip_serializing_if = "Option::is_none")]
531    pub retry_after_secs: Option<u64>,
532    /// Raw HTTP status for Provider/Http failures. See type-level doc.
533    #[serde(skip_serializing_if = "Option::is_none")]
534    pub provider_status: Option<u16>,
535}
536
537/// Coarse responsibility class for an [`Error`]. Two values by design —
538/// "transient" / "permanent" is a retry-policy axis, orthogonal to
539/// responsibility, and surfaced via [`Error::Provider`]'s
540/// `retry_after` field plus the `RetryClassifier` policy surface.
541///
542/// Maps onto the standard HTTP family split: `Client` ≈ 4xx-equivalent
543/// (caller / integrator can act to fix), `Server` ≈ 5xx-equivalent
544/// (vendor or deployment must act).
545///
546/// JSON serialisation produces `"client"` / `"server"` to match the
547/// [`std::fmt::Display`] form — wire dashboards keying off the lower-
548/// case bucket stay consistent across the OTel / SSE / audit surfaces.
549#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, serde::Serialize)]
550#[serde(rename_all = "lowercase")]
551#[non_exhaustive]
552pub enum ErrorClass {
553    /// The caller — request shape, credentials, quota, cancellation
554    /// choice — is the actor that can resolve the failure.
555    Client,
556    /// The SDK, vendor, or deployment environment is the actor that
557    /// can resolve the failure.
558    Server,
559}
560
561impl std::fmt::Display for ErrorClass {
562    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
563        match self {
564            Self::Client => f.write_str("client"),
565            Self::Server => f.write_str("server"),
566        }
567    }
568}
569
570/// Coerce a raw `u16` HTTP status into a typed
571/// [`ProviderErrorKind`]. 4xx / 5xx surface as
572/// [`ProviderErrorKind::Http`]; every other value collapses to
573/// [`ProviderErrorKind::Network`] because the SDK never received a
574/// terminal vendor response (invariant 15 — no silent fallback to
575/// a plausible-looking `upstream_error`).
576const fn http_or_network(status: u16) -> ProviderErrorKind {
577    if status >= 400 && status < 600 {
578        ProviderErrorKind::Http(status)
579    } else {
580        ProviderErrorKind::Network
581    }
582}
583
/// Category of a provider failure.
///
/// Separates transport-class failures (the SDK never received a
/// complete HTTP framing) from HTTP-class failures (the vendor
/// answered with a status). Retry classifiers branch on this typed
/// value instead of pattern-matching on `status: 0` sentinels
/// (invariant #17).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ProviderErrorKind {
    /// Connect refused, read reset, or peer hangup before HTTP framing
    /// completed.
    Network,
    /// TLS handshake failure, certificate validation failure, or
    /// protocol mismatch.
    Tls,
    /// DNS resolution failure, or SSRF allowlist rejection at the
    /// resolver.
    Dns,
    /// The vendor responded with an HTTP status. The numeric code is
    /// carried as-is so classifiers can branch on `408|425|429|5xx`.
    Http(u16),
}
605
606impl std::fmt::Display for ProviderErrorKind {
607    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
608        match self {
609            Self::Network => f.write_str("network"),
610            Self::Tls => f.write_str("tls"),
611            Self::Dns => f.write_str("dns"),
612            Self::Http(status) => write!(f, "returned {status}"),
613        }
614    }
615}