// entelix_core/tools/error_kind.rs

//! `ToolErrorKind` — tool-dispatch failure category derived from
//! [`crate::Error`] for observability and retry classification.
//!
//! Tool authors return `Result<Value, Error>` from `Tool::execute`;
//! the runtime classifies the error variant into one of these
//! seven categories so observability sinks (`AgentEvent::ToolError`),
//! retry middleware (`RetryToolLayer`), and recovery sinks all
//! share the same cross-tool taxonomy.
//!
//! Mirrors [`crate::ProviderErrorKind`] in shape (a typed enum
//! categorising failures) but operates at a higher level — provider
//! kinds describe transport mechanisms, tool kinds describe the
//! semantic outcome the operator (or the model) actually cares about.
14
15use crate::error::Error;
16
/// Cross-tool failure category.
///
/// Derive from [`Error`] via [`Self::classify`]. Consumers:
///
/// - retry middleware — `RetryToolLayer` retries [`Self::Transient`]
///   and [`Self::RateLimit`] (see [`Self::is_retryable`]);
/// - observability sinks — operators surface the category in
///   dashboards;
/// - downstream recovery routing — different categories trigger
///   different operator responses (page on `Auth`, alert on `Quota`,
///   ignore `Validation` noise).
///
/// The enum is `#[non_exhaustive]`, so a `match` outside this crate
/// needs a wildcard arm; new categories can then be added without a
/// breaking change.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum ToolErrorKind {
    /// Network / TLS / DNS transport failure, HTTP 5xx, or HTTP
    /// 408 / 425 — safe to retry.
    Transient,
    /// Vendor signalled rate limiting (HTTP 429 carrying a
    /// `Retry-After` hint). Retryable after the configured cooldown.
    RateLimit,
    /// Vendor signalled exhausted quota / billing cap (HTTP 429
    /// without a `Retry-After` hint, or
    /// [`Error::UsageLimitExceeded`]). Retry will not succeed until
    /// the quota resets or an operator intervenes — surface to ops,
    /// do not retry automatically.
    Quota,
    /// Credential rejected (HTTP 401 / 403, or [`Error::Auth`]).
    /// Retry will not succeed until credentials are rotated.
    Auth,
    /// Permanent vendor failure: any HTTP status that is not
    /// auth, rate-limit, quota, or transient (404, 405, 410, 422 …).
    /// The same call will fail again.
    Permanent,
    /// Caller-side input rejected ([`Error::InvalidRequest`],
    /// [`Error::Serde`]) — the payload does not match the tool
    /// contract. Retry is meaningless without changing the payload.
    Validation,
    /// Tool-internal bug or misconfiguration ([`Error::Config`], or
    /// any shape not classified above). Surface to ops; retry is not
    /// meaningful.
    Internal,
}
54
55impl ToolErrorKind {
56    /// Derive the category from an [`Error`].
57    ///
58    /// The mapping is intentionally exhaustive over the variants
59    /// [`Error`] surfaces today — the `_` catch-all routes to
60    /// [`Self::Internal`] so future variants stay observable until
61    /// classified explicitly. Operational variants
62    /// ([`Error::Cancelled`], [`Error::DeadlineExceeded`],
63    /// [`Error::Interrupted`], [`Error::ModelRetry`]) flow through
64    /// `Internal` because they are agent-runtime control signals,
65    /// not tool failures — call sites that observe them should not
66    /// reach this classifier in the first place.
67    #[must_use]
68    pub fn classify(error: &Error) -> Self {
69        use crate::error::ProviderErrorKind;
70        match error {
71            Error::Provider {
72                kind: ProviderErrorKind::Network | ProviderErrorKind::Tls | ProviderErrorKind::Dns,
73                ..
74            } => Self::Transient,
75            Error::Provider {
76                kind: ProviderErrorKind::Http(429),
77                retry_after,
78                ..
79            } => {
80                // Vendor distinguishes 429-with-Retry-After (transient
81                // back-pressure) from 429-without (often quota
82                // exhaustion). The hint presence is the cue.
83                if retry_after.is_some() {
84                    Self::RateLimit
85                } else {
86                    Self::Quota
87                }
88            }
89            Error::Provider {
90                kind: ProviderErrorKind::Http(status),
91                ..
92            } => {
93                if *status == 401 || *status == 403 {
94                    Self::Auth
95                } else if (500..600).contains(status) || *status == 408 || *status == 425 {
96                    Self::Transient
97                } else {
98                    Self::Permanent
99                }
100            }
101            Error::Auth(_) => Self::Auth,
102            Error::UsageLimitExceeded(_) => Self::Quota,
103            Error::InvalidRequest(_) | Error::Serde(_) => Self::Validation,
104            // Operational variants (Cancelled, DeadlineExceeded,
105            // Interrupted, ModelRetry) and any future shape route
106            // here together with Config — none of them are tool
107            // failures the operator can act on per-category.
108            _ => Self::Internal,
109        }
110    }
111
112    /// Whether the runtime should attempt the tool call again.
113    ///
114    /// `Transient` and `RateLimit` are retryable; everything else
115    /// is a surface-and-stop signal. `RetryToolLayer` consults this
116    /// via the underlying `RetryClassifier` (which can be
117    /// overridden per deployment) — operators that want different
118    /// retry policy install a custom classifier rather than mutating
119    /// this method.
120    #[must_use]
121    pub const fn is_retryable(self) -> bool {
122        matches!(self, Self::Transient | Self::RateLimit)
123    }
124}
125
// Unit tests pinning the `Error` → `ToolErrorKind` mapping and the
// retryability of each category.
#[cfg(test)]
// The serde fixture below calls `unwrap_err` to obtain a real
// `serde_json::Error`; that is acceptable in test code.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::time::Duration;

    #[test]
    fn provider_network_classifies_as_transient() {
        let err = Error::provider_network("connect refused");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
        assert!(ToolErrorKind::classify(&err).is_retryable());
    }

    #[test]
    fn provider_dns_classifies_as_transient() {
        let err = Error::provider_dns("no such host");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
    }

    #[test]
    fn provider_5xx_classifies_as_transient() {
        let err = Error::provider_http(503, "down");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
        let err = Error::provider_http(502, "bad gateway");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
    }

    #[test]
    fn provider_5xx_boundaries_classify_as_transient() {
        // Both ends of the 5xx range must be covered.
        let err = Error::provider_http(500, "internal server error");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
        let err = Error::provider_http(599, "edge of range");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
    }

    #[test]
    fn http_408_and_425_classify_as_transient() {
        // 408 Request Timeout, 425 Too Early — both retryable per
        // spec semantics.
        let err = Error::provider_http(408, "timeout");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
        let err = Error::provider_http(425, "too early");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Transient);
    }

    #[test]
    fn http_429_with_retry_after_classifies_as_rate_limit() {
        let err = Error::provider_http(429, "slow down").with_retry_after(Duration::from_secs(5));
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::RateLimit);
        assert!(ToolErrorKind::classify(&err).is_retryable());
    }

    #[test]
    fn http_429_without_retry_after_classifies_as_quota() {
        // Vendor signalling quota exhaustion typically omits
        // `Retry-After` because the cooldown is a billing cycle,
        // not a request window.
        let err = Error::provider_http(429, "monthly cap reached");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Quota);
        assert!(!ToolErrorKind::classify(&err).is_retryable());
    }

    #[test]
    fn http_401_403_classify_as_auth() {
        let err = Error::provider_http(401, "unauthorized");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Auth);
        assert!(!ToolErrorKind::classify(&err).is_retryable());
        let err = Error::provider_http(403, "forbidden");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Auth);
        assert!(!ToolErrorKind::classify(&err).is_retryable());
    }

    #[test]
    fn http_4xx_other_classifies_as_permanent() {
        let err = Error::provider_http(404, "not found");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Permanent);
        let err = Error::provider_http(422, "unprocessable");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Permanent);
        assert!(!ToolErrorKind::classify(&err).is_retryable());
    }

    #[test]
    fn invalid_request_and_serde_classify_as_validation() {
        let err = Error::invalid_request("bad input");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Validation);
        let serde_err: serde_json::Error = serde_json::from_str::<i32>("not-a-number").unwrap_err();
        let err: Error = serde_err.into();
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Validation);
    }

    #[test]
    fn config_classifies_as_internal() {
        let err = Error::config("misconfigured");
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Internal);
    }

    #[test]
    fn usage_limit_exceeded_classifies_as_quota() {
        use crate::run_budget::UsageLimitBreach;
        let err = Error::UsageLimitExceeded(UsageLimitBreach::Requests {
            limit: 10,
            observed: 11,
        });
        assert_eq!(ToolErrorKind::classify(&err), ToolErrorKind::Quota);
    }

    #[test]
    fn only_transient_and_rate_limit_are_retryable() {
        // Pins the retry policy independently of any `Error` shape.
        for kind in [ToolErrorKind::Transient, ToolErrorKind::RateLimit] {
            assert!(kind.is_retryable(), "{kind:?} should be retryable");
        }
        for kind in [
            ToolErrorKind::Quota,
            ToolErrorKind::Auth,
            ToolErrorKind::Permanent,
            ToolErrorKind::Validation,
            ToolErrorKind::Internal,
        ] {
            assert!(!kind.is_retryable(), "{kind:?} should not be retryable");
        }
    }
}