entelix_core/tools/error_kind.rs
1//! `ToolErrorKind` — tool-dispatch failure category derived from
2//! [`crate::Error`] for observability and retry classification.
3//!
4//! Tool authors return `Result<Value, Error>` from `Tool::execute`;
5//! the runtime classifies the error variant into one of these
6//! seven categories so observability sinks (`AgentEvent::ToolError`),
7//! retry middleware (`RetryToolLayer`), and recovery sinks all
8//! reach the same cross-tool taxonomy.
9//!
10//! Mirrors [`crate::ProviderErrorKind`] in shape (typed enum
11//! categorising failures) but operates at a higher level — provider
12//! kinds describe transport mechanisms, tool kinds describe the
13//! semantic outcome the operator (or the model) actually cares about.
14
15use crate::error::Error;
16
/// Category assigned to a failed tool dispatch, shared by every tool.
///
/// Obtain one from an [`Error`] through [`Self::classify`]. The category
/// feeds three consumers:
///
/// - retry middleware — `RetryToolLayer` retries [`Self::Transient`] and
///   [`Self::RateLimit`];
/// - observability sinks — operators surface the category in dashboards;
/// - recovery routing — each category calls for a different operator
///   response (page on `Auth`, alert on `Quota`, ignore `Validation`
///   noise).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ToolErrorKind {
    /// Safe to retry: a network blip, a transient 5xx, or a generic
    /// transport failure.
    Transient,
    /// The vendor is rate limiting the caller (429 carrying a
    /// `Retry-After` hint). Retryable once the configured cooldown has
    /// elapsed.
    RateLimit,
    /// The vendor reports an exhausted quota or billing cap. Retrying
    /// cannot succeed until the quota resets or an operator steps in, so
    /// surface it to ops instead of retrying automatically.
    Quota,
    /// The credential was rejected (401 / 403 / [`Error::Auth`]).
    /// Retrying cannot succeed until the credentials are rotated.
    Auth,
    /// Permanent vendor failure (4xx other than auth/rate/quota —
    /// 405, 410, 422 …). Repeating the same call fails the same way.
    Permanent,
    /// The caller's input was rejected ([`Error::InvalidRequest`],
    /// [`Error::Serde`]): the operator's payload does not match the tool
    /// contract, so a retry is pointless unless the payload changes.
    Validation,
    /// A bug or misconfiguration inside the tool ([`Error::Config`]), or
    /// any shape that has no explicit classification. Surface it to ops;
    /// a retry is not meaningful.
    Internal,
}
54
55impl ToolErrorKind {
56 /// Derive the category from an [`Error`].
57 ///
58 /// The mapping is intentionally exhaustive over the variants
59 /// [`Error`] surfaces today — the `_` catch-all routes to
60 /// [`Self::Internal`] so future variants stay observable until
61 /// classified explicitly. Operational variants
62 /// ([`Error::Cancelled`], [`Error::DeadlineExceeded`],
63 /// [`Error::Interrupted`], [`Error::ModelRetry`]) flow through
64 /// `Internal` because they are agent-runtime control signals,
65 /// not tool failures — call sites that observe them should not
66 /// reach this classifier in the first place.
67 #[must_use]
68 pub fn classify(error: &Error) -> Self {
69 use crate::error::ProviderErrorKind;
70 match error {
71 Error::Provider {
72 kind: ProviderErrorKind::Network | ProviderErrorKind::Tls | ProviderErrorKind::Dns,
73 ..
74 } => Self::Transient,
75 Error::Provider {
76 kind: ProviderErrorKind::Http(429),
77 retry_after,
78 ..
79 } => {
80 // Vendor distinguishes 429-with-Retry-After (transient
81 // back-pressure) from 429-without (often quota
82 // exhaustion). The hint presence is the cue.
83 if retry_after.is_some() {
84 Self::RateLimit
85 } else {
86 Self::Quota
87 }
88 }
89 Error::Provider {
90 kind: ProviderErrorKind::Http(status),
91 ..
92 } => {
93 if *status == 401 || *status == 403 {
94 Self::Auth
95 } else if (500..600).contains(status) || *status == 408 || *status == 425 {
96 Self::Transient
97 } else {
98 Self::Permanent
99 }
100 }
101 Error::Auth(_) => Self::Auth,
102 Error::UsageLimitExceeded(_) => Self::Quota,
103 Error::InvalidRequest(_) | Error::Serde(_) => Self::Validation,
104 // Operational variants (Cancelled, DeadlineExceeded,
105 // Interrupted, ModelRetry) and any future shape route
106 // here together with Config — none of them are tool
107 // failures the operator can act on per-category.
108 _ => Self::Internal,
109 }
110 }
111
112 /// Whether the runtime should attempt the tool call again.
113 ///
114 /// `Transient` and `RateLimit` are retryable; everything else
115 /// is a surface-and-stop signal. `RetryToolLayer` consults this
116 /// via the underlying `RetryClassifier` (which can be
117 /// overridden per deployment) — operators that want different
118 /// retry policy install a custom classifier rather than mutating
119 /// this method.
120 #[must_use]
121 pub const fn is_retryable(self) -> bool {
122 matches!(self, Self::Transient | Self::RateLimit)
123 }
124}
125
#[cfg(test)]
// Tests intentionally unwrap: a failed unwrap is a failed test.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use std::time::Duration;

    /// Shorthand for classifying an error inside the assertions below.
    fn kind_of(err: &Error) -> ToolErrorKind {
        ToolErrorKind::classify(err)
    }

    #[test]
    fn provider_network_classifies_as_transient() {
        let kind = kind_of(&Error::provider_network("connect refused"));
        assert_eq!(kind, ToolErrorKind::Transient);
        assert!(kind.is_retryable());
    }

    #[test]
    fn provider_dns_classifies_as_transient() {
        let kind = kind_of(&Error::provider_dns("no such host"));
        assert_eq!(kind, ToolErrorKind::Transient);
    }

    #[test]
    fn provider_5xx_classifies_as_transient() {
        for (status, body) in [(503, "down"), (502, "bad gateway")] {
            let failure = Error::provider_http(status, body);
            assert_eq!(kind_of(&failure), ToolErrorKind::Transient);
        }
    }

    #[test]
    fn http_408_and_425_classify_as_transient() {
        // 408 Request Timeout and 425 Too Early are both retryable per
        // spec semantics.
        for (status, body) in [(408, "timeout"), (425, "too early")] {
            let failure = Error::provider_http(status, body);
            assert_eq!(kind_of(&failure), ToolErrorKind::Transient);
        }
    }

    #[test]
    fn http_429_with_retry_after_classifies_as_rate_limit() {
        let throttled =
            Error::provider_http(429, "slow down").with_retry_after(Duration::from_secs(5));
        let kind = kind_of(&throttled);
        assert_eq!(kind, ToolErrorKind::RateLimit);
        assert!(kind.is_retryable());
    }

    #[test]
    fn http_429_without_retry_after_classifies_as_quota() {
        // A vendor signalling quota exhaustion typically omits
        // `Retry-After`: the cooldown is a billing cycle, not a request
        // window.
        let capped = Error::provider_http(429, "monthly cap reached");
        let kind = kind_of(&capped);
        assert_eq!(kind, ToolErrorKind::Quota);
        assert!(!kind.is_retryable());
    }

    #[test]
    fn http_401_403_classify_as_auth() {
        let unauthorized = Error::provider_http(401, "unauthorized");
        assert_eq!(kind_of(&unauthorized), ToolErrorKind::Auth);

        let forbidden = Error::provider_http(403, "forbidden");
        let kind = kind_of(&forbidden);
        assert_eq!(kind, ToolErrorKind::Auth);
        assert!(!kind.is_retryable());
    }

    #[test]
    fn http_4xx_other_classifies_as_permanent() {
        let missing = Error::provider_http(404, "not found");
        assert_eq!(kind_of(&missing), ToolErrorKind::Permanent);

        let unprocessable = Error::provider_http(422, "unprocessable");
        let kind = kind_of(&unprocessable);
        assert_eq!(kind, ToolErrorKind::Permanent);
        assert!(!kind.is_retryable());
    }

    #[test]
    fn invalid_request_and_serde_classify_as_validation() {
        let rejected = Error::invalid_request("bad input");
        assert_eq!(kind_of(&rejected), ToolErrorKind::Validation);

        let parse_failure: serde_json::Error =
            serde_json::from_str::<i32>("not-a-number").unwrap_err();
        let converted: Error = parse_failure.into();
        assert_eq!(kind_of(&converted), ToolErrorKind::Validation);
    }

    #[test]
    fn config_classifies_as_internal() {
        let misconfigured = Error::config("misconfigured");
        assert_eq!(kind_of(&misconfigured), ToolErrorKind::Internal);
    }

    #[test]
    fn usage_limit_exceeded_classifies_as_quota() {
        use crate::run_budget::UsageLimitBreach;

        let breach = UsageLimitBreach::Requests {
            limit: 10,
            observed: 11,
        };
        let over_budget = Error::UsageLimitExceeded(breach);
        assert_eq!(kind_of(&over_budget), ToolErrorKind::Quota);
    }
}