Skip to main content

zeph_tools/
error_taxonomy.rs

// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
// SPDX-License-Identifier: MIT OR Apache-2.0

//! 12-category tool invocation error taxonomy (arXiv:2601.16280).
//!
//! Provides fine-grained error classification beyond the binary `ErrorKind`
//! (Transient/Permanent), enabling category-specific recovery strategies,
//! structured LLM feedback, and quality-attributable reputation scoring.
9
10use crate::executor::ErrorKind;
11
12/// Fine-grained 12-category classification of tool invocation errors.
13///
14/// Each category determines retry eligibility, LLM parameter reformat path,
15/// quality attribution for reputation scoring, and structured feedback content.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, serde::Serialize)]
17pub enum ToolErrorCategory {
18    // ── Initialization failures ──────────────────────────────────────────
19    /// Tool name not found in the registry (LLM requested a non-existent tool).
20    ToolNotFound,
21
22    // ── Parameter failures ───────────────────────────────────────────────
23    /// LLM provided invalid or missing parameters for the tool.
24    InvalidParameters,
25    /// Parameter type mismatch (e.g., string where integer expected).
26    TypeMismatch,
27
28    // ── Permission / policy failures ─────────────────────────────────────
29    /// Blocked by security policy (blocklist, sandbox, trust gate).
30    PolicyBlocked,
31    /// Requires user confirmation before execution.
32    ConfirmationRequired,
33
34    // ── Execution failures (permanent) ───────────────────────────────────
35    /// HTTP 403/404 or equivalent permanent resource rejection.
36    PermanentFailure,
37    /// Operation cancelled by the user.
38    Cancelled,
39
40    // ── Execution failures (transient) ───────────────────────────────────
41    /// HTTP 429 (rate limit) or resource exhaustion.
42    RateLimited,
43    /// HTTP 5xx or equivalent server-side error.
44    ServerError,
45    /// Network connectivity failure (DNS, connection refused, reset).
46    NetworkError,
47    /// Operation timed out.
48    Timeout,
49}
50
51impl ToolErrorCategory {
52    /// Whether this error category is eligible for automatic retry with backoff.
53    #[must_use]
54    pub fn is_retryable(self) -> bool {
55        matches!(
56            self,
57            Self::RateLimited | Self::ServerError | Self::NetworkError | Self::Timeout
58        )
59    }
60
61    /// Whether the LLM should be asked to reformat parameters and retry.
62    ///
63    /// Only `InvalidParameters` and `TypeMismatch` trigger the reformat path.
64    /// A single reformat attempt is allowed; if it fails, the error is final.
65    #[must_use]
66    pub fn needs_parameter_reformat(self) -> bool {
67        matches!(self, Self::InvalidParameters | Self::TypeMismatch)
68    }
69
70    /// Whether this error is attributable to LLM output quality.
71    ///
72    /// Quality failures affect reputation scoring in triage routing and are the
73    /// only category for which `attempt_self_reflection` should be triggered.
74    /// Infrastructure errors (network, timeout, server, rate limit) are NOT
75    /// the model's fault and must never trigger self-reflection.
76    #[must_use]
77    pub fn is_quality_failure(self) -> bool {
78        matches!(
79            self,
80            Self::InvalidParameters | Self::TypeMismatch | Self::ToolNotFound
81        )
82    }
83
84    /// Coarse classification for backward compatibility with existing `ErrorKind`.
85    #[must_use]
86    pub fn error_kind(self) -> ErrorKind {
87        if self.is_retryable() {
88            ErrorKind::Transient
89        } else {
90            ErrorKind::Permanent
91        }
92    }
93
94    /// Human-readable label for audit logs, TUI status indicators, and structured feedback.
95    #[must_use]
96    pub fn label(self) -> &'static str {
97        match self {
98            Self::ToolNotFound => "tool_not_found",
99            Self::InvalidParameters => "invalid_parameters",
100            Self::TypeMismatch => "type_mismatch",
101            Self::PolicyBlocked => "policy_blocked",
102            Self::ConfirmationRequired => "confirmation_required",
103            Self::PermanentFailure => "permanent_failure",
104            Self::Cancelled => "cancelled",
105            Self::RateLimited => "rate_limited",
106            Self::ServerError => "server_error",
107            Self::NetworkError => "network_error",
108            Self::Timeout => "timeout",
109        }
110    }
111
112    /// Recovery suggestion for the LLM based on error category.
113    #[must_use]
114    pub fn suggestion(self) -> &'static str {
115        match self {
116            Self::ToolNotFound => {
117                "Check the tool name. Use tool_definitions to see available tools."
118            }
119            Self::InvalidParameters => "Review the tool schema and provide correct parameters.",
120            Self::TypeMismatch => "Check parameter types against the tool schema.",
121            Self::PolicyBlocked => {
122                "This operation is blocked by security policy. Try an alternative approach."
123            }
124            Self::ConfirmationRequired => "This operation requires user confirmation.",
125            Self::PermanentFailure => {
126                "This resource is not available. Try an alternative approach."
127            }
128            Self::Cancelled => "Operation was cancelled by the user.",
129            Self::RateLimited => "Rate limit exceeded. The system will retry if possible.",
130            Self::ServerError => "Server error. The system will retry if possible.",
131            Self::NetworkError => "Network error. The system will retry if possible.",
132            Self::Timeout => "Operation timed out. The system will retry if possible.",
133        }
134    }
135}
136
137/// Structured error feedback injected as `tool_result` content for classified errors.
138///
139/// Provides the LLM with actionable information about what went wrong and what to
140/// do next, replacing the opaque `[error] ...` string format.
141#[derive(Debug, Clone, serde::Serialize)]
142pub struct ToolErrorFeedback {
143    pub category: ToolErrorCategory,
144    pub message: String,
145    pub retryable: bool,
146}
147
148impl ToolErrorFeedback {
149    /// Format as a structured string for injection into `tool_result` content.
150    #[must_use]
151    pub fn format_for_llm(&self) -> String {
152        format!(
153            "[tool_error]\ncategory: {}\nerror: {}\nsuggestion: {}\nretryable: {}",
154            self.category.label(),
155            self.message,
156            self.category.suggestion(),
157            self.retryable,
158        )
159    }
160}
161
162/// Classify an HTTP status code into a `ToolErrorCategory`.
163#[must_use]
164pub fn classify_http_status(status: u16) -> ToolErrorCategory {
165    match status {
166        400 | 422 => ToolErrorCategory::InvalidParameters,
167        401 | 403 => ToolErrorCategory::PolicyBlocked,
168        429 => ToolErrorCategory::RateLimited,
169        500..=599 => ToolErrorCategory::ServerError,
170        // 404, 410, and all other non-success codes: permanent failure.
171        _ => ToolErrorCategory::PermanentFailure,
172    }
173}
174
175/// Classify an `io::Error` into a `ToolErrorCategory`.
176///
177/// # Note on `io::ErrorKind::NotFound`
178///
179/// `NotFound` from an `Execution` error means a file or binary was not found at the
180/// OS level (e.g., `bash: command not found`). This is NOT the same as "tool not found
181/// in registry" (`ToolNotFound`). We map it to `PermanentFailure` to avoid incorrectly
182/// penalizing the model for OS-level path issues.
183#[must_use]
184pub fn classify_io_error(err: &std::io::Error) -> ToolErrorCategory {
185    match err.kind() {
186        std::io::ErrorKind::TimedOut => ToolErrorCategory::Timeout,
187        std::io::ErrorKind::ConnectionRefused
188        | std::io::ErrorKind::ConnectionReset
189        | std::io::ErrorKind::ConnectionAborted
190        | std::io::ErrorKind::BrokenPipe => ToolErrorCategory::NetworkError,
191        // WouldBlock / Interrupted are async runtime signals, not true network failures,
192        // but they are transient and retryable — map to ServerError as the generic
193        // retryable catch-all rather than NetworkError to avoid misleading audit labels.
194        std::io::ErrorKind::WouldBlock | std::io::ErrorKind::Interrupted => {
195            ToolErrorCategory::ServerError
196        }
197        std::io::ErrorKind::PermissionDenied => ToolErrorCategory::PolicyBlocked,
198        // OS-level file/binary not found is a permanent execution failure, not a registry miss.
199        // ToolNotFound is reserved for registry misses (LLM requested an unknown tool name).
200        _ => ToolErrorCategory::PermanentFailure,
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;

    use std::collections::HashSet;
    use std::io;

    /// Every `ToolErrorCategory` variant, in declaration order.
    ///
    /// Hand-maintained: extend it whenever a variant is added so the table-driven
    /// checks below keep covering the whole enum.
    const ALL: [ToolErrorCategory; 11] = [
        ToolErrorCategory::ToolNotFound,
        ToolErrorCategory::InvalidParameters,
        ToolErrorCategory::TypeMismatch,
        ToolErrorCategory::PolicyBlocked,
        ToolErrorCategory::ConfirmationRequired,
        ToolErrorCategory::PermanentFailure,
        ToolErrorCategory::Cancelled,
        ToolErrorCategory::RateLimited,
        ToolErrorCategory::ServerError,
        ToolErrorCategory::NetworkError,
        ToolErrorCategory::Timeout,
    ];

    /// Infrastructure categories: retryable and never attributed to the model.
    const TRANSIENT: [ToolErrorCategory; 4] = [
        ToolErrorCategory::RateLimited,
        ToolErrorCategory::ServerError,
        ToolErrorCategory::NetworkError,
        ToolErrorCategory::Timeout,
    ];

    /// Categories attributed to LLM output quality.
    const QUALITY: [ToolErrorCategory; 3] = [
        ToolErrorCategory::InvalidParameters,
        ToolErrorCategory::TypeMismatch,
        ToolErrorCategory::ToolNotFound,
    ];

    /// Build a feedback value with an explicitly chosen `retryable` flag.
    fn feedback(category: ToolErrorCategory, message: &str, retryable: bool) -> ToolErrorFeedback {
        ToolErrorFeedback {
            category,
            message: message.to_owned(),
            retryable,
        }
    }

    /// Classify a bare `io::ErrorKind` (the classifier only looks at the kind).
    fn classify_kind(kind: io::ErrorKind) -> ToolErrorCategory {
        classify_io_error(&io::Error::from(kind))
    }

    // ── Predicates: checked for every variant ────────────────────────────────

    #[test]
    fn retryable_categories() {
        for cat in ALL {
            assert_eq!(
                cat.is_retryable(),
                TRANSIENT.contains(&cat),
                "is_retryable mismatch for {cat:?}"
            );
        }
    }

    #[test]
    fn quality_failure_categories() {
        // Infrastructure errors must NOT be quality failures — they must not trigger
        // self-reflection, as they are not attributable to LLM output quality.
        for cat in ALL {
            assert_eq!(
                cat.is_quality_failure(),
                QUALITY.contains(&cat),
                "is_quality_failure mismatch for {cat:?}"
            );
        }
    }

    #[test]
    fn needs_parameter_reformat() {
        for cat in ALL {
            let expected = matches!(
                cat,
                ToolErrorCategory::InvalidParameters | ToolErrorCategory::TypeMismatch
            );
            assert_eq!(
                cat.needs_parameter_reformat(),
                expected,
                "needs_parameter_reformat mismatch for {cat:?}"
            );
        }
    }

    #[test]
    fn error_kind_backward_compat() {
        // Retryable categories → Transient, everything else → Permanent.
        for cat in ALL {
            let expected = if TRANSIENT.contains(&cat) {
                ErrorKind::Transient
            } else {
                ErrorKind::Permanent
            };
            assert_eq!(cat.error_kind(), expected, "error_kind mismatch for {cat:?}");
        }
    }

    #[test]
    fn cancelled_is_not_retryable_and_not_quality_failure() {
        assert!(!ToolErrorCategory::Cancelled.is_retryable());
        assert!(!ToolErrorCategory::Cancelled.is_quality_failure());
        assert!(!ToolErrorCategory::Cancelled.needs_parameter_reformat());
    }

    // ── Labels and suggestions ───────────────────────────────────────────────

    #[test]
    fn all_categories_have_labels() {
        for cat in ALL {
            assert!(!cat.label().is_empty(), "category {cat:?} has empty label");
            assert!(
                !cat.suggestion().is_empty(),
                "category {cat:?} has empty suggestion"
            );
        }
        // Labels identify categories in audit logs, so they must not collide.
        let distinct: HashSet<&str> = ALL.iter().map(|cat| cat.label()).collect();
        assert_eq!(distinct.len(), ALL.len(), "labels must be unique");
    }

    #[test]
    fn transient_suggestion_neutral_no_automatically() {
        // Suggestion text must not promise "automatically" — retry may or may not fire
        // (executor may not be retryable, or retries may be exhausted).
        for cat in TRANSIENT {
            let s = cat.suggestion();
            assert!(
                !s.contains("automatically"),
                "{cat:?} suggestion must not promise automatic retry: {s}"
            );
        }
    }

    // ── classify_http_status ─────────────────────────────────────────────────

    #[test]
    fn classify_http_status_codes() {
        let table = [
            (403, ToolErrorCategory::PolicyBlocked),
            (404, ToolErrorCategory::PermanentFailure),
            (410, ToolErrorCategory::PermanentFailure),
            (422, ToolErrorCategory::InvalidParameters),
            (429, ToolErrorCategory::RateLimited),
            (500, ToolErrorCategory::ServerError),
            (503, ToolErrorCategory::ServerError),
            (599, ToolErrorCategory::ServerError),
            // Codes without a dedicated arm fall into the permanent catch-all.
            (200, ToolErrorCategory::PermanentFailure),
            (600, ToolErrorCategory::PermanentFailure),
        ];
        for (status, expected) in table {
            assert_eq!(classify_http_status(status), expected, "status {status}");
        }
    }

    #[test]
    fn classify_http_400_is_invalid_parameters() {
        assert_eq!(
            classify_http_status(400),
            ToolErrorCategory::InvalidParameters
        );
    }

    #[test]
    fn classify_http_401_is_policy_blocked() {
        assert_eq!(classify_http_status(401), ToolErrorCategory::PolicyBlocked);
    }

    #[test]
    fn classify_http_502_is_server_error() {
        assert_eq!(classify_http_status(502), ToolErrorCategory::ServerError);
    }

    // ── classify_io_error ────────────────────────────────────────────────────

    #[test]
    fn classify_io_not_found_is_permanent_not_tool_not_found() {
        // B2 fix: OS-level NotFound must NOT map to ToolNotFound.
        // ToolNotFound is reserved for registry misses (LLM requested unknown tool name).
        let err = io::Error::new(io::ErrorKind::NotFound, "No such file or directory");
        assert_eq!(classify_io_error(&err), ToolErrorCategory::PermanentFailure);
    }

    #[test]
    fn classify_io_connection_errors() {
        for kind in [
            io::ErrorKind::ConnectionRefused,
            io::ErrorKind::ConnectionReset,
            io::ErrorKind::ConnectionAborted,
            io::ErrorKind::BrokenPipe,
        ] {
            assert_eq!(
                classify_kind(kind),
                ToolErrorCategory::NetworkError,
                "{kind:?}"
            );
        }

        let timed_out = io::Error::new(io::ErrorKind::TimedOut, "timed out");
        assert_eq!(classify_io_error(&timed_out), ToolErrorCategory::Timeout);
    }

    #[test]
    fn classify_io_runtime_signals_are_retryable_server_errors() {
        for kind in [io::ErrorKind::WouldBlock, io::ErrorKind::Interrupted] {
            let cat = classify_kind(kind);
            assert_eq!(cat, ToolErrorCategory::ServerError, "{kind:?}");
            assert!(cat.is_retryable(), "{kind:?} must be retryable");
        }
    }

    #[test]
    fn classify_io_permission_denied_is_policy_blocked() {
        assert_eq!(
            classify_kind(io::ErrorKind::PermissionDenied),
            ToolErrorCategory::PolicyBlocked
        );
    }

    #[test]
    fn classify_io_unlisted_kinds_are_permanent() {
        for kind in [io::ErrorKind::InvalidData, io::ErrorKind::AlreadyExists] {
            assert_eq!(
                classify_kind(kind),
                ToolErrorCategory::PermanentFailure,
                "{kind:?}"
            );
        }
    }

    // ── ToolErrorFeedback: category-specific content ──────────────────────────

    #[test]
    fn tool_error_feedback_format() {
        let fb = feedback(
            ToolErrorCategory::InvalidParameters,
            "missing required field: url",
            false,
        );
        let s = fb.format_for_llm();
        assert!(s.contains("[tool_error]"));
        assert!(s.contains("invalid_parameters"));
        assert!(s.contains("missing required field: url"));
        assert!(s.contains("retryable: false"));
        // Pin the complete layout, not just the fragments.
        assert_eq!(
            s,
            "[tool_error]\ncategory: invalid_parameters\nerror: missing required field: url\n\
             suggestion: Review the tool schema and provide correct parameters.\n\
             retryable: false"
        );
    }

    #[test]
    fn feedback_permanent_failure_not_retryable() {
        let fb = feedback(
            ToolErrorCategory::PermanentFailure,
            "resource does not exist",
            false,
        );
        let s = fb.format_for_llm();
        assert!(s.contains("permanent_failure"));
        assert!(s.contains("resource does not exist"));
        assert!(s.contains("retryable: false"));
        // Suggestion must not mention auto-retry for a permanent error.
        let suggestion = ToolErrorCategory::PermanentFailure.suggestion();
        assert!(!suggestion.contains("retry automatically"), "{suggestion}");
    }

    #[test]
    fn feedback_rate_limited_is_retryable_and_mentions_retry() {
        let fb = feedback(ToolErrorCategory::RateLimited, "too many requests", true);
        let s = fb.format_for_llm();
        assert!(s.contains("rate_limited"));
        assert!(s.contains("retryable: true"));
        // RateLimited suggestion must mention retry but not promise it is automatic.
        let suggestion = ToolErrorCategory::RateLimited.suggestion();
        assert!(suggestion.contains("retry"), "{suggestion}");
        assert!(!suggestion.contains("automatically"), "{suggestion}");
    }

    #[test]
    fn feedback_retryable_matches_category_is_retryable() {
        // Feedback built from `is_retryable()` must be retryable exactly for the
        // transient categories, and the rendered text must agree with the flag.
        for cat in ALL {
            let fb = feedback(cat, "error", cat.is_retryable());
            let expected = TRANSIENT.contains(&cat);
            assert_eq!(fb.retryable, expected, "{cat:?} retryable flag");
            assert!(
                fb.format_for_llm()
                    .ends_with(&format!("retryable: {expected}")),
                "{cat:?} rendered retryable line"
            );
        }
    }

    // ── B4 regression: infrastructure errors must NOT be quality failures ─────

    #[test]
    fn b4_infrastructure_errors_not_quality_failures() {
        // These categories must never trigger self-reflection (B4 fix).
        for cat in TRANSIENT {
            assert!(
                !cat.is_quality_failure(),
                "{cat:?} must not be a quality failure"
            );
            // And they must be retryable.
            assert!(cat.is_retryable(), "{cat:?} must be retryable");
        }
    }

    #[test]
    fn b4_quality_failures_may_trigger_reflection() {
        // These categories should trigger self-reflection.
        for cat in QUALITY {
            assert!(
                cat.is_quality_failure(),
                "{cat:?} must be a quality failure"
            );
            // Quality failures are not retryable.
            assert!(!cat.is_retryable(), "{cat:?} must not be retryable");
        }
    }

    // ── B2 regression: io::NotFound must NOT produce ToolNotFound ────────────

    #[test]
    fn b2_io_not_found_maps_to_permanent_failure_not_tool_not_found() {
        let err = io::Error::new(io::ErrorKind::NotFound, "bash: command not found");
        let cat = classify_io_error(&err);
        assert_ne!(
            cat,
            ToolErrorCategory::ToolNotFound,
            "OS-level NotFound must NOT map to ToolNotFound"
        );
        assert_eq!(
            cat,
            ToolErrorCategory::PermanentFailure,
            "OS-level NotFound must map to PermanentFailure"
        );
    }
}