Skip to main content

zeph_mcp/
error.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use zeph_common::ToolName;
5
/// Typed error code for MCP tool call retry and recovery classification.
///
/// Used by [`McpError::code`] and callers such as the agent retry loop to decide
/// whether an operation should be retried, backed off, or abandoned.
///
/// Only [`Transient`](Self::Transient), [`RateLimited`](Self::RateLimited) and
/// [`ServerError`](Self::ServerError) are reported as retryable by
/// [`McpErrorCode::is_retryable`].
///
/// With serde, each variant is (de)serialized under its `snake_case` name,
/// e.g. `RateLimited` as `rate_limited`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum McpErrorCode {
    /// Transient error: retry is likely to succeed.
    ///
    /// [`McpError::code`] reports this for connection failures and tool call timeouts.
    Transient,
    /// Rate limited: back off and retry.
    RateLimited,
    /// Invalid input: do not retry without changing parameters.
    ///
    /// [`McpError::code`] reports this for invalid URLs and locked tool lists.
    InvalidInput,
    /// Auth failure: re-authenticate or escalate.
    ///
    /// [`McpError::code`] reports this for OAuth errors and OAuth callback timeouts.
    AuthFailure,
    /// Server error: may be transient, retry with backoff.
    ///
    /// [`McpError::code`] reports this for embedding errors.
    ServerError,
    /// Not found: resource or tool does not exist.
    ///
    /// [`McpError::code`] reports this for unknown servers and unknown tools.
    NotFound,
    /// Blocked by policy rules.
    ///
    /// [`McpError::code`] reports this for policy violations, SSRF blocks,
    /// disallowed commands and blocked environment variables.
    PolicyBlocked,
}
28
29impl McpErrorCode {
30    /// Whether this error code suggests the operation can be retried.
31    #[must_use]
32    pub fn is_retryable(self) -> bool {
33        matches!(
34            self,
35            Self::Transient | Self::RateLimited | Self::ServerError
36        )
37    }
38}
39
/// Crate-wide error type for all MCP operations.
///
/// Variants cover connection failures, tool call errors, policy blocks, OAuth flows,
/// and infrastructure errors (Qdrant, JSON serialization). Use [`McpError::code`] to
/// obtain a typed [`McpErrorCode`] for retry/recovery decisions; it returns `None`
/// for variants that carry no classification.
///
/// The `Display` text of each variant is the format string in its `#[error(...)]`
/// attribute.
///
/// # Examples
///
/// ```
/// use zeph_mcp::error::{McpError, McpErrorCode};
///
/// let err = McpError::Timeout {
///     server_id: "github".to_owned(),
///     tool_name: "create_issue".into(),
///     timeout_secs: 30,
/// };
/// assert_eq!(err.code(), Some(McpErrorCode::Transient));
/// assert!(err.code().unwrap().is_retryable());
/// ```
#[derive(Debug, thiserror::Error)]
pub enum McpError {
    /// Connecting to the server `server_id` failed; `message` holds the detail.
    #[error("connection failed for server '{server_id}': {message}")]
    Connection { server_id: String, message: String },

    /// Calling `tool_name` on server `server_id` failed; `message` holds the detail.
    #[error("tool call failed: {server_id}/{tool_name}: {message}")]
    ToolCall {
        server_id: String,
        tool_name: ToolName,
        message: String,
        /// Typed error code for retry classification.
        code: McpErrorCode,
    },

    /// The server `server_id` was not found.
    #[error("server '{server_id}' not found")]
    ServerNotFound { server_id: String },

    /// The server `server_id` is already connected.
    #[error("server '{server_id}' is already connected")]
    ServerAlreadyConnected { server_id: String },

    /// The tool `tool_name` was not found on server `server_id`.
    #[error("tool '{tool_name}' not found on server '{server_id}'")]
    ToolNotFound {
        server_id: String,
        tool_name: ToolName,
    },

    /// A call to `tool_name` on server `server_id` timed out after `timeout_secs` seconds.
    #[error("tool call timed out after {timeout_secs}s: {server_id}/{tool_name}")]
    Timeout {
        server_id: String,
        tool_name: ToolName,
        timeout_secs: u64,
    },

    /// Boxed error from the Qdrant client; created via `From<Box<QdrantError>>`.
    #[error("Qdrant error: {0}")]
    Qdrant(#[from] Box<qdrant_client::QdrantError>),

    /// `serde_json` error; created via `From<serde_json::Error>`.
    #[error("JSON error: {0}")]
    Json(#[from] serde_json::Error),

    /// Failed integer conversion; created via `From<TryFromIntError>`.
    #[error("integer conversion: {0}")]
    IntConversion(#[from] std::num::TryFromIntError),

    /// `url` was blocked because it resolves to the private/reserved IP address `addr`.
    #[error("SSRF blocked: URL '{url}' resolves to private/reserved IP {addr}")]
    SsrfBlocked { url: String, addr: String },

    /// `url` is not valid; `message` holds the reason.
    #[error("invalid URL '{url}': {message}")]
    InvalidUrl { url: String, message: String },

    /// Embedding error described by the contained message.
    #[error("embedding error: {0}")]
    Embedding(String),

    /// The MCP command `command` is not allowed.
    #[error("MCP command '{command}' not allowed")]
    CommandNotAllowed { command: String },

    /// The environment variable `var_name` is blocked for MCP server processes.
    #[error("env var '{var_name}' is blocked for MCP server processes")]
    EnvVarBlocked { var_name: String },

    /// A policy violation described by the contained message.
    #[error("policy violation: {0}")]
    PolicyViolation(String),

    /// OAuth error for server `server_id`; `message` holds the detail.
    #[error("OAuth error for server '{server_id}': {message}")]
    OAuthError { server_id: String, message: String },

    /// The OAuth callback for server `server_id` timed out after `timeout_secs` seconds.
    #[error("OAuth callback timed out for server '{server_id}' after {timeout_secs}s")]
    OAuthCallbackTimeout {
        server_id: String,
        timeout_secs: u64,
    },

    /// A tool list refresh for `server_id` was rejected because the list is locked
    /// after the initial connect.
    #[error("tool list refresh rejected for '{server_id}': list is locked after initial connect")]
    ToolListLocked { server_id: String },
}
131
132impl McpError {
133    /// Return the typed error code for this error variant.
134    #[must_use]
135    pub fn code(&self) -> Option<McpErrorCode> {
136        match self {
137            Self::ToolCall { code, .. } => Some(*code),
138            Self::Timeout { .. } | Self::Connection { .. } => Some(McpErrorCode::Transient),
139            Self::ServerNotFound { .. } | Self::ToolNotFound { .. } => Some(McpErrorCode::NotFound),
140            Self::PolicyViolation(_)
141            | Self::SsrfBlocked { .. }
142            | Self::CommandNotAllowed { .. }
143            | Self::EnvVarBlocked { .. } => Some(McpErrorCode::PolicyBlocked),
144            Self::OAuthError { .. } | Self::OAuthCallbackTimeout { .. } => {
145                Some(McpErrorCode::AuthFailure)
146            }
147            Self::InvalidUrl { .. } | Self::ToolListLocked { .. } => {
148                Some(McpErrorCode::InvalidInput)
149            }
150            Self::Embedding(_) => Some(McpErrorCode::ServerError),
151            _ => None,
152        }
153    }
154}
155
/// Unit tests for the `Display` output of [`McpError`] and for the
/// [`McpError::code`] / [`McpErrorCode::is_retryable`] classification.
#[cfg(test)]
mod tests {
    use super::*;

    /// `Connection` renders the server id followed by the failure message.
    #[test]
    fn connection_error_display() {
        let err = McpError::Connection {
            server_id: "github".into(),
            message: "refused".into(),
        };
        assert_eq!(
            err.to_string(),
            "connection failed for server 'github': refused"
        );
    }

    /// `ToolCall` renders `server/tool: message`; the code is not part of the text.
    #[test]
    fn tool_call_error_display() {
        let err = McpError::ToolCall {
            server_id: "fs".into(),
            tool_name: "read_file".into(),
            message: "not found".into(),
            code: McpErrorCode::ServerError,
        };
        assert_eq!(err.to_string(), "tool call failed: fs/read_file: not found");
    }

    /// Only `Transient`, `RateLimited` and `ServerError` are retryable.
    #[test]
    fn error_code_is_retryable() {
        assert!(McpErrorCode::Transient.is_retryable());
        assert!(McpErrorCode::RateLimited.is_retryable());
        assert!(McpErrorCode::ServerError.is_retryable());
        assert!(!McpErrorCode::InvalidInput.is_retryable());
        assert!(!McpErrorCode::AuthFailure.is_retryable());
        assert!(!McpErrorCode::NotFound.is_retryable());
        assert!(!McpErrorCode::PolicyBlocked.is_retryable());
    }

    /// `code()` passes through the `ToolCall` code and classifies timeouts and
    /// policy violations.
    #[test]
    fn mcp_error_code_method() {
        let err = McpError::ToolCall {
            server_id: "s".into(),
            tool_name: "t".into(),
            message: "e".into(),
            code: McpErrorCode::RateLimited,
        };
        assert_eq!(err.code(), Some(McpErrorCode::RateLimited));

        let timeout = McpError::Timeout {
            server_id: "s".into(),
            tool_name: "t".into(),
            timeout_secs: 30,
        };
        assert_eq!(timeout.code(), Some(McpErrorCode::Transient));

        let policy = McpError::PolicyViolation("denied".into());
        assert_eq!(policy.code(), Some(McpErrorCode::PolicyBlocked));
    }

    /// Connection failures are classified as transient.
    #[test]
    fn connection_code_is_transient() {
        let err = McpError::Connection {
            server_id: "s".into(),
            message: "refused".into(),
        };
        assert_eq!(err.code(), Some(McpErrorCode::Transient));
    }

    /// Unknown servers and unknown tools are classified as `NotFound`.
    #[test]
    fn not_found_variants_code() {
        let server = McpError::ServerNotFound {
            server_id: "missing".into(),
        };
        assert_eq!(server.code(), Some(McpErrorCode::NotFound));

        let tool = McpError::ToolNotFound {
            server_id: "fs".into(),
            tool_name: "delete".into(),
        };
        assert_eq!(tool.code(), Some(McpErrorCode::NotFound));
    }

    /// SSRF blocks, disallowed commands and blocked env vars are `PolicyBlocked`.
    #[test]
    fn policy_blocked_variants_code() {
        let ssrf = McpError::SsrfBlocked {
            url: "http://internal".into(),
            addr: "10.0.0.1".into(),
        };
        assert_eq!(ssrf.code(), Some(McpErrorCode::PolicyBlocked));

        let command = McpError::CommandNotAllowed {
            command: "rm".into(),
        };
        assert_eq!(command.code(), Some(McpErrorCode::PolicyBlocked));

        let env_var = McpError::EnvVarBlocked {
            var_name: "LD_PRELOAD".into(),
        };
        assert_eq!(env_var.code(), Some(McpErrorCode::PolicyBlocked));
    }

    /// OAuth errors and OAuth callback timeouts are classified as `AuthFailure`.
    #[test]
    fn auth_failure_variants_code() {
        let oauth = McpError::OAuthError {
            server_id: "s".into(),
            message: "denied".into(),
        };
        assert_eq!(oauth.code(), Some(McpErrorCode::AuthFailure));

        let callback = McpError::OAuthCallbackTimeout {
            server_id: "s".into(),
            timeout_secs: 60,
        };
        assert_eq!(callback.code(), Some(McpErrorCode::AuthFailure));
    }

    /// Invalid URLs and locked tool lists are classified as `InvalidInput`.
    #[test]
    fn invalid_input_variants_code() {
        let url = McpError::InvalidUrl {
            url: "not a url".into(),
            message: "missing scheme".into(),
        };
        assert_eq!(url.code(), Some(McpErrorCode::InvalidInput));

        let locked = McpError::ToolListLocked {
            server_id: "s".into(),
        };
        assert_eq!(locked.code(), Some(McpErrorCode::InvalidInput));
    }

    /// Embedding errors are classified as `ServerError`.
    #[test]
    fn embedding_code_is_server_error() {
        let err = McpError::Embedding("model unavailable".into());
        assert_eq!(err.code(), Some(McpErrorCode::ServerError));
    }

    /// Variants without a retry classification report no code.
    #[test]
    fn unclassified_variants_have_no_code() {
        let connected = McpError::ServerAlreadyConnected {
            server_id: "github".into(),
        };
        assert_eq!(connected.code(), None);

        let json = McpError::from(
            serde_json::from_str::<serde_json::Value>("{").expect_err("truncated JSON must fail"),
        );
        assert_eq!(json.code(), None);

        let int = McpError::from(u8::try_from(300_u32).expect_err("300 does not fit in u8"));
        assert_eq!(int.code(), None);
    }

    /// `ServerNotFound` renders the missing server id.
    #[test]
    fn server_not_found_display() {
        let err = McpError::ServerNotFound {
            server_id: "missing".into(),
        };
        assert_eq!(err.to_string(), "server 'missing' not found");
    }

    /// `ToolNotFound` renders the tool name before the server id.
    #[test]
    fn tool_not_found_display() {
        let err = McpError::ToolNotFound {
            server_id: "fs".into(),
            tool_name: "delete".into(),
        };
        assert_eq!(err.to_string(), "tool 'delete' not found on server 'fs'");
    }

    /// `ServerAlreadyConnected` renders the server id.
    #[test]
    fn server_already_connected_display() {
        let err = McpError::ServerAlreadyConnected {
            server_id: "github".into(),
        };
        assert_eq!(err.to_string(), "server 'github' is already connected");
    }

    /// `Timeout` renders the timeout in seconds followed by `server/tool`.
    #[test]
    fn timeout_error_display() {
        let err = McpError::Timeout {
            server_id: "slow".into(),
            tool_name: "query".into(),
            timeout_secs: 30,
        };
        assert_eq!(err.to_string(), "tool call timed out after 30s: slow/query");
    }

    /// A `Timeout` whose `tool_name` is `initialize` renders that name and stays transient.
    #[test]
    fn handshake_timeout_has_initialize_tool_name() {
        let err = McpError::Timeout {
            server_id: "my-server".into(),
            tool_name: "initialize".into(),
            timeout_secs: 10,
        };
        assert_eq!(
            err.to_string(),
            "tool call timed out after 10s: my-server/initialize"
        );
        assert_eq!(err.code(), Some(McpErrorCode::Transient));
    }

    /// A `Timeout` whose `tool_name` is `tools/list` renders that name and stays transient.
    #[test]
    fn list_tools_timeout_has_tools_list_tool_name() {
        let err = McpError::Timeout {
            server_id: "my-server".into(),
            tool_name: "tools/list".into(),
            timeout_secs: 30,
        };
        assert_eq!(
            err.to_string(),
            "tool call timed out after 30s: my-server/tools/list"
        );
        assert_eq!(err.code(), Some(McpErrorCode::Transient));
    }
}