Skip to main content

opendev_runtime/
errors.rs

1//! Structured error types for OpenDev.
2//!
3//! Provides typed error classes with structured fields for better retry logic,
4//! error-specific recovery, and comprehensive provider error classification.
5//! Ported from `opendev/core/errors.py`.
6
7use regex::Regex;
8use serde::{Deserialize, Serialize};
9use std::sync::LazyLock;
10
/// High-level error category for classification.
///
/// Serialized by serde using the snake_case form of the variant name
/// (for example `ContextOverflow` becomes `"context_overflow"`).
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ErrorCategory {
    /// The prompt/input is too large for the model's context limit.
    ContextOverflow,
    /// Output-length error (reported as `E3002_OUTPUT_LENGTH`).
    // NOTE(review): presumably the response hit an output token cap — confirm.
    OutputLength,
    /// The provider is rate limiting, over quota, or overloaded.
    RateLimit,
    /// Authentication failed (bad API key/token, or an HTTP 401/403 response).
    Auth,
    /// Generic API error that carries an HTTP status code.
    Api,
    /// A gateway or proxy answered instead of the API (e.g. an HTML error page).
    Gateway,
    /// Insufficient permissions (reported as `E4002_PERMISSION_DENIED`).
    Permission,
    /// The target of an edit was not found in the file content.
    EditMismatch,
    /// A referenced file does not exist.
    FileNotFound,
    /// The operation timed out (reported as `E2001_TOOL_TIMEOUT`).
    Timeout,
    /// Unclassified error; also used for generic errors without a status code.
    Unknown,
}
27
/// Strategy for recovering from an error.
///
/// Each error category maps to a recommended recovery strategy that callers
/// can use to decide how to handle failures automatically.
///
/// The serde representation is internally tagged: the snake_case variant name
/// is stored under the `"type"` key next to the variant's own fields, e.g.
/// `{"type": "retry", "delay_ms": 5000, "max_attempts": 3}` or
/// `{"type": "reduce_context"}`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum RecoveryStrategy {
    /// Retry the operation after a delay.
    Retry {
        /// Milliseconds to wait before retrying.
        delay_ms: u64,
        /// Maximum number of retry attempts.
        max_attempts: u32,
    },
    /// Fall back to an alternative model.
    FallbackModel {
        /// The model identifier to fall back to.
        model: String,
    },
    /// Reduce the context window and retry.
    ReduceContext,
    /// Require user intervention with a descriptive message.
    UserIntervention {
        /// Description of what the user should do.
        message: String,
    },
}
55
56impl RecoveryStrategy {
57    /// Serialize the recovery strategy to a JSON value.
58    pub fn to_json(&self) -> serde_json::Value {
59        serde_json::to_value(self).unwrap_or_else(|_| serde_json::json!({"type": "unknown"}))
60    }
61}
62
/// Base structured error with metadata.
///
/// Optional fields that are `None` are omitted entirely from the serialized
/// form rather than being written as `null`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructuredError {
    /// Classification of the error; drives the error code and recovery strategy.
    pub category: ErrorCategory,
    /// Human-readable message; this is the text shown by the `Display` impl.
    pub message: String,
    /// Whether retrying the failed operation is expected to help.
    pub is_retryable: bool,
    /// HTTP status code associated with the error, if one is known.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub status_code: Option<u16>,
    /// Provider identifier supplied by the caller, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub provider: Option<String>,
    /// Raw error text this error was built from, if it was retained.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub original_error: Option<String>,
    /// For context overflow: how many tokens were in the prompt.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_count: Option<u64>,
    /// For context overflow: what the model limit is.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub token_limit: Option<u64>,
    /// For rate limit: seconds to wait before retrying.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub retry_after: Option<f64>,
}
85
86impl StructuredError {
87    /// Whether this error should trigger context compaction.
88    pub fn should_compact(&self) -> bool {
89        self.category == ErrorCategory::ContextOverflow
90    }
91
92    /// Whether the operation should be retried.
93    pub fn should_retry(&self) -> bool {
94        self.is_retryable
95    }
96
97    /// Return a stable error code for this error.
98    ///
99    /// Error codes follow the pattern `EXXXX_DESCRIPTION`:
100    /// - E1xxx: Rate limiting and quota errors
101    /// - E2xxx: Tool and timeout errors
102    /// - E3xxx: Context and token errors
103    /// - E4xxx: Authentication and permission errors
104    /// - E5xxx: Gateway and network errors
105    /// - E9xxx: Unknown/unclassified errors
106    pub fn error_code(&self) -> &str {
107        match self.category {
108            ErrorCategory::RateLimit => "E1001_RATE_LIMIT",
109            ErrorCategory::Timeout => "E2001_TOOL_TIMEOUT",
110            ErrorCategory::ContextOverflow => "E3001_CONTEXT_OVERFLOW",
111            ErrorCategory::OutputLength => "E3002_OUTPUT_LENGTH",
112            ErrorCategory::Auth => "E4001_AUTH_FAILED",
113            ErrorCategory::Permission => "E4002_PERMISSION_DENIED",
114            ErrorCategory::Gateway => "E5001_GATEWAY_ERROR",
115            ErrorCategory::Api => "E5002_API_ERROR",
116            ErrorCategory::EditMismatch => "E6001_EDIT_MISMATCH",
117            ErrorCategory::FileNotFound => "E6002_FILE_NOT_FOUND",
118            ErrorCategory::Unknown => "E9001_UNKNOWN",
119        }
120    }
121
122    /// Serialize this error to a structured JSON value for reporting.
123    pub fn to_json(&self) -> serde_json::Value {
124        let mut obj = serde_json::json!({
125            "error_code": self.error_code(),
126            "category": self.category,
127            "message": self.message,
128            "is_retryable": self.is_retryable,
129        });
130        let map = obj.as_object_mut().expect("json object");
131        if let Some(sc) = self.status_code {
132            map.insert("status_code".into(), serde_json::json!(sc));
133        }
134        if let Some(ref p) = self.provider {
135            map.insert("provider".into(), serde_json::json!(p));
136        }
137        if let Some(ref oe) = self.original_error {
138            map.insert("original_error".into(), serde_json::json!(oe));
139        }
140        if let Some(tc) = self.token_count {
141            map.insert("token_count".into(), serde_json::json!(tc));
142        }
143        if let Some(tl) = self.token_limit {
144            map.insert("token_limit".into(), serde_json::json!(tl));
145        }
146        if let Some(ra) = self.retry_after {
147            map.insert("retry_after".into(), serde_json::json!(ra));
148        }
149        let strategy = self.recovery_strategy();
150        map.insert("recovery_strategy".into(), strategy.to_json());
151        obj
152    }
153
154    /// Return the recommended recovery strategy for this error.
155    pub fn recovery_strategy(&self) -> RecoveryStrategy {
156        match self.category {
157            ErrorCategory::RateLimit => {
158                let delay = self
159                    .retry_after
160                    .map(|s| (s * 1000.0) as u64)
161                    .unwrap_or(5000);
162                RecoveryStrategy::Retry {
163                    delay_ms: delay,
164                    max_attempts: 3,
165                }
166            }
167            ErrorCategory::Timeout => RecoveryStrategy::Retry {
168                delay_ms: 2000,
169                max_attempts: 2,
170            },
171            ErrorCategory::ContextOverflow => RecoveryStrategy::ReduceContext,
172            ErrorCategory::OutputLength => RecoveryStrategy::Retry {
173                delay_ms: 0,
174                max_attempts: 1,
175            },
176            ErrorCategory::Auth => RecoveryStrategy::UserIntervention {
177                message: "Check your API key and authentication settings.".into(),
178            },
179            ErrorCategory::Permission => RecoveryStrategy::UserIntervention {
180                message: "Insufficient permissions. Check your access rights.".into(),
181            },
182            ErrorCategory::Gateway => RecoveryStrategy::Retry {
183                delay_ms: 3000,
184                max_attempts: 3,
185            },
186            ErrorCategory::Api => {
187                if self.is_retryable {
188                    RecoveryStrategy::Retry {
189                        delay_ms: 2000,
190                        max_attempts: 3,
191                    }
192                } else {
193                    RecoveryStrategy::FallbackModel {
194                        model: "default".into(),
195                    }
196                }
197            }
198            ErrorCategory::EditMismatch => RecoveryStrategy::UserIntervention {
199                message: "The edit target was not found. Review the file content.".into(),
200            },
201            ErrorCategory::FileNotFound => RecoveryStrategy::UserIntervention {
202                message: "File not found. Check the path and try again.".into(),
203            },
204            ErrorCategory::Unknown => RecoveryStrategy::UserIntervention {
205                message: "An unexpected error occurred. Please try again.".into(),
206            },
207        }
208    }
209
210    /// Create a generic API error.
211    pub fn api(message: impl Into<String>, status_code: Option<u16>) -> Self {
212        let code = status_code;
213        Self {
214            category: if code.is_some() {
215                ErrorCategory::Api
216            } else {
217                ErrorCategory::Unknown
218            },
219            message: message.into(),
220            is_retryable: matches!(code, Some(500 | 502 | 503 | 504)),
221            status_code: code,
222            provider: None,
223            original_error: None,
224            token_count: None,
225            token_limit: None,
226            retry_after: None,
227        }
228    }
229
230    /// Create a context overflow error.
231    pub fn context_overflow(
232        message: impl Into<String>,
233        provider: Option<String>,
234        token_count: Option<u64>,
235        token_limit: Option<u64>,
236    ) -> Self {
237        let msg = message.into();
238        Self {
239            category: ErrorCategory::ContextOverflow,
240            message: msg.clone(),
241            is_retryable: true,
242            status_code: None,
243            provider,
244            original_error: Some(msg),
245            token_count,
246            token_limit,
247            retry_after: None,
248        }
249    }
250
251    /// Create an output length error.
252    pub fn output_length(message: impl Into<String>) -> Self {
253        Self {
254            category: ErrorCategory::OutputLength,
255            message: message.into(),
256            is_retryable: true,
257            status_code: None,
258            provider: None,
259            original_error: None,
260            token_count: None,
261            token_limit: None,
262            retry_after: None,
263        }
264    }
265
266    /// Create a rate limit error.
267    pub fn rate_limit(
268        message: impl Into<String>,
269        provider: Option<String>,
270        retry_after: Option<f64>,
271    ) -> Self {
272        let msg = message.into();
273        Self {
274            category: ErrorCategory::RateLimit,
275            message: msg.clone(),
276            is_retryable: true,
277            status_code: None,
278            provider,
279            original_error: Some(msg),
280            token_count: None,
281            token_limit: None,
282            retry_after,
283        }
284    }
285
286    /// Create an authentication error.
287    pub fn auth(
288        message: impl Into<String>,
289        status_code: Option<u16>,
290        provider: Option<String>,
291    ) -> Self {
292        let msg = message.into();
293        Self {
294            category: ErrorCategory::Auth,
295            message: msg.clone(),
296            is_retryable: false,
297            status_code,
298            provider,
299            original_error: Some(msg),
300            token_count: None,
301            token_limit: None,
302            retry_after: None,
303        }
304    }
305
306    /// Create a gateway error.
307    pub fn gateway(
308        message: impl Into<String>,
309        status_code: Option<u16>,
310        provider: Option<String>,
311        original_error: Option<String>,
312    ) -> Self {
313        Self {
314            category: ErrorCategory::Gateway,
315            message: message.into(),
316            is_retryable: true,
317            status_code,
318            provider,
319            original_error,
320            token_count: None,
321            token_limit: None,
322            retry_after: None,
323        }
324    }
325}
326
327impl std::fmt::Display for StructuredError {
328    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
329        write!(f, "[{}] {}", self.error_code(), self.message)
330    }
331}
332
// Empty impl: every `Error` method keeps its default, so `source()` reports
// no underlying cause. The raw text is available via `original_error` instead.
impl std::error::Error for StructuredError {}
334
335// ---------------------------------------------------------------------------
336// Provider error pattern library
337// ---------------------------------------------------------------------------
338
/// Compiled regex patterns for each error category.
///
/// Each list is matched against the raw error message by `classify_api_error`.
struct PatternSet {
    /// Patterns indicating the prompt exceeded the model's context/token limit.
    overflow: Vec<Regex>,
    /// Patterns indicating rate limiting, quota exhaustion, or overload.
    rate_limit: Vec<Regex>,
    /// Patterns indicating an authentication failure.
    auth: Vec<Regex>,
    /// Patterns indicating a gateway/proxy response such as an HTML error page.
    gateway: Vec<Regex>,
}
346
347fn compile_patterns(patterns: &[&str]) -> Vec<Regex> {
348    patterns
349        .iter()
350        .filter_map(|p| Regex::new(&format!("(?i){}", p)).ok())
351        .collect()
352}
353
354static PATTERNS: LazyLock<PatternSet> = LazyLock::new(|| {
355    PatternSet {
356        overflow: compile_patterns(&[
357            // Anthropic
358            r"prompt is too long",
359            r"max_tokens_exceeded",
360            r"context length.*exceeded",
361            r"maximum context length",
362            // OpenAI
363            r"maximum context length.*is \d+ tokens",
364            r"This model's maximum context length is",
365            r"reduce the length of the messages",
366            r"context_length_exceeded",
367            // Google
368            r"exceeds the maximum.*tokens",
369            r"RESOURCE_EXHAUSTED.*token",
370            r"GenerateContentRequest.*too large",
371            // Azure
372            r"Tokens in prompt.*exceed.*limit",
373            // Generic
374            r"token limit",
375            r"too many tokens",
376            r"context.*too long",
377            r"input.*too long",
378            r"prompt.*too large",
379        ]),
380        rate_limit: compile_patterns(&[
381            r"rate.?limit",
382            r"too many requests",
383            r"429",
384            r"quota exceeded",
385            r"capacity",
386            r"overloaded",
387        ]),
388        auth: compile_patterns(&[
389            r"invalid.*api.?key",
390            r"authentication",
391            r"unauthorized",
392            r"invalid.*token",
393            r"api key.*invalid",
394        ]),
395        gateway: compile_patterns(&[
396            r"<!doctype html",
397            r"<html",
398            r"502 Bad Gateway",
399            r"503 Service Unavailable",
400            r"504 Gateway Timeout",
401            r"CloudFlare",
402            r"nginx",
403        ]),
404    }
405});
406
407/// Classify an API error into a structured error type.
408///
409/// Checks the raw error message against known patterns for context overflow,
410/// rate limiting, authentication failures, and gateway/proxy issues across
411/// all supported providers (Anthropic, OpenAI, Google, Azure).
412pub fn classify_api_error(
413    error_message: &str,
414    status_code: Option<u16>,
415    provider: Option<&str>,
416) -> StructuredError {
417    let patterns = &*PATTERNS;
418    let provider_owned = provider.map(|s| s.to_string());
419
420    // Check gateway patterns first (HTML responses)
421    for re in &patterns.gateway {
422        if re.is_match(error_message) {
423            let friendly_msg = match status_code {
424                Some(401) => {
425                    "Authentication failed at gateway. Check your API key and proxy settings."
426                        .to_string()
427                }
428                Some(403) => "Access denied at gateway. Check your permissions and proxy settings."
429                    .to_string(),
430                _ => "API returned an HTML error page. Check your proxy/VPN settings or try again."
431                    .to_string(),
432            };
433            let truncated = if error_message.len() > 500 {
434                &error_message[..500]
435            } else {
436                error_message
437            };
438            return StructuredError::gateway(
439                friendly_msg,
440                status_code,
441                provider_owned,
442                Some(truncated.to_string()),
443            );
444        }
445    }
446
447    // Context overflow
448    for re in &patterns.overflow {
449        if re.is_match(error_message) {
450            return StructuredError::context_overflow(error_message, provider_owned, None, None);
451        }
452    }
453
454    // Rate limiting
455    for re in &patterns.rate_limit {
456        if re.is_match(error_message) {
457            static RETRY_AFTER_RE: LazyLock<Regex> = LazyLock::new(|| {
458                Regex::new(r"(?i)retry.?after[:\s]+(\d+\.?\d*)")
459                    .expect("valid regex: retry-after pattern")
460            });
461            let retry_after = RETRY_AFTER_RE
462                .captures(error_message)
463                .and_then(|caps| caps.get(1))
464                .and_then(|m| m.as_str().parse::<f64>().ok());
465            return StructuredError::rate_limit(error_message, provider_owned, retry_after);
466        }
467    }
468
469    // Auth errors — check status code first, then patterns
470    if matches!(status_code, Some(401 | 403)) {
471        return StructuredError::auth(error_message, status_code, provider_owned);
472    }
473    for re in &patterns.auth {
474        if re.is_match(error_message) {
475            return StructuredError::auth(error_message, status_code, provider_owned);
476        }
477    }
478
479    // Generic API error
480    StructuredError {
481        category: if status_code.is_some() {
482            ErrorCategory::Api
483        } else {
484            ErrorCategory::Unknown
485        },
486        message: error_message.to_string(),
487        is_retryable: matches!(status_code, Some(500 | 502 | 503 | 504)),
488        status_code,
489        provider: provider_owned,
490        original_error: Some(error_message.to_string()),
491        token_count: None,
492        token_limit: None,
493        retry_after: None,
494    }
495}
496
// Unit tests are kept in the separate file `errors_tests.rs`; this module is
// only compiled for test builds.
#[cfg(test)]
#[path = "errors_tests.rs"]
mod tests;