infernum_core/
error.rs

1//! Error types for the Infernum ecosystem.
2
3use std::time::Duration;
4use thiserror::Error;
5
6/// Result type alias using [`Error`].
7pub type Result<T> = std::result::Result<T, Error>;
8
9/// Unified error type for the Infernum ecosystem.
10#[derive(Error, Debug)]
11pub enum Error {
12    /// Model was not found in the registry.
13    #[error("Model not found: {model_id}")]
14    ModelNotFound {
15        /// The requested model identifier.
16        model_id: String,
17    },
18
19    /// Model architecture is not supported.
20    #[error("Unsupported model architecture: {architecture}")]
21    UnsupportedArchitecture {
22        /// The unsupported architecture name.
23        architecture: String,
24    },
25
26    /// Out of memory during inference.
27    #[error("Out of memory: requested {requested} bytes, available {available} bytes")]
28    OutOfMemory {
29        /// Bytes requested.
30        requested: usize,
31        /// Bytes available.
32        available: usize,
33    },
34
35    /// Context length exceeded for the model.
36    #[error("Context length exceeded: {current} tokens > {max} max tokens")]
37    ContextLengthExceeded {
38        /// Current token count.
39        current: u32,
40        /// Maximum allowed tokens.
41        max: u32,
42    },
43
44    /// Invalid configuration provided.
45    #[error("Invalid configuration: {message}")]
46    InvalidConfig {
47        /// Description of the configuration error.
48        message: String,
49    },
50
51    /// Backend-specific error.
52    #[error("Backend error: {message}")]
53    Backend {
54        /// Backend name (cuda, metal, cpu, etc.).
55        backend: String,
56        /// Error message.
57        message: String,
58    },
59
60    /// Operation timed out.
61    #[error("Operation timed out after {duration:?}")]
62    Timeout {
63        /// Duration before timeout.
64        duration: Duration,
65    },
66
67    /// Rate limited by the system.
68    #[error("Rate limited: retry after {retry_after:?}")]
69    RateLimited {
70        /// Duration to wait before retrying.
71        retry_after: Duration,
72    },
73
74    /// Tokenization error.
75    #[error("Tokenization error: {message}")]
76    Tokenization {
77        /// Error message.
78        message: String,
79    },
80
81    /// Model loading error.
82    #[error("Failed to load model: {message}")]
83    ModelLoad {
84        /// Error message.
85        message: String,
86    },
87
88    /// I/O error.
89    #[error("I/O error: {0}")]
90    Io(#[from] std::io::Error),
91
92    /// Serialization error.
93    #[error("Serialization error: {0}")]
94    Serialization(#[from] serde_json::Error),
95
96    /// Internal error (unexpected state).
97    #[error("Internal error: {message}")]
98    Internal {
99        /// Error message.
100        message: String,
101    },
102}
103
104impl Error {
105    /// Returns `true` if this error is retryable.
106    #[must_use]
107    pub fn is_retryable(&self) -> bool {
108        matches!(self, Self::Timeout { .. } | Self::RateLimited { .. })
109    }
110
111    /// Returns `true` if this error is due to resource exhaustion.
112    #[must_use]
113    pub fn is_resource_exhaustion(&self) -> bool {
114        matches!(
115            self,
116            Self::OutOfMemory { .. } | Self::ContextLengthExceeded { .. }
117        )
118    }
119
120    /// Creates an internal error with the given message.
121    #[must_use]
122    pub fn internal(message: impl Into<String>) -> Self {
123        Self::Internal {
124            message: message.into(),
125        }
126    }
127
128    /// Creates a backend error with the given backend name and message.
129    #[must_use]
130    pub fn backend(backend: impl Into<String>, message: impl Into<String>) -> Self {
131        Self::Backend {
132            backend: backend.into(),
133            message: message.into(),
134        }
135    }
136
137    /// Creates a model load error.
138    #[must_use]
139    pub fn model_load(message: impl Into<String>) -> Self {
140        Self::ModelLoad {
141            message: message.into(),
142        }
143    }
144}