1use thiserror::Error;
4
5pub type Result<T> = std::result::Result<T, RingKernelError>;
7
8#[derive(Error, Debug)]
10pub enum RingKernelError {
11 #[error("kernel not found: {0}")]
14 KernelNotFound(String),
15
16 #[error("kernel already active: {0}")]
18 KernelAlreadyActive(String),
19
20 #[error("kernel not active: {0}")]
22 KernelNotActive(String),
23
24 #[error("kernel already terminated: {0}")]
26 KernelTerminated(String),
27
28 #[error("invalid state transition from {from:?} to {to:?}")]
30 InvalidStateTransition {
31 from: String,
33 to: String,
35 },
36
37 #[error("invalid state: expected {expected}, got {actual}")]
39 InvalidState {
40 expected: String,
42 actual: String,
44 },
45
46 #[error("kernel launch failed: {0}")]
48 LaunchFailed(String),
49
50 #[error("kernel compilation failed: {0}")]
52 CompilationError(String),
53
54 #[error("queue full: capacity {capacity}, attempted to enqueue message")]
57 QueueFull {
58 capacity: usize,
60 },
61
62 #[error("queue empty")]
64 QueueEmpty,
65
66 #[error("serialization error: {0}")]
68 SerializationError(String),
69
70 #[error("deserialization error: {0}")]
72 DeserializationError(String),
73
74 #[error("message validation failed: {0}")]
76 ValidationError(String),
77
78 #[error("message too large: {size} bytes (max: {max} bytes)")]
80 MessageTooLarge {
81 size: usize,
83 max: usize,
85 },
86
87 #[error("message timeout after {0:?}")]
89 Timeout(std::time::Duration),
90
91 #[error("GPU memory allocation failed: {size} bytes - {reason}")]
94 AllocationFailed {
95 size: usize,
97 reason: String,
99 },
100
101 #[error("host memory allocation failed: {size} bytes")]
103 HostAllocationFailed {
104 size: usize,
106 },
107
108 #[error("memory transfer failed: {0}")]
110 TransferFailed(String),
111
112 #[error("invalid alignment: expected {expected}, got {actual}")]
114 InvalidAlignment {
115 expected: usize,
117 actual: usize,
119 },
120
121 #[error("out of GPU memory: requested {requested} bytes, available {available} bytes")]
123 OutOfMemory {
124 requested: usize,
126 available: usize,
128 },
129
130 #[error("memory pool exhausted")]
132 PoolExhausted,
133
134 #[error("memory error: {0}")]
136 MemoryError(String),
137
138 #[error("backend not available: {0}")]
141 BackendUnavailable(String),
142
143 #[error("backend initialization failed: {0}")]
145 BackendInitFailed(String),
146
147 #[error("no GPU device found")]
149 NoDeviceFound,
150
151 #[error("device selection failed: {0}")]
153 DeviceSelectionFailed(String),
154
155 #[error("backend error: {0}")]
157 BackendError(String),
158
159 #[error("deadlock detected")]
162 DeadlockDetected,
163
164 #[error("lock poisoned")]
166 LockPoisoned,
167
168 #[error("channel closed")]
170 ChannelClosed,
171
172 #[error("clock skew too large: {skew_ms}ms (max: {max_ms}ms)")]
175 ClockSkew {
176 skew_ms: u64,
178 max_ms: u64,
180 },
181
182 #[error("invalid timestamp")]
184 InvalidTimestamp,
185
186 #[error("K2K error: {0}")]
189 K2KError(String),
190
191 #[error("K2K destination not found: {0}")]
193 K2KDestinationNotFound(String),
194
195 #[error("K2K delivery failed: {0}")]
197 K2KDeliveryFailed(String),
198
199 #[error("pub/sub error: {0}")]
202 PubSubError(String),
203
204 #[error("topic not found: {0}")]
206 TopicNotFound(String),
207
208 #[error("subscription error: {0}")]
210 SubscriptionError(String),
211
212 #[error("multi-GPU error: {0}")]
215 MultiGpuError(String),
216
217 #[error("device not available: {0}")]
219 DeviceNotAvailable(String),
220
221 #[error("cross-device transfer failed: {0}")]
223 CrossDeviceTransferFailed(String),
224
225 #[error("telemetry error: {0}")]
228 TelemetryError(String),
229
230 #[error("metrics collection failed: {0}")]
232 MetricsCollectionFailed(String),
233
234 #[error("invalid configuration: {0}")]
237 InvalidConfig(String),
238
239 #[error("missing configuration: {0}")]
241 MissingConfig(String),
242
243 #[error("I/O error: {0}")]
246 IoError(#[from] std::io::Error),
247
248 #[error("internal error: {0}")]
251 Internal(String),
252
253 #[error("feature not supported: {0}")]
255 NotSupported(String),
256
257 #[error("operation cancelled")]
259 Cancelled,
260}
261
262impl RingKernelError {
263 pub fn is_recoverable(&self) -> bool {
265 matches!(
266 self,
267 RingKernelError::QueueFull { .. }
268 | RingKernelError::QueueEmpty
269 | RingKernelError::Timeout(_)
270 | RingKernelError::PoolExhausted
271 )
272 }
273
274 pub fn is_resource_error(&self) -> bool {
276 matches!(
277 self,
278 RingKernelError::AllocationFailed { .. }
279 | RingKernelError::HostAllocationFailed { .. }
280 | RingKernelError::OutOfMemory { .. }
281 | RingKernelError::PoolExhausted
282 )
283 }
284
285 pub fn is_fatal(&self) -> bool {
287 matches!(
288 self,
289 RingKernelError::BackendInitFailed(_)
290 | RingKernelError::NoDeviceFound
291 | RingKernelError::LockPoisoned
292 | RingKernelError::Internal(_)
293 )
294 }
295}
296
#[cfg(test)]
mod tests {
    use super::*;

    /// `Display` output follows the `#[error]` templates on the variants.
    #[test]
    fn test_error_display() {
        let err = RingKernelError::KernelNotFound("test_kernel".to_string());
        assert_eq!(err.to_string(), "kernel not found: test_kernel");

        let err = RingKernelError::QueueFull { capacity: 1024 };
        assert!(err.to_string().contains("1024"));
    }

    /// Each predicate accepts its own variants and rejects unrelated ones.
    #[test]
    fn test_error_classification() {
        assert!(RingKernelError::QueueFull { capacity: 1024 }.is_recoverable());
        assert!(RingKernelError::OutOfMemory {
            requested: 1000,
            available: 100
        }
        .is_resource_error());
        assert!(RingKernelError::LockPoisoned.is_fatal());

        // Negative cases: without these, a predicate that always returned
        // `true` would still pass the test.
        let unrelated = RingKernelError::KernelNotFound("test_kernel".to_string());
        assert!(!unrelated.is_recoverable());
        assert!(!unrelated.is_resource_error());
        assert!(!unrelated.is_fatal());
        assert!(!RingKernelError::LockPoisoned.is_recoverable());
        assert!(!RingKernelError::QueueFull { capacity: 1024 }.is_fatal());

        // `PoolExhausted` belongs to both the recoverable and resource sets.
        assert!(RingKernelError::PoolExhausted.is_recoverable());
        assert!(RingKernelError::PoolExhausted.is_resource_error());
        assert!(!RingKernelError::PoolExhausted.is_fatal());
    }
}