Skip to main content

dynamo_runtime/
error.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Dynamo Error System
5//!
6//! This module provides a standardized error type for Dynamo with support for:
7//! - Categorized error types via [`ErrorType`] enum
8//! - Error chaining via the standard [`std::error::Error::source()`] method
9//! - Serialization for network transmission via serde
10//!
11//! # DynamoError
12//!
13//! [`DynamoError`] is the standardized error type for Dynamo. It can be created
14//! directly or converted from any [`std::error::Error`]:
15//!
16//! ```rust,ignore
17//! use dynamo_runtime::error::{DynamoError, ErrorType};
18//!
19//! // Simple error
20//! let err = DynamoError::msg("something failed");
21//!
22//! // Typed error with cause
23//! let cause = std::io::Error::other("io error");
24//! let err = DynamoError::builder()
25//!     .error_type(ErrorType::Unknown)
26//!     .message("operation failed")
27//!     .cause(cause)
28//!     .build();
29//!
30//! // Convert from any std::error::Error
31//! let std_err = std::io::Error::other("io error");
32//! let dynamo_err = DynamoError::from(Box::new(std_err) as Box<dyn std::error::Error>);
33//! ```
34
35use serde::{Deserialize, Serialize};
36use std::fmt;
37
38// ============================================================================
39// ErrorType Enum
40// ============================================================================
41
/// Categorizes errors into a fixed set of standard types.
///
/// Consumers (e.g., the migration module) inspect the error type to decide
/// what action to take, rather than the error defining its own behavior.
///
/// The `Display` form is the variant name; [`ErrorType::Backend`] is rendered
/// as `Backend` immediately followed by the [`BackendError`] variant name
/// (e.g. `BackendEngineShutdown`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ErrorType {
    /// Uncategorized or unknown error.
    Unknown,
    /// The request contains invalid input (e.g., prompt exceeds context length).
    InvalidArgument,
    /// Failed to establish a connection to a remote worker.
    CannotConnect,
    /// An established connection was lost unexpectedly.
    Disconnected,
    /// A connection or request timed out.
    ConnectionTimeout,
    /// The backend accepted the request but stopped responding (stream inactivity timeout).
    ResponseTimeout,
    /// The request was cancelled (e.g., client disconnected).
    Cancelled,
    /// The system does not have enough resources to handle the request.
    ResourceExhausted,
    /// Error originating from a backend engine; the payload carries the
    /// backend-specific subcategory.
    Backend(BackendError),
}
63
64impl fmt::Display for ErrorType {
65    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
66        match self {
67            ErrorType::Unknown => write!(f, "Unknown"),
68            ErrorType::InvalidArgument => write!(f, "InvalidArgument"),
69            ErrorType::CannotConnect => write!(f, "CannotConnect"),
70            ErrorType::Disconnected => write!(f, "Disconnected"),
71            ErrorType::ConnectionTimeout => write!(f, "ConnectionTimeout"),
72            ErrorType::ResponseTimeout => write!(f, "ResponseTimeout"),
73            ErrorType::Cancelled => write!(f, "Cancelled"),
74            ErrorType::ResourceExhausted => write!(f, "ResourceExhausted"),
75            ErrorType::Backend(sub) => write!(f, "Backend{sub}"),
76        }
77    }
78}
79
/// Backend engine error subcategories.
///
/// Carried as the payload of [`ErrorType::Backend`] to distinguish the
/// different kinds of backend engine errors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum BackendError {
    /// Uncategorized or unknown backend error.
    Unknown,
    /// The request contains invalid input (e.g., prompt exceeds context length).
    InvalidArgument,
    /// Failed to establish a connection to a remote worker.
    CannotConnect,
    /// An established connection was lost unexpectedly.
    Disconnected,
    /// A connection or request timed out.
    ConnectionTimeout,
    /// The backend accepted the request but stopped responding (stream inactivity timeout).
    ResponseTimeout,
    /// The request was cancelled (e.g., client disconnected).
    Cancelled,
    /// The engine process has shut down or crashed.
    EngineShutdown,
    /// The response stream was terminated before completion (e.g., engine dropped mid-stream).
    StreamIncomplete,
}
106
107impl fmt::Display for BackendError {
108    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
109        match self {
110            BackendError::Unknown => write!(f, "Unknown"),
111            BackendError::InvalidArgument => write!(f, "InvalidArgument"),
112            BackendError::CannotConnect => write!(f, "CannotConnect"),
113            BackendError::Disconnected => write!(f, "Disconnected"),
114            BackendError::ConnectionTimeout => write!(f, "ConnectionTimeout"),
115            BackendError::ResponseTimeout => write!(f, "ResponseTimeout"),
116            BackendError::Cancelled => write!(f, "Cancelled"),
117            BackendError::EngineShutdown => write!(f, "EngineShutdown"),
118            BackendError::StreamIncomplete => write!(f, "StreamIncomplete"),
119        }
120    }
121}
122
123// ============================================================================
124// DynamoError - The Standardized Error Type
125// ============================================================================
126
/// The standardized error type for Dynamo.
///
/// `DynamoError` is a serializable, chainable error that:
/// - Carries an [`ErrorType`] for categorization
/// - Supports error chaining via [`std::error::Error::source()`]
/// - Is serializable for network transmission via `Annotated`
/// - Can be created from any [`std::error::Error`]
///
/// # Display
///
/// `Display` shows only the current error (standard Rust convention),
/// formatted as `<error type>: <message>`. Use `source()` to walk the
/// cause chain:
///
/// ```rust,ignore
/// use std::error::Error;
///
/// let err = DynamoError::builder()
///     .message("outer")
///     .cause(DynamoError::msg("inner"))
///     .build();
/// println!("{}", err); // "Unknown: outer"
///
/// let mut cause = err.source();
/// while let Some(e) = cause {
///     println!("caused by: {}", e); // "caused by: Unknown: inner"
///     cause = e.source();
/// }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DynamoError {
    /// Category of this error.
    error_type: ErrorType,
    /// Human-readable description of this error.
    message: String,
    /// The underlying cause, if any; exposed through `source()`.
    ///
    /// Left out of the serialized form when `None`, and defaulted to `None`
    /// when the field is absent on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    caused_by: Option<Box<DynamoError>>,
}
151
152impl DynamoError {
153    /// Create a builder for constructing a `DynamoError`.
154    pub fn builder() -> DynamoErrorBuilder {
155        DynamoErrorBuilder::default()
156    }
157
158    /// Shorthand to create an `Unknown` error with just a message and no cause.
159    pub fn msg(message: impl Into<String>) -> Self {
160        Self::builder().message(message).build()
161    }
162
163    /// Returns the error type.
164    pub fn error_type(&self) -> ErrorType {
165        self.error_type
166    }
167
168    /// Returns the error message.
169    pub fn message(&self) -> &str {
170        &self.message
171    }
172}
173
174impl fmt::Display for DynamoError {
175    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
176        write!(f, "{}: {}", self.error_type, self.message)
177    }
178}
179
180impl std::error::Error for DynamoError {
181    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
182        self.caused_by
183            .as_deref()
184            .map(|e| e as &(dyn std::error::Error + 'static))
185    }
186}
187
188/// Convert from a reference to any `std::error::Error`.
189///
190/// If the error is already a `DynamoError`, it is cloned. Otherwise, it is
191/// wrapped as `ErrorType::Unknown` with the display string as the message.
192/// The source chain is recursively converted, preserving `DynamoError` instances.
193impl<'a> From<&'a (dyn std::error::Error + 'static)> for DynamoError {
194    fn from(err: &'a (dyn std::error::Error + 'static)) -> Self {
195        if let Some(dynamo_err) = err.downcast_ref::<DynamoError>() {
196            return dynamo_err.clone();
197        }
198
199        Self {
200            error_type: ErrorType::Unknown,
201            message: err.to_string(),
202            caused_by: err.source().map(|s| Box::new(DynamoError::from(s))),
203        }
204    }
205}
206
207/// Convert from an owned boxed `std::error::Error`.
208///
209/// If the error is already a `DynamoError`, ownership is taken without cloning.
210/// Otherwise, falls back to the reference-based conversion.
211impl From<Box<dyn std::error::Error + 'static>> for DynamoError {
212    fn from(err: Box<dyn std::error::Error + 'static>) -> Self {
213        match err.downcast::<DynamoError>() {
214            Ok(dynamo_err) => *dynamo_err,
215            Err(err) => DynamoError::from(&*err as &(dyn std::error::Error + 'static)),
216        }
217    }
218}
219
220// ============================================================================
221// DynamoErrorBuilder
222// ============================================================================
223
/// Builder for constructing a [`DynamoError`].
///
/// Every field is optional; fields that were never set fall back to the
/// defaults applied by [`DynamoErrorBuilder::build`].
///
/// # Example
/// ```rust,ignore
/// let err = DynamoError::builder()
///     .error_type(ErrorType::Disconnected)
///     .message("worker lost")
///     .cause(some_io_error)
///     .build();
/// ```
#[derive(Default)]
pub struct DynamoErrorBuilder {
    /// Error category; `build()` uses `ErrorType::Unknown` when unset.
    error_type: Option<ErrorType>,
    /// Error message; `build()` uses an empty string when unset.
    message: Option<String>,
    /// Cause, already converted to a `DynamoError`; `None` means no cause.
    caused_by: Option<Box<DynamoError>>,
}
240
241impl DynamoErrorBuilder {
242    /// Set the error type.
243    pub fn error_type(mut self, error_type: ErrorType) -> Self {
244        self.error_type = Some(error_type);
245        self
246    }
247
248    /// Set the error message.
249    pub fn message(mut self, message: impl Into<String>) -> Self {
250        self.message = Some(message.into());
251        self
252    }
253
254    /// Set the cause from any `std::error::Error`.
255    ///
256    /// If the cause is already a `DynamoError`, it is preserved as-is.
257    /// Otherwise, it is converted to a `DynamoError` with `ErrorType::Unknown`.
258    pub fn cause(mut self, cause: impl std::error::Error + 'static) -> Self {
259        self.caused_by = Some(Box::new(DynamoError::from(
260            &cause as &(dyn std::error::Error + 'static),
261        )));
262        self
263    }
264
265    /// Build the `DynamoError`.
266    ///
267    /// Defaults: `error_type` → `Unknown`, `message` → `""`, `cause` → `None`.
268    pub fn build(self) -> DynamoError {
269        DynamoError {
270            error_type: self.error_type.unwrap_or(ErrorType::Unknown),
271            message: self.message.unwrap_or_default(),
272            caused_by: self.caused_by,
273        }
274    }
275}
276
277// ============================================================================
278// Utility Functions
279// ============================================================================
280
281/// Check whether an error chain contains a specific set of error types
282/// while not containing any of the excluded error types.
283///
284/// Walks the chain via `source()`, inspecting each error that can be downcast
285/// to `DynamoError`. Returns `false` immediately if any error's type is in
286/// `exclude_set`. Otherwise, returns `true` if at least one error's type is
287/// in `match_set`. Errors that are not `DynamoError` are skipped.
288pub fn match_error_chain(
289    err: &(dyn std::error::Error + 'static),
290    match_set: &[ErrorType],
291    exclude_set: &[ErrorType],
292) -> bool {
293    let mut found = false;
294    let mut current: Option<&(dyn std::error::Error + 'static)> = Some(err);
295
296    while let Some(e) = current {
297        if let Some(dynamo_err) = e.downcast_ref::<DynamoError>() {
298            if exclude_set.contains(&dynamo_err.error_type()) {
299                return false;
300            }
301            if match_set.contains(&dynamo_err.error_type()) {
302                found = true;
303            }
304        }
305        current = e.source();
306    }
307
308    found
309}
310
311// ============================================================================
312// Tests
313// ============================================================================
314
#[cfg(test)]
mod tests {
    use super::*;
    use std::error::Error;

    // Compile-time assertions that DynamoError is std::error::Error + Send + Sync + 'static.
    // These fail at compile time if a future change breaks these guarantees.
    const _: () = {
        fn assert_stderror<T: std::error::Error>() {}
        fn assert_send<T: Send>() {}
        fn assert_sync<T: Sync>() {}
        fn assert_static<T: 'static>() {}
        fn assert_all() {
            assert_stderror::<DynamoError>();
            assert_send::<DynamoError>();
            assert_sync::<DynamoError>();
            assert_static::<DynamoError>();
        }
    };

    #[test]
    fn test_msg_constructor() {
        let err = DynamoError::msg("something failed");
        assert_eq!(err.error_type(), ErrorType::Unknown);
        assert_eq!(err.message(), "something failed");
        assert!(err.source().is_none());
    }

    #[test]
    fn test_new_constructor_with_cause() {
        let cause = std::io::Error::other("io error");
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("operation failed")
            .cause(cause)
            .build();

        assert_eq!(err.error_type(), ErrorType::Unknown);
        assert_eq!(err.message(), "operation failed");
        assert!(err.source().is_some());
    }

    #[test]
    fn test_display_shows_only_current_error() {
        let cause = std::io::Error::other("io error");
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("operation failed")
            .cause(cause)
            .build();

        // Display should only show the current error, not the chain
        assert_eq!(err.to_string(), "Unknown: operation failed");
    }

    #[test]
    fn test_source_chain() {
        let cause = std::io::Error::other("io error");
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("operation failed")
            .cause(cause)
            .build();

        // source() should return the cause
        let source = err.source().unwrap();
        assert!(source.to_string().contains("io error"));
    }

    #[test]
    fn test_from_boxed_std_error() {
        let std_err = std::io::Error::other("io error");
        let boxed: Box<dyn std::error::Error> = Box::new(std_err);
        let dynamo_err = DynamoError::from(boxed);

        assert_eq!(dynamo_err.error_type(), ErrorType::Unknown);
        assert_eq!(dynamo_err.message(), "io error");
    }

    #[test]
    fn test_from_boxed_takes_ownership_of_dynamo_error() {
        let inner = DynamoError::msg("original");
        let boxed: Box<dyn std::error::Error> = Box::new(inner);
        let dynamo_err = DynamoError::from(boxed);

        // Should take ownership, not clone or wrap
        assert_eq!(dynamo_err.error_type(), ErrorType::Unknown);
        assert_eq!(dynamo_err.message(), "original");
    }

    #[test]
    fn test_from_boxed_with_source_chain() {
        #[derive(Debug)]
        struct OuterError {
            source: std::io::Error,
        }

        impl fmt::Display for OuterError {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                write!(f, "outer error occurred")
            }
        }

        impl std::error::Error for OuterError {
            fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
                Some(&self.source)
            }
        }

        let inner = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
        let outer = OuterError { source: inner };
        let boxed: Box<dyn std::error::Error> = Box::new(outer);
        let dynamo_err = DynamoError::from(boxed);

        assert_eq!(dynamo_err.message(), "outer error occurred");
        assert!(dynamo_err.source().is_some());

        let cause = dynamo_err.source().unwrap();
        assert!(cause.to_string().contains("file not found"));
    }

    #[test]
    fn test_serialization_roundtrip() {
        let cause = DynamoError::msg("inner cause");
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("outer error")
            .cause(cause)
            .build();

        let json = serde_json::to_string(&err).unwrap();
        let deserialized: DynamoError = serde_json::from_str(&json).unwrap();

        assert_eq!(deserialized.error_type(), ErrorType::Unknown);
        assert_eq!(deserialized.message(), "outer error");
        assert!(deserialized.source().is_some());

        let cause = deserialized
            .source()
            .unwrap()
            .downcast_ref::<DynamoError>()
            .unwrap();
        assert_eq!(cause.message(), "inner cause");
    }

    #[test]
    fn test_error_type_display() {
        assert_eq!(ErrorType::Unknown.to_string(), "Unknown");
        assert_eq!(ErrorType::InvalidArgument.to_string(), "InvalidArgument");
        assert_eq!(ErrorType::CannotConnect.to_string(), "CannotConnect");
        assert_eq!(ErrorType::Disconnected.to_string(), "Disconnected");
        assert_eq!(
            ErrorType::ConnectionTimeout.to_string(),
            "ConnectionTimeout"
        );
        assert_eq!(ErrorType::ResponseTimeout.to_string(), "ResponseTimeout");
        assert_eq!(ErrorType::Cancelled.to_string(), "Cancelled");
        assert_eq!(
            ErrorType::ResourceExhausted.to_string(),
            "ResourceExhausted"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::Unknown).to_string(),
            "BackendUnknown"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::InvalidArgument).to_string(),
            "BackendInvalidArgument"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::CannotConnect).to_string(),
            "BackendCannotConnect"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::Disconnected).to_string(),
            "BackendDisconnected"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::ConnectionTimeout).to_string(),
            "BackendConnectionTimeout"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::Cancelled).to_string(),
            "BackendCancelled"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::EngineShutdown).to_string(),
            "BackendEngineShutdown"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::StreamIncomplete).to_string(),
            "BackendStreamIncomplete"
        );
        assert_eq!(
            ErrorType::Backend(BackendError::ResponseTimeout).to_string(),
            "BackendResponseTimeout"
        );
    }

    #[test]
    fn test_match_error_chain_finds_type_in_cause() {
        let cause = DynamoError::builder()
            .error_type(ErrorType::Disconnected)
            .message("worker lost")
            .build();
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("request failed")
            .cause(cause)
            .build();

        // The match may come from anywhere in the chain, not just the head.
        assert!(match_error_chain(&err, &[ErrorType::Disconnected], &[]));
        assert!(match_error_chain(&err, &[ErrorType::Unknown], &[]));
        assert!(!match_error_chain(&err, &[ErrorType::Cancelled], &[]));
        // An empty match set can never match.
        assert!(!match_error_chain(&err, &[], &[]));
    }

    #[test]
    fn test_match_error_chain_exclusion_wins() {
        let cause = DynamoError::builder()
            .error_type(ErrorType::Disconnected)
            .message("worker lost")
            .build();
        let err = DynamoError::builder()
            .error_type(ErrorType::Unknown)
            .message("request failed")
            .cause(cause)
            .build();

        // Excluded type at the head of the chain.
        assert!(!match_error_chain(
            &err,
            &[ErrorType::Disconnected],
            &[ErrorType::Unknown]
        ));
        // Excluded type deeper in the chain than the match.
        assert!(!match_error_chain(
            &err,
            &[ErrorType::Unknown],
            &[ErrorType::Disconnected]
        ));
        // Excluding a type that is not in the chain has no effect.
        assert!(match_error_chain(
            &err,
            &[ErrorType::Disconnected],
            &[ErrorType::Cancelled]
        ));
    }

    #[test]
    fn test_match_error_chain_skips_non_dynamo_errors() {
        // A plain std error is not a DynamoError, so it can never match.
        let io_err = std::io::Error::other("io error");
        assert!(!match_error_chain(&io_err, &[ErrorType::Unknown], &[]));
    }
}