Skip to main content

vyre_driver/backend/
error.rs

1//! Actionable backend error taxonomy.
2
3use crate::Error;
4
/// Machine-readable classification of a backend failure kind.
///
/// Use this to drive retry logic, circuit breakers, and alerting rules
/// without parsing human-readable message strings. The type implements
/// `Hash`, so a code can be used directly as a `HashMap`/`HashSet` key
/// (for example in a per-code retry-policy table).
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ErrorCode {
    /// Backend device reported insufficient memory.
    DeviceOutOfMemory,
    /// The backend does not support a required feature.
    UnsupportedFeature,
    /// A lock used by the backend was found poisoned.
    ///
    /// This is generally caused by a panic while the lock was held (for an
    /// `RwLock`, while it was held for writing) and indicates an internal
    /// synchronization bug in process state.
    PoisonedLock,
    /// Kernel-source compilation failed.
    ///
    /// The code covers any kernel-source compile failure, or kernel binary
    /// validation failure, on any backend.
    KernelCompileFailed,
    /// Command dispatch or queue submission failed.
    DispatchFailed,
    /// The program itself is invalid for this backend.
    InvalidProgram,
    /// Unclassified error (produced by [`BackendError::new`]).
    Unknown,
}
34
35impl ErrorCode {
36    /// Stable integer identifier for API consumers and diagnostic catalogs.
37    ///
38    /// These ids are append-only. Existing assignments must not be reused or
39    /// renumbered because downstream systems may persist them in telemetry,
40    /// alert rules, and retry policies.
41    #[must_use]
42    pub const fn stable_id(self) -> u32 {
43        match self {
44            Self::DeviceOutOfMemory => 1001,
45            Self::UnsupportedFeature => 1002,
46            Self::PoisonedLock => 1003,
47            Self::KernelCompileFailed => 1004,
48            Self::DispatchFailed => 1005,
49            Self::InvalidProgram => 1006,
50            Self::Unknown => 1999,
51        }
52    }
53}
54
55/// Actionable backend dispatch failure.
56///
57/// Every error that flows through the frozen `VyreBackend` contract must
58/// include remediation text beginning with `Fix: `. This guarantees that
59/// conform reports are directly actionable for backend authors and that
60/// consumers never receive an opaque failure string.
61///
62/// Prefer specific variants (`DeviceOutOfMemory`, `KernelCompileFailed`,
63/// etc.) over [`BackendError::new`] in new backends. The `Raw` variant
64/// exists solely for backward compatibility with existing call sites.
65///
66/// # Examples
67///
68/// ```
69/// use vyre::BackendError;
70///
71/// let err = BackendError::new("adapter not found. Fix: install a compatible device driver.");
72/// assert!(err.message().contains("Fix:"));
73/// ```
74#[non_exhaustive]
75#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
76pub enum BackendError {
77    /// Device ran out of memory during buffer allocation or dispatch.
78    #[error(
79        "device out of memory: requested {requested} bytes, {available} available.          Fix: reduce buffer sizes or split the dispatch into smaller chunks."
80    )]
81    DeviceOutOfMemory {
82        /// Bytes requested that triggered the OOM condition.
83        requested: u64,
84        /// Bytes reported available at the time of the failure.
85        available: u64,
86    },
87
88    /// The backend does not support a required feature.
89    #[error(
90        "unsupported feature `{name}` on backend `{backend}`.          Fix: check backend capability before using this feature, or select a backend that supports it."
91    )]
92    UnsupportedFeature {
93        /// Feature name (e.g. `"subgroup_ops"`, `"f16"`).
94        name: String,
95        /// Backend identifier (matches [`crate::backend::VyreBackend::id`]).
96        backend: String,
97    },
98
99    /// Internal lock poisoning was detected during backend synchronization.
100    #[error(
101        "backend lock poisoned: {lock_error}. Fix: report the panic origin, prevent panics on lock guards, and retry the backend operation."
102    )]
103    PoisonedLock {
104        /// Diagnostic details from the poison error.
105        lock_error: String,
106    },
107
108    /// GPU kernel-source compilation failed.
109    ///
110    /// "Shader" in the variant name is historical and generalised
111    ///  -  the code applies to any kernel-source compile failure across
112    /// backends. A 2.0 rename to
113    /// `KernelCompileFailed` is tracked in the semver-policy doc.
114    #[error(
115        "kernel-source compile failed on backend `{backend}`: {compiler_message}.          Fix: validate the vyre IR before lowering and check the lowered kernel source for type errors."
116    )]
117    KernelCompileFailed {
118        /// Backend identifier.
119        backend: String,
120        /// Compiler error text or lowered shader / IR excerpt.
121        compiler_message: String,
122    },
123
124    /// Command dispatch or GPU queue submission failed.
125    #[error(
126        "dispatch failed (code {code:?}): {message}.          Fix: verify adapter limits, buffer sizes, and GPU queue health before retrying."
127    )]
128    DispatchFailed {
129        /// Optional backend-specific numeric error code.
130        code: Option<i32>,
131        /// Human-readable failure detail.
132        message: String,
133    },
134
135    /// The program is structurally invalid for this backend.
136    #[error("{fix}")]
137    InvalidProgram {
138        /// Actionable description, should begin with `Fix: `.
139        fix: String,
140    },
141
142    /// Fallback for backends that have not migrated to structured errors.
143    ///
144    /// New backends should use a specific variant. This variant exists
145    /// solely to preserve backward compatibility with [`BackendError::new`].
146    #[error("{0}")]
147    Raw(String),
148}
149
150impl From<crate::Error> for BackendError {
151    fn from(error: crate::Error) -> Self {
152        Self::new(error.to_string())
153    }
154}
155
156impl BackendError {
157    /// Build a fallback [`BackendError::Raw`] after verifying the message is actionable.
158    ///
159    /// If the supplied message already contains a `Fix: ` section it is used
160    /// verbatim. Otherwise a generic fallback hint is appended. Prefer specific
161    /// variants (`DeviceOutOfMemory`, `KernelCompileFailed`, etc.) over this
162    /// constructor in new code.
163    ///
164    /// # Examples
165    ///
166    /// ```
167    /// use vyre::BackendError;
168    ///
169    /// let err = BackendError::new("queue full. Fix: retry with a smaller dispatch size.");
170    /// assert_eq!(err.to_string(), "queue full. Fix: retry with a smaller dispatch size.");
171    /// ```
172    pub fn new(message: impl Into<String>) -> Self {
173        let message = message.into();
174        if message.contains("Fix: ") {
175            return Self::Raw(message);
176        }
177        Self::Raw(format!(
178            "{message}. Fix: include backend-specific recovery guidance."
179        ))
180    }
181
182    /// Build an actionable unsupported-extension error for opaque IR payloads.
183    #[must_use]
184    pub fn unsupported_extension(
185        backend: impl Into<String>,
186        extension_kind: &str,
187        debug_identity: &str,
188    ) -> Self {
189        Self::UnsupportedFeature {
190            name: format!("opaque IR extension `{extension_kind}`/`{debug_identity}`"),
191            backend: backend.into(),
192        }
193    }
194
195    /// Build a structured lock-poisoning error.
196    ///
197    /// This constructor accepts any `PoisonError` from `RwLock` operations
198    /// and returns an actionable error carrying the root poison metadata.
199    pub fn poisoned_lock<T>(error: std::sync::PoisonError<T>) -> Self {
200        Self::PoisonedLock {
201            lock_error: error.to_string(),
202        }
203    }
204
205    /// Human-readable failure message, equivalent to [`ToString::to_string`].
206    ///
207    /// Prefer explicit `match` on variants or [`ErrorCode`] for programmatic
208    /// error handling; avoid string-parsing this output.
209    #[must_use]
210    pub fn message(&self) -> String {
211        self.to_string()
212    }
213
214    /// Consume this error and return its message string.
215    ///
216    /// Useful in `map_err` chains that expect `String`.
217    #[must_use]
218    pub fn into_message(self) -> String {
219        self.to_string()
220    }
221
222    /// Machine-readable error code for programmatic error handling.
223    ///
224    /// Use this to drive retry logic, circuit breakers, and alerting
225    /// without parsing human-readable message strings.
226    #[must_use]
227    pub fn code(&self) -> ErrorCode {
228        match self {
229            Self::DeviceOutOfMemory { .. } => ErrorCode::DeviceOutOfMemory,
230            Self::UnsupportedFeature { .. } => ErrorCode::UnsupportedFeature,
231            Self::PoisonedLock { .. } => ErrorCode::PoisonedLock,
232            Self::KernelCompileFailed { .. } => ErrorCode::KernelCompileFailed,
233            Self::DispatchFailed { .. } => ErrorCode::DispatchFailed,
234            Self::InvalidProgram { .. } => ErrorCode::InvalidProgram,
235            Self::Raw(_) => ErrorCode::Unknown,
236        }
237    }
238}