vyre_driver/backend/error.rs
1//! Actionable backend error taxonomy.
2
3use crate::Error;
4
/// Machine-readable classification of a backend failure kind.
///
/// Use this to drive retry logic, circuit breakers, and alerting rules
/// without parsing human-readable message strings. The type is `Copy` and
/// `Hash`, so it can be used directly as a `HashMap`/`HashSet` key (for
/// example per-code failure counters or retry budgets).
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ErrorCode {
    /// Backend device reported insufficient memory.
    DeviceOutOfMemory,
    /// The backend does not support a required feature.
    UnsupportedFeature,
    /// A lock used by the backend was found poisoned.
    ///
    /// This is generally caused by a panic while a lock guard was held and
    /// indicates an internal synchronization bug in process state.
    PoisonedLock,
    /// Kernel-source compilation failed.
    ///
    /// The code covers any kernel-source compile failure, or kernel binary
    /// validation failure, for any backend; it is not limited to shaders.
    ///
    /// NOTE(review): earlier documentation described a planned 2.0 rename
    /// from a "Shader"-named variant to `KernelCompileFailed`. The variant
    /// already carries that name here; confirm the semver-policy doc is in
    /// sync.
    KernelCompileFailed,
    /// Command dispatch or queue submission failed.
    DispatchFailed,
    /// The program itself is invalid for this backend.
    InvalidProgram,
    /// Unclassified error (produced by [`BackendError::new`]).
    Unknown,
}

impl ErrorCode {
    /// Stable integer identifier for API consumers and diagnostic catalogs.
    ///
    /// These ids are append-only. Existing assignments must not be reused or
    /// renumbered because downstream systems may persist them in telemetry,
    /// alert rules, and retry policies.
    ///
    /// Classified codes are numbered consecutively from `1001`; the
    /// unclassified [`ErrorCode::Unknown`] code is `1999`.
    #[must_use]
    pub const fn stable_id(self) -> u32 {
        // Exhaustive on purpose (no `_` arm): adding a variant must force an
        // explicit id assignment here.
        match self {
            Self::DeviceOutOfMemory => 1001,
            Self::UnsupportedFeature => 1002,
            Self::PoisonedLock => 1003,
            Self::KernelCompileFailed => 1004,
            Self::DispatchFailed => 1005,
            Self::InvalidProgram => 1006,
            Self::Unknown => 1999,
        }
    }
}
54
/// Actionable backend dispatch failure.
///
/// Every error that flows through the frozen `VyreBackend` contract must
/// include remediation text beginning with `Fix: `. This guarantees that
/// conform reports are directly actionable for backend authors and that
/// consumers never receive an opaque failure string.
///
/// The structured variants embed their `Fix: ` text in the display format
/// itself. [`BackendError::InvalidProgram`] and [`BackendError::Raw`] render
/// their payload verbatim, so for those two the remediation text must come
/// from the caller.
///
/// Prefer specific variants (`DeviceOutOfMemory`, `KernelCompileFailed`,
/// etc.) over [`BackendError::new`] in new backends. The `Raw` variant
/// exists solely for backward compatibility with existing call sites.
///
/// # Examples
///
/// ```
/// use vyre::BackendError;
///
/// let err = BackendError::new("adapter not found. Fix: install a compatible device driver.");
/// assert!(err.message().contains("Fix:"));
/// ```
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq, thiserror::Error)]
pub enum BackendError {
    /// Device ran out of memory during buffer allocation or dispatch.
    #[error(
        "device out of memory: requested {requested} bytes, {available} available. Fix: reduce buffer sizes or split the dispatch into smaller chunks."
    )]
    DeviceOutOfMemory {
        /// Bytes requested that triggered the OOM condition.
        requested: u64,
        /// Bytes reported available at the time of the failure.
        available: u64,
    },

    /// The backend does not support a required feature.
    #[error(
        "unsupported feature `{name}` on backend `{backend}`. Fix: check backend capability before using this feature, or select a backend that supports it."
    )]
    UnsupportedFeature {
        /// Feature name (e.g. `"subgroup_ops"`, `"f16"`).
        name: String,
        /// Backend identifier (matches [`crate::backend::VyreBackend::id`]).
        backend: String,
    },

    /// Internal lock poisoning was detected during backend synchronization.
    #[error(
        "backend lock poisoned: {lock_error}. Fix: report the panic origin, prevent panics on lock guards, and retry the backend operation."
    )]
    PoisonedLock {
        /// Diagnostic details from the poison error.
        lock_error: String,
    },

    /// Kernel-source compilation failed.
    ///
    /// Applies to any kernel-source compile failure across backends; it is
    /// not limited to shader compilation.
    ///
    /// NOTE(review): earlier documentation described a planned 2.0 rename
    /// from a "Shader"-named variant to `KernelCompileFailed`. The variant
    /// already carries that name here; confirm the semver-policy doc is in
    /// sync.
    #[error(
        "kernel-source compile failed on backend `{backend}`: {compiler_message}. Fix: validate the vyre IR before lowering and check the lowered kernel source for type errors."
    )]
    KernelCompileFailed {
        /// Backend identifier.
        backend: String,
        /// Compiler error text or lowered shader / IR excerpt.
        compiler_message: String,
    },

    /// Command dispatch or GPU queue submission failed.
    ///
    /// `code` is rendered with `{:?}`, so the message shows `Some(n)` or
    /// `None` rather than a bare number.
    #[error(
        "dispatch failed (code {code:?}): {message}. Fix: verify adapter limits, buffer sizes, and GPU queue health before retrying."
    )]
    DispatchFailed {
        /// Optional backend-specific numeric error code.
        code: Option<i32>,
        /// Human-readable failure detail.
        message: String,
    },

    /// The program is structurally invalid for this backend.
    ///
    /// The display output is exactly the `fix` field; nothing in this type
    /// enforces the `Fix: ` prefix, so callers must supply it themselves.
    #[error("{fix}")]
    InvalidProgram {
        /// Actionable description, should begin with `Fix: `.
        fix: String,
    },

    /// Fallback for backends that have not migrated to structured errors.
    ///
    /// New backends should use a specific variant. This variant exists
    /// solely to preserve backward compatibility with [`BackendError::new`].
    /// The wrapped string is displayed verbatim.
    #[error("{0}")]
    Raw(String),
}
149
150impl From<crate::Error> for BackendError {
151 fn from(error: crate::Error) -> Self {
152 Self::new(error.to_string())
153 }
154}
155
156impl BackendError {
157 /// Build a fallback [`BackendError::Raw`] after verifying the message is actionable.
158 ///
159 /// If the supplied message already contains a `Fix: ` section it is used
160 /// verbatim. Otherwise a generic fallback hint is appended. Prefer specific
161 /// variants (`DeviceOutOfMemory`, `KernelCompileFailed`, etc.) over this
162 /// constructor in new code.
163 ///
164 /// # Examples
165 ///
166 /// ```
167 /// use vyre::BackendError;
168 ///
169 /// let err = BackendError::new("queue full. Fix: retry with a smaller dispatch size.");
170 /// assert_eq!(err.to_string(), "queue full. Fix: retry with a smaller dispatch size.");
171 /// ```
172 pub fn new(message: impl Into<String>) -> Self {
173 let message = message.into();
174 if message.contains("Fix: ") {
175 return Self::Raw(message);
176 }
177 Self::Raw(format!(
178 "{message}. Fix: include backend-specific recovery guidance."
179 ))
180 }
181
182 /// Build an actionable unsupported-extension error for opaque IR payloads.
183 #[must_use]
184 pub fn unsupported_extension(
185 backend: impl Into<String>,
186 extension_kind: &str,
187 debug_identity: &str,
188 ) -> Self {
189 Self::UnsupportedFeature {
190 name: format!("opaque IR extension `{extension_kind}`/`{debug_identity}`"),
191 backend: backend.into(),
192 }
193 }
194
195 /// Build a structured lock-poisoning error.
196 ///
197 /// This constructor accepts any `PoisonError` from `RwLock` operations
198 /// and returns an actionable error carrying the root poison metadata.
199 pub fn poisoned_lock<T>(error: std::sync::PoisonError<T>) -> Self {
200 Self::PoisonedLock {
201 lock_error: error.to_string(),
202 }
203 }
204
205 /// Human-readable failure message, equivalent to [`ToString::to_string`].
206 ///
207 /// Prefer explicit `match` on variants or [`ErrorCode`] for programmatic
208 /// error handling; avoid string-parsing this output.
209 #[must_use]
210 pub fn message(&self) -> String {
211 self.to_string()
212 }
213
214 /// Consume this error and return its message string.
215 ///
216 /// Useful in `map_err` chains that expect `String`.
217 #[must_use]
218 pub fn into_message(self) -> String {
219 self.to_string()
220 }
221
222 /// Machine-readable error code for programmatic error handling.
223 ///
224 /// Use this to drive retry logic, circuit breakers, and alerting
225 /// without parsing human-readable message strings.
226 #[must_use]
227 pub fn code(&self) -> ErrorCode {
228 match self {
229 Self::DeviceOutOfMemory { .. } => ErrorCode::DeviceOutOfMemory,
230 Self::UnsupportedFeature { .. } => ErrorCode::UnsupportedFeature,
231 Self::PoisonedLock { .. } => ErrorCode::PoisonedLock,
232 Self::KernelCompileFailed { .. } => ErrorCode::KernelCompileFailed,
233 Self::DispatchFailed { .. } => ErrorCode::DispatchFailed,
234 Self::InvalidProgram { .. } => ErrorCode::InvalidProgram,
235 Self::Raw(_) => ErrorCode::Unknown,
236 }
237 }
238}