1use crate::ffi::bindings::*;
2#[cfg(feature = "serde")]
3use serde_derive::{Deserialize, Serialize};
4use thiserror::Error;
5
/// A raw bit pattern that is either 32 or 64 bits wide.
///
/// This is the payload of `NvmlError::IncorrectBits`, which reports bits that could not
/// be interpreted as bitflags.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Bits {
    /// A 32-bit wide pattern.
    U32(u32),
    /// A 64-bit wide pattern.
    U64(u64),
}
12
13#[derive(Error, Debug)]
15#[error("{error}")]
16pub struct NvmlErrorWithSource {
17 pub error: NvmlError,
18 pub source: Option<NvmlError>,
19}
20
21impl From<NvmlError> for NvmlErrorWithSource {
22 fn from(error: NvmlError) -> Self {
23 Self {
24 error,
25 source: None,
26 }
27 }
28}
29
30#[derive(Error, Debug)]
31pub enum NvmlError {
32 #[error("could not interpret string as utf-8")]
33 Utf8Error(#[from] std::str::Utf8Error),
34 #[error("nul byte inside string")]
35 NulError(#[from] std::ffi::NulError),
36 #[error("a libloading error occurred: {0}")]
37 LibloadingError(#[from] libloading::Error),
38
39 #[error("function symbol failed to load: {0}")]
48 FailedToLoadSymbol(String),
49
50 #[error("max string length was {max_len} but string length is {actual_len}")]
51 StringTooLong { max_len: usize, actual_len: usize },
52
53 #[error("invalid combination of bits ({0:?}) when trying to interpret as bitflags")]
54 IncorrectBits(Bits),
55
56 #[error("unexpected enum variant value: {0}")]
69 UnexpectedVariant(u32),
70
71 #[error("a call to `EventSet.release_events()` failed")]
72 SetReleaseFailed,
73
74 #[error("a call to `Device.pci_info()` failed")]
75 GetPciInfoFailed,
76
77 #[error("a call to `PciInfo.try_into_c()` failed")]
78 PciInfoToCFailed,
79
80 #[error("NVML was not first initialized with `Nvml::init()`")]
81 Uninitialized,
82
83 #[error("a supplied argument was invalid")]
84 InvalidArg,
85
86 #[error("the requested operation is not available on the target device")]
87 NotSupported,
88
89 #[error("the current user does not have permission to perform this operation")]
90 NoPermission,
91
92 #[error("NVML was already initialized")]
93 #[deprecated = "deprecated in NVML (multiple initializations now allowed via refcounting)"]
94 AlreadyInitialized,
95
96 #[error("a query to find an object was unsuccessful")]
97 NotFound,
98
99 #[error(
107 "an input argument is not large enough{}",
108 if let Some(size) = .0 {
109 format!(", needs to be at least {}", size)
110 } else {
111 "".into()
112 }
113 )]
114 InsufficientSize(Option<usize>),
115
116 #[error("device's external power cables are not properly attached")]
117 InsufficientPower,
118
119 #[error("NVIDIA driver is not loaded")]
120 DriverNotLoaded,
121
122 #[error("the provided timeout was reached")]
123 Timeout,
124
125 #[error("NVIDIA kernel detected an interrupt issue with a device")]
126 IrqIssue,
127
128 #[error("a shared library couldn't be found or loaded")]
129 LibraryNotFound,
130
131 #[error("a function couldn't be found in a shared library")]
132 FunctionNotFound,
133
134 #[error("the infoROM is corrupted")]
135 CorruptedInfoROM,
136
137 #[error("device fell off the bus or has otherwise become inacessible")]
138 GpuLost,
139
140 #[error("device requires a reset before it can be used again")]
141 ResetRequired,
142
143 #[error("device control has been blocked by the operating system/cgroups")]
144 OperatingSystem,
145
146 #[error("RM detects a driver/library version mismatch")]
147 LibRmVersionMismatch,
148
149 #[error("operation cannot be performed because the GPU is currently in use")]
150 InUse,
151
152 #[error("insufficient memory")]
153 InsufficientMemory,
154
155 #[error("no data")]
156 NoData,
157
158 #[error(
159 "the requested vgpu operation is not available on the target device because \
160 ECC is enabled"
161 )]
162 VgpuEccNotSupported,
163
164 #[error("an internal driver error occurred")]
165 Unknown,
166}
167
168pub fn nvml_try(code: nvmlReturn_t) -> Result<(), NvmlError> {
170 if code == nvmlReturn_enum_NVML_SUCCESS {
171 return Ok(());
172 }
173 Err(code.into())
174}
175
176pub fn nvml_try_count(code: nvmlReturn_t) -> Result<(), NvmlError> {
180 if code == nvmlReturn_enum_NVML_SUCCESS || code == nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE
181 {
182 return Ok(());
183 }
184 Err(code.into())
185}
186
187#[allow(deprecated)]
188impl From<nvmlReturn_t> for NvmlError {
189 fn from(value: nvmlReturn_t) -> Self {
190 use NvmlError::*;
191 match value {
192 nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Uninitialized,
193 nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => InvalidArg,
194 nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => NotSupported,
195 nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => NoPermission,
196 nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => AlreadyInitialized,
197 nvmlReturn_enum_NVML_ERROR_NOT_FOUND => NotFound,
198 nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => InsufficientSize(None),
199 nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => InsufficientPower,
200 nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => DriverNotLoaded,
201 nvmlReturn_enum_NVML_ERROR_TIMEOUT => Timeout,
202 nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => IrqIssue,
203 nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => LibraryNotFound,
204 nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => FunctionNotFound,
205 nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => CorruptedInfoROM,
206 nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => GpuLost,
207 nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => ResetRequired,
208 nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => OperatingSystem,
209 nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => LibRmVersionMismatch,
210 nvmlReturn_enum_NVML_ERROR_IN_USE => InUse,
211 nvmlReturn_enum_NVML_ERROR_MEMORY => InsufficientMemory,
212 nvmlReturn_enum_NVML_ERROR_NO_DATA => NoData,
213 nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => VgpuEccNotSupported,
214 nvmlReturn_enum_NVML_ERROR_UNKNOWN => Unknown,
215 _ => UnexpectedVariant(value),
216 }
217 }
218}
219
220#[allow(deprecated)]
221impl From<NvmlError> for nvmlReturn_t {
222 fn from(error: NvmlError) -> Self {
223 use NvmlError::*;
224
225 match error {
226 Uninitialized => nvmlReturn_enum_NVML_ERROR_UNINITIALIZED,
227 InvalidArg => nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT,
228 NotSupported => nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED,
229 NoPermission => nvmlReturn_enum_NVML_ERROR_NO_PERMISSION,
230 AlreadyInitialized => nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED,
231 NotFound => nvmlReturn_enum_NVML_ERROR_NOT_FOUND,
232 InsufficientSize(_) => nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE,
233 InsufficientPower => nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER,
234 DriverNotLoaded => nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED,
235 Timeout => nvmlReturn_enum_NVML_ERROR_TIMEOUT,
236 IrqIssue => nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE,
237 LibraryNotFound => nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND,
238 FunctionNotFound => nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND,
239 CorruptedInfoROM => nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM,
240 GpuLost => nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST,
241 ResetRequired => nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED,
242 OperatingSystem => nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM,
243 LibRmVersionMismatch => nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH,
244 InUse => nvmlReturn_enum_NVML_ERROR_IN_USE,
245 InsufficientMemory => nvmlReturn_enum_NVML_ERROR_MEMORY,
246 NoData => nvmlReturn_enum_NVML_ERROR_NO_DATA,
247 VgpuEccNotSupported => nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED,
248 Unknown => nvmlReturn_enum_NVML_ERROR_UNKNOWN,
249 UnexpectedVariant(code) => code,
250 Utf8Error(_)
252 | NulError(_)
253 | LibloadingError(_)
254 | FailedToLoadSymbol(_)
255 | StringTooLong { .. }
256 | IncorrectBits(_)
257 | SetReleaseFailed
258 | GetPciInfoFailed
259 | PciInfoToCFailed => nvmlReturn_enum_NVML_ERROR_UNKNOWN,
260 }
261 }
262}
263
264pub fn nvml_sym<'a, T>(sym: Result<&'a T, &libloading::Error>) -> Result<&'a T, NvmlError> {
266 sym.map_err(|e| NvmlError::FailedToLoadSymbol(e.to_string()))
267}