nvml_wrapper/
error.rs

1use crate::ffi::bindings::*;
2#[cfg(feature = "serde")]
3use serde_derive::{Deserialize, Serialize};
4use thiserror::Error;
5
/// A raw bit pattern, tagged with the width of the integer it was read from.
///
/// Carried by `NvmlError::IncorrectBits` so that the offending value can be
/// reported when it could not be interpreted as bitflags.
///
/// Both variants hold plain integers, so the type is `Copy`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Bits {
    /// A 32-bit wide bit pattern.
    U32(u32),
    /// A 64-bit wide bit pattern.
    U64(u64),
}
12
13/// An `NvmlError` with an optionally present source error for chaining errors
14#[derive(Error, Debug)]
15#[error("{error}")]
16pub struct NvmlErrorWithSource {
17    pub error: NvmlError,
18    pub source: Option<NvmlError>,
19}
20
21impl From<NvmlError> for NvmlErrorWithSource {
22    fn from(error: NvmlError) -> Self {
23        Self {
24            error,
25            source: None,
26        }
27    }
28}
29
30#[derive(Error, Debug)]
31pub enum NvmlError {
32    #[error("could not interpret string as utf-8")]
33    Utf8Error(#[from] std::str::Utf8Error),
34    #[error("nul byte inside string")]
35    NulError(#[from] std::ffi::NulError),
36    #[error("a libloading error occurred: {0}")]
37    LibloadingError(#[from] libloading::Error),
38
39    /**
40    A function symbol failed to load.
41
42    This variant is constructed with a textual description of a
43    `libloading::Error`. The error variant itself can't be provided because we're
44    unable to take ownership of the error when attempting to use a symbol, and
45    `libloading::Error` doesn't impl `Clone`.
46    */
47    #[error("function symbol failed to load: {0}")]
48    FailedToLoadSymbol(String),
49
50    #[error("max string length was {max_len} but string length is {actual_len}")]
51    StringTooLong { max_len: usize, actual_len: usize },
52
53    #[error("invalid combination of bits ({0:?}) when trying to interpret as bitflags")]
54    IncorrectBits(Bits),
55
56    /**
57    An unexpected enum variant was encountered.
58
59    This error is specific to this Rust wrapper. It is used to represent the
60    possibility that an enum variant that is not defined within the Rust bindings
61    can be returned from a C call.
62
63    The single field contains the value that could not be mapped to a
64    defined enum variant.
65
66    See [this issue](https://github.com/rust-lang/rust/issues/36927).
67    */
68    #[error("unexpected enum variant value: {0}")]
69    UnexpectedVariant(u32),
70
71    #[error("a call to `EventSet.release_events()` failed")]
72    SetReleaseFailed,
73
74    #[error("a call to `Device.pci_info()` failed")]
75    GetPciInfoFailed,
76
77    #[error("a call to `PciInfo.try_into_c()` failed")]
78    PciInfoToCFailed,
79
80    #[error("NVML was not first initialized with `Nvml::init()`")]
81    Uninitialized,
82
83    #[error("a supplied argument was invalid")]
84    InvalidArg,
85
86    #[error("the requested operation is not available on the target device")]
87    NotSupported,
88
89    #[error("the current user does not have permission to perform this operation")]
90    NoPermission,
91
92    #[error("NVML was already initialized")]
93    #[deprecated = "deprecated in NVML (multiple initializations now allowed via refcounting)"]
94    AlreadyInitialized,
95
96    #[error("a query to find an object was unsuccessful")]
97    NotFound,
98
99    /**
100    An input argument is not large enough.
101
102    The single field is the size required for a successful call (if `Some`)
103    and `None` if unknown.
104    */
105    // TODO: verify that ^
106    #[error(
107        "an input argument is not large enough{}",
108        if let Some(size) = .0 {
109            format!(", needs to be at least {}", size)
110        } else {
111            "".into()
112        }
113    )]
114    InsufficientSize(Option<usize>),
115
116    #[error("device's external power cables are not properly attached")]
117    InsufficientPower,
118
119    #[error("NVIDIA driver is not loaded")]
120    DriverNotLoaded,
121
122    #[error("the provided timeout was reached")]
123    Timeout,
124
125    #[error("NVIDIA kernel detected an interrupt issue with a device")]
126    IrqIssue,
127
128    #[error("a shared library couldn't be found or loaded")]
129    LibraryNotFound,
130
131    #[error("a function couldn't be found in a shared library")]
132    FunctionNotFound,
133
134    #[error("the infoROM is corrupted")]
135    CorruptedInfoROM,
136
137    #[error("device fell off the bus or has otherwise become inacessible")]
138    GpuLost,
139
140    #[error("device requires a reset before it can be used again")]
141    ResetRequired,
142
143    #[error("device control has been blocked by the operating system/cgroups")]
144    OperatingSystem,
145
146    #[error("RM detects a driver/library version mismatch")]
147    LibRmVersionMismatch,
148
149    #[error("operation cannot be performed because the GPU is currently in use")]
150    InUse,
151
152    #[error("insufficient memory")]
153    InsufficientMemory,
154
155    #[error("no data")]
156    NoData,
157
158    #[error(
159        "the requested vgpu operation is not available on the target device because \
160        ECC is enabled"
161    )]
162    VgpuEccNotSupported,
163
164    #[error("an internal driver error occured")]
165    Unknown,
166}
167
168/// Converts an `nvmlReturn_t` type into a `Result<(), NvmlError>`.
169#[allow(deprecated)]
170pub fn nvml_try(code: nvmlReturn_t) -> Result<(), NvmlError> {
171    use NvmlError::*;
172
173    match code {
174        nvmlReturn_enum_NVML_SUCCESS => Ok(()),
175        nvmlReturn_enum_NVML_ERROR_UNINITIALIZED => Err(Uninitialized),
176        nvmlReturn_enum_NVML_ERROR_INVALID_ARGUMENT => Err(InvalidArg),
177        nvmlReturn_enum_NVML_ERROR_NOT_SUPPORTED => Err(NotSupported),
178        nvmlReturn_enum_NVML_ERROR_NO_PERMISSION => Err(NoPermission),
179        nvmlReturn_enum_NVML_ERROR_ALREADY_INITIALIZED => Err(AlreadyInitialized),
180        nvmlReturn_enum_NVML_ERROR_NOT_FOUND => Err(NotFound),
181        nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => Err(InsufficientSize(None)),
182        nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_POWER => Err(InsufficientPower),
183        nvmlReturn_enum_NVML_ERROR_DRIVER_NOT_LOADED => Err(DriverNotLoaded),
184        nvmlReturn_enum_NVML_ERROR_TIMEOUT => Err(Timeout),
185        nvmlReturn_enum_NVML_ERROR_IRQ_ISSUE => Err(IrqIssue),
186        nvmlReturn_enum_NVML_ERROR_LIBRARY_NOT_FOUND => Err(LibraryNotFound),
187        nvmlReturn_enum_NVML_ERROR_FUNCTION_NOT_FOUND => Err(FunctionNotFound),
188        nvmlReturn_enum_NVML_ERROR_CORRUPTED_INFOROM => Err(CorruptedInfoROM),
189        nvmlReturn_enum_NVML_ERROR_GPU_IS_LOST => Err(GpuLost),
190        nvmlReturn_enum_NVML_ERROR_RESET_REQUIRED => Err(ResetRequired),
191        nvmlReturn_enum_NVML_ERROR_OPERATING_SYSTEM => Err(OperatingSystem),
192        nvmlReturn_enum_NVML_ERROR_LIB_RM_VERSION_MISMATCH => Err(LibRmVersionMismatch),
193        nvmlReturn_enum_NVML_ERROR_IN_USE => Err(InUse),
194        nvmlReturn_enum_NVML_ERROR_MEMORY => Err(InsufficientMemory),
195        nvmlReturn_enum_NVML_ERROR_NO_DATA => Err(NoData),
196        nvmlReturn_enum_NVML_ERROR_VGPU_ECC_NOT_SUPPORTED => Err(VgpuEccNotSupported),
197        nvmlReturn_enum_NVML_ERROR_UNKNOWN => Err(Unknown),
198        _ => Err(UnexpectedVariant(code)),
199    }
200}
201
202/// Helper to map a `&libloading::Error` into an `NvmlError`
203pub fn nvml_sym<'a, T>(sym: Result<&'a T, &libloading::Error>) -> Result<&'a T, NvmlError> {
204    sym.map_err(|e| NvmlError::FailedToLoadSymbol(e.to_string()))
205}