Skip to main content

nvml_wrapper/
lib.rs

1/*!
2A safe and ergonomic Rust wrapper for the [NVIDIA Management Library][nvml] (NVML),
3a C-based programmatic interface for monitoring and managing various states within
4NVIDIA GPUs.
5
6```
7use nvml_wrapper::Nvml;
8# use nvml_wrapper::error::*;
9# fn test() -> Result<(), NvmlError> {
10
11let nvml = Nvml::init()?;
12// Get the first `Device` (GPU) in the system
13let device = nvml.device_by_index(0)?;
14
15let brand = device.brand()?; // GeForce on my system
16let fan_speed = device.fan_speed(0)?; // Currently 17% on my system
17let power_limit = device.enforced_power_limit()?; // 275k milliwatts on my system
18let encoder_util = device.encoder_utilization()?; // Currently 0 on my system; Not encoding anything
19let memory_info = device.memory_info()?; // Currently 1.63/6.37 GB used on my system
20
21// ... and there's a whole lot more you can do. Most everything in NVML is wrapped and ready to go
22# Ok(())
23# }
24```
25
26NVML is intended to be a platform for building 3rd-party applications, and is
27also the underlying library for NVIDIA's nvidia-smi tool.
28
29## Usage
30
31`nvml-wrapper` builds on top of generated bindings for NVML that make use of the
32[`libloading`][libloading] crate. This means the NVML library gets loaded upon
33calling `Nvml::init` and can return an error if NVML isn't present, making it
34possible to drop NVIDIA-related features in your code at runtime on systems that
35don't have relevant hardware.
36
37Successful execution of `Nvml::init` means:
38
39* The NVML library was present on the system and able to be opened
40* The function symbol to initialize NVML was loaded and called successfully
41* An attempt has been made to load all other NVML function symbols
42
43Every function you call thereafter will individually return an error if it couldn't
44be loaded from the NVML library during the `Nvml::init` call.
45
46Note that it's not advised to repeatedly call `Nvml::init` as the constructor
47has to perform all the work of loading the function symbols from the library
48each time it gets called. Instead, call `Nvml::init` once and store the resulting
49`Nvml` instance somewhere to be accessed throughout the lifetime of your program
50(perhaps in a [`once_cell`][once_cell]).
51
52## NVML Support
53
54This wrapper is being developed against and currently supports NVML version
5511. Each new version of NVML is guaranteed to be backwards-compatible according
56to NVIDIA, so this wrapper should continue to work without issue regardless of
57NVML version bumps.
58
59### Legacy Functions
60
61Sometimes there will be function-level API version bumps in new NVML releases.
62For example:
63
64```text
65nvmlDeviceGetComputeRunningProcesses
66nvmlDeviceGetComputeRunningProcesses_v2
67nvmlDeviceGetComputeRunningProcesses_v3
68```
69
70The older versions of the functions will generally continue to work with the
71newer NVML releases; however, the newer function versions will not work with
72older NVML installs.
73
74By default this wrapper only provides access to the newest function versions.
75Enable the `legacy-functions` feature if you require the ability to call older
76functions.
77
78## MSRV
79
80The Minimum Supported Rust Version is currently 1.51.0. I will not go out of my
81way to avoid bumping this.
82
83## Cargo Features
84
85The `serde` feature can be toggled on in order to `#[derive(Serialize, Deserialize)]`
86for every NVML data structure.
87
88[nvml]: https://developer.nvidia.com/nvidia-management-library-nvml
89[libloading]: https://github.com/nagisa/rust_libloading
90[once_cell]: https://docs.rs/once_cell/latest/once_cell/sync/struct.Lazy.html
91*/
92
93#![recursion_limit = "1024"]
94#![allow(non_upper_case_globals)]
95
96extern crate libloading;
97extern crate nvml_wrapper_sys as ffi;
98
99pub mod bitmasks;
100pub mod device;
101pub mod enum_wrappers;
102pub mod enums;
103pub mod error;
104pub mod event;
105pub mod gpm;
106pub mod high_level;
107pub mod nv_link;
108pub mod struct_wrappers;
109pub mod structs;
110#[cfg(test)]
111mod test_utils;
112pub mod unit;
113pub mod vgpu;
114
115// Re-exports for convenience
116pub use crate::device::Device;
117pub use crate::event::EventSet;
118pub use crate::gpm::GpmSample;
119pub use crate::nv_link::NvLink;
120pub use crate::unit::Unit;
121
/// Re-exports from `nvml-wrapper-sys` that are necessary for use of this wrapper.
pub mod sys_exports {
    /// Use these constants to populate the `structs::device::FieldId` newtype.
    ///
    /// Every item of the bindings' `field_id` module is re-exported here as-is.
    pub mod field_id {
        pub use crate::ffi::bindings::field_id::*;
    }
}
129
130#[cfg(target_os = "linux")]
131use std::convert::TryInto;
132#[cfg(target_os = "linux")]
133use std::ptr;
134use std::{
135    convert::TryFrom,
136    ffi::{CStr, CString, OsStr},
137    mem::{self, ManuallyDrop},
138    os::raw::{c_int, c_uint},
139};
140
141use static_assertions::assert_impl_all;
142
143#[cfg(target_os = "linux")]
144use crate::enum_wrappers::device::TopologyLevel;
145
146use crate::error::{nvml_sym, nvml_try, NvmlError};
147use crate::ffi::bindings::*;
148
149use crate::struct_wrappers::ExcludedDeviceInfo;
150
151#[cfg(target_os = "linux")]
152use crate::struct_wrappers::device::PciInfo;
153use crate::struct_wrappers::device::VgpuVersion;
154use crate::struct_wrappers::unit::HwbcEntry;
155
156use crate::bitmasks::InitFlags;
157
// Library file name handed to the loader by `Nvml::init` and
// `Nvml::init_with_flags` on every target other than Linux.
#[cfg(not(target_os = "linux"))]
const LIB_PATH: &str = "nvml.dll";

// Library file name handed to the loader by `Nvml::init` and
// `Nvml::init_with_flags` on Linux.
#[cfg(target_os = "linux")]
const LIB_PATH: &str = "libnvidia-ml.so.1";
163
/// Extracts the major component from a full CUDA driver version number.
///
/// The full version is the integer returned by `Nvml::sys_cuda_driver_version()`.
/// The major component is whatever remains once the three lowest decimal digits
/// are dropped, e.g. `12040` yields `12`.
pub fn cuda_driver_version_major(version: i32) -> i32 {
    // Everything below the thousands place is not part of the major version.
    const MAJOR_UNIT: i32 = 1000;

    version / MAJOR_UNIT
}
170
/// Extracts the minor component from a full CUDA driver version number.
///
/// The full version is the integer returned by `Nvml::sys_cuda_driver_version()`.
/// The minor component is read from the three lowest decimal digits with the
/// final digit discarded, e.g. `12040` yields `4`.
pub fn cuda_driver_version_minor(version: i32) -> i32 {
    // Strip the major part first, then drop the trailing digit.
    let below_major = version % 1000;

    below_major / 10
}
177
178/**
179The main struct that this library revolves around.
180
181According to NVIDIA's documentation, "It is the user's responsibility to call `nvmlInit()`
182before calling any other methods, and `nvmlShutdown()` once NVML is no longer being used."
183This struct is used to enforce those rules.
184
185Also according to NVIDIA's documentation, "NVML is thread-safe so it is safe to make
186simultaneous NVML calls from multiple threads." In the Rust world, this translates to `NVML`
187being `Send` + `Sync`. You can `.clone()` an `Arc` wrapped `NVML` and enjoy using it on any thread.
188
189NOTE: If you care about possible errors returned from `nvmlShutdown()`, use the `.shutdown()`
190method on this struct. **The `Drop` implementation ignores errors.**
191
192When reading documentation on this struct and its members, remember that a lot of it,
193especially in regards to errors returned, is copied from NVIDIA's docs. While they can be found
194online [here](http://docs.nvidia.com/deploy/nvml-api/index.html), the hosted docs sometimes outdated
195and may not accurately reflect the version of NVML that this library is written for; beware. You
196should ideally read the doc comments on an up-to-date NVML API header. Such a header can be
197downloaded as part of the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads).
198*/
199/// Describes which field ID numbering scheme the loaded NVML driver uses for
200/// IDs 251-273. NVIDIA broke ABI compatibility for these IDs between the
201/// original CUDA 13.0 release and CUDA 13.0 Update 1 (driver >= 580.82).
202///
203/// See <https://docs.nvidia.com/deploy/nvml-api/known-issues.html>
204#[derive(Debug, Clone, Copy, PartialEq, Eq)]
205pub enum FieldIdScheme {
206    /// Used by drivers before 580.82 (CUDA 12.x and original CUDA 13.0).
207    /// IDs 251-255 are CLOCKS_EVENT_REASON/POWER_SYNC, 256-273 are PWR_SMOOTHING.
208    V12,
209    /// Used by drivers >= 580.82 (CUDA 13.0 Update 1+).
210    /// IDs 251-268 are PWR_SMOOTHING, 269-273 are CLOCKS_EVENT_REASON/POWER_SYNC.
211    V13Update1,
212}
213
214pub struct Nvml {
215    lib: ManuallyDrop<NvmlLib>,
216    field_id_scheme: FieldIdScheme,
217}
218
219assert_impl_all!(Nvml: Send, Sync);
220
221impl std::fmt::Debug for Nvml {
222    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
223        f.write_str("NVML")
224    }
225}
226
227/// Parse a driver version string (e.g. "580.82.07") and determine the field ID scheme.
228/// Returns `V13Update1` for driver >= 580.82, `V12` otherwise.
229fn detect_field_id_scheme(driver_version: &str) -> FieldIdScheme {
230    let mut parts = driver_version.split('.');
231    let major: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
232    let minor: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
233
234    if major > 580 || (major == 580 && minor >= 82) {
235        FieldIdScheme::V13Update1
236    } else {
237        FieldIdScheme::V12
238    }
239}
240
241/// Translate a field ID from the canonical v12 numbering to the v13U1 numbering.
242/// Only affects IDs in the 251-273 range. IDs outside this range pass through unchanged.
243pub(crate) fn translate_field_id(scheme: FieldIdScheme, id: u32) -> u32 {
244    if scheme == FieldIdScheme::V12 {
245        return id;
246    }
247    // V13Update1 remapping:
248    //   v12 251-255 (CLOCKS_EVENT_REASON/POWER_SYNC) → v13U1 269-273
249    //   v12 256-273 (PWR_SMOOTHING) → v13U1 251-268
250    match id {
251        251..=255 => id + 18,
252        256..=273 => id - 5,
253        other => other,
254    }
255}
256
257impl Nvml {
258    /**
259    Handles NVML initialization and must be called before doing anything else.
260
261    While it is possible to initialize `NVML` multiple times (NVIDIA's docs state
262    that reference counting is used internally), you should strive to initialize
263    `NVML` once at the start of your program's execution; the constructors handle
264    dynamically loading function symbols from the `NVML` lib and are therefore
265    somewhat expensive.
266
267    Note that this will initialize NVML but not any GPUs. This means that NVML can
268    communicate with a GPU even when other GPUs in a system are bad or unstable.
269
270    By default, initialization looks for "libnvidia-ml.so" on linux and "nvml.dll"
271    on Windows. These default names should work for default installs on those
272    platforms; if further specification is required, use `Nvml::builder`.
273
274    # Errors
275
276    * `DriverNotLoaded`, if the NVIDIA driver is not running
277    * `NoPermission`, if NVML does not have permission to talk to the driver
278    * `Unknown`, on any unexpected error
279    */
280    // Checked against local
281    #[doc(alias = "nvmlInit_v2")]
282    pub fn init() -> Result<Self, NvmlError> {
283        Self::init_internal(LIB_PATH)
284    }
285
286    fn init_internal(path: impl AsRef<std::ffi::OsStr>) -> Result<Self, NvmlError> {
287        let lib = unsafe {
288            let lib = NvmlLib::new(path)?;
289            let sym = nvml_sym(lib.nvmlInit_v2.as_ref())?;
290
291            nvml_try(sym())?;
292            ManuallyDrop::new(lib)
293        };
294
295        let mut nvml = Self {
296            lib,
297            field_id_scheme: FieldIdScheme::V12,
298        };
299        nvml.field_id_scheme = nvml
300            .sys_driver_version()
301            .map(|v| detect_field_id_scheme(&v))
302            .unwrap_or(FieldIdScheme::V12);
303
304        Ok(nvml)
305    }
306
307    /**
308    An initialization function that allows you to pass flags to control certain behaviors.
309
310    This is the same as `init()` except for the addition of flags.
311
312    # Errors
313
314    * `DriverNotLoaded`, if the NVIDIA driver is not running
315    * `NoPermission`, if NVML does not have permission to talk to the driver
316    * `Unknown`, on any unexpected error
317
318    # Examples
319
320    ```
321    # use nvml_wrapper::Nvml;
322    # use nvml_wrapper::error::*;
323    use nvml_wrapper::bitmasks::InitFlags;
324
325    # fn main() -> Result<(), NvmlError> {
326    // Don't fail if the system doesn't have any NVIDIA GPUs
327    //
328    // Also, don't attach any GPUs during initialization
329    Nvml::init_with_flags(InitFlags::NO_GPUS | InitFlags::NO_ATTACH)?;
330    # Ok(())
331    # }
332    ```
333    */
334    #[doc(alias = "nvmlInitWithFlags")]
335    pub fn init_with_flags(flags: InitFlags) -> Result<Self, NvmlError> {
336        Self::init_with_flags_internal(LIB_PATH, flags)
337    }
338
339    fn init_with_flags_internal(
340        path: impl AsRef<std::ffi::OsStr>,
341        flags: InitFlags,
342    ) -> Result<Self, NvmlError> {
343        let lib = unsafe {
344            let lib = NvmlLib::new(path)?;
345            let sym = nvml_sym(lib.nvmlInitWithFlags.as_ref())?;
346
347            nvml_try(sym(flags.bits()))?;
348            ManuallyDrop::new(lib)
349        };
350
351        let mut nvml = Self {
352            lib,
353            field_id_scheme: FieldIdScheme::V12,
354        };
355        nvml.field_id_scheme = nvml
356            .sys_driver_version()
357            .map(|v| detect_field_id_scheme(&v))
358            .unwrap_or(FieldIdScheme::V12);
359
360        Ok(nvml)
361    }
362
363    /// Create an `NvmlBuilder` for further flexibility in how NVML is initialized.
364    pub fn builder<'a>() -> NvmlBuilder<'a> {
365        NvmlBuilder::default()
366    }
367
368    /// Get the underlying `NvmlLib` instance.
369    pub fn lib(&self) -> &NvmlLib {
370        &self.lib
371    }
372
373    /// Returns the detected field ID numbering scheme for the loaded driver.
374    pub fn field_id_scheme(&self) -> FieldIdScheme {
375        self.field_id_scheme
376    }
377
378    /**
379    Use this to shutdown NVML and release allocated resources if you care about handling
380    potential errors (*the `Drop` implementation ignores errors!*).
381
382    # Errors
383
384    * `Uninitialized`, if the library has not been successfully initialized
385    * `Unknown`, on any unexpected error
386    */
387    // Thanks to `sorear` on IRC for suggesting this approach
388    // Checked against local
389    // Tested
390    #[doc(alias = "nvmlShutdown")]
391    pub fn shutdown(mut self) -> Result<(), NvmlError> {
392        let sym = nvml_sym(self.lib.nvmlShutdown.as_ref())?;
393
394        unsafe {
395            nvml_try(sym())?;
396        }
397
398        // SAFETY: we `mem::forget(self)` after this, so `self.lib` won't get
399        // touched by our `Drop` impl
400        let lib = unsafe { ManuallyDrop::take(&mut self.lib) };
401        mem::forget(self);
402
403        Ok(lib.__library.close()?)
404    }
405
406    /**
407    Get the number of compute devices in the system (compute device == one GPU).
408
409    Note that this count can include devices you do not have permission to access.
410
411    # Errors
412
413    * `Uninitialized`, if the library has not been successfully initialized
414    * `Unknown`, on any unexpected error
415    */
416    // Checked against local
417    // Tested
418    #[doc(alias = "nvmlDeviceGetCount_v2")]
419    pub fn device_count(&self) -> Result<u32, NvmlError> {
420        let sym = nvml_sym(self.lib.nvmlDeviceGetCount_v2.as_ref())?;
421
422        unsafe {
423            let mut count: c_uint = mem::zeroed();
424            nvml_try(sym(&mut count))?;
425
426            Ok(count)
427        }
428    }
429
430    /**
431    Gets the version of the system's graphics driver and returns it as an alphanumeric
432    string.
433
434    # Errors
435
436    * `Uninitialized`, if the library has not been successfully initialized
437    * `Utf8Error`, if the string obtained from the C function is not valid Utf8
438    */
439    // Checked against local
440    // Tested
441    #[doc(alias = "nvmlSystemGetDriverVersion")]
442    pub fn sys_driver_version(&self) -> Result<String, NvmlError> {
443        let sym = nvml_sym(self.lib.nvmlSystemGetDriverVersion.as_ref())?;
444
445        unsafe {
446            let mut version_vec = vec![0; NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE as usize];
447
448            nvml_try(sym(
449                version_vec.as_mut_ptr(),
450                NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE,
451            ))?;
452
453            let version_raw = CStr::from_ptr(version_vec.as_ptr());
454            Ok(version_raw.to_str()?.into())
455        }
456    }
457
458    /**
459    Gets the version of the system's NVML library and returns it as an alphanumeric
460    string.
461
462    # Errors
463
464    * `Utf8Error`, if the string obtained from the C function is not valid Utf8
465    */
466    // Checked against local
467    // Tested
468    #[doc(alias = "nvmlSystemGetNVMLVersion")]
469    pub fn sys_nvml_version(&self) -> Result<String, NvmlError> {
470        let sym = nvml_sym(self.lib.nvmlSystemGetNVMLVersion.as_ref())?;
471
472        unsafe {
473            let mut version_vec = vec![0; NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE as usize];
474
475            nvml_try(sym(
476                version_vec.as_mut_ptr(),
477                NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE,
478            ))?;
479
480            // Thanks to `Amaranth` on IRC for help with this
481            let version_raw = CStr::from_ptr(version_vec.as_ptr());
482            Ok(version_raw.to_str()?.into())
483        }
484    }
485
486    /**
487    Gets the version of the system's CUDA driver.
488
489    Calls into the CUDA library (cuDriverGetVersion()).
490
491    You can use `cuda_driver_version_major` and `cuda_driver_version_minor`
492    to get the major and minor driver versions from this number.
493
494    # Errors
495
496    * `FunctionNotFound`, if cuDriverGetVersion() is not found in the shared library
497    * `LibraryNotFound`, if libcuda.so.1 or libcuda.dll cannot be found
498    */
499    #[doc(alias = "nvmlSystemGetCudaDriverVersion_v2")]
500    pub fn sys_cuda_driver_version(&self) -> Result<i32, NvmlError> {
501        let sym = nvml_sym(self.lib.nvmlSystemGetCudaDriverVersion_v2.as_ref())?;
502
503        unsafe {
504            let mut version: c_int = mem::zeroed();
505            nvml_try(sym(&mut version))?;
506
507            Ok(version)
508        }
509    }
510
511    /**
512    Gets the name of the process for the given process ID, cropped to the provided length.
513
514    # Errors
515
516    * `Uninitialized`, if the library has not been successfully initialized
517    * `InvalidArg`, if the length is 0 (if this is returned without length being 0, file an issue)
518    * `NotFound`, if the process does not exist
519    * `NoPermission`, if the user doesn't have permission to perform the operation
520    * `Utf8Error`, if the string obtained from the C function is not valid UTF-8. NVIDIA's docs say
521      that the string encoding is ANSI, so this may very well happen.
522    * `Unknown`, on any unexpected error
523    */
524    // TODO: The docs say the string is ANSI-encoded. Not sure if I should try
525    // to do anything about that
526    // Checked against local
527    // Tested
528    #[doc(alias = "nvmlSystemGetProcessName")]
529    pub fn sys_process_name(&self, pid: u32, length: usize) -> Result<String, NvmlError> {
530        let sym = nvml_sym(self.lib.nvmlSystemGetProcessName.as_ref())?;
531
532        unsafe {
533            let mut name_vec = vec![0; length];
534
535            nvml_try(sym(pid, name_vec.as_mut_ptr(), length as c_uint))?;
536
537            let name_raw = CStr::from_ptr(name_vec.as_ptr());
538            Ok(name_raw.to_str()?.into())
539        }
540    }
541
542    /**
543    Acquire the handle for a particular device based on its index (starts at 0).
544
545    Usage of this function causes NVML to initialize the target GPU. Additional
546    GPUs may be initialized if the target GPU is an SLI slave.
547
548    You can determine valid indices by using `.device_count()`. This
549    function doesn't call that for you, but the actual C function to get
550    the device handle will return an error in the case of an invalid index.
551    This means that the `InvalidArg` error will be returned if you pass in
552    an invalid index.
553
554    NVIDIA's docs state that "The order in which NVML enumerates devices has
555    no guarantees of consistency between reboots. For that reason it is recommended
556    that devices be looked up by their PCI ids or UUID." In this library, that translates
557    into usage of `.device_by_uuid()` and `.device_by_pci_bus_id()`.
558
559    The NVML index may not correlate with other APIs such as the CUDA device index.
560
561    # Errors
562
563    * `Uninitialized`, if the library has not been successfully initialized
564    * `InvalidArg`, if index is invalid
565    * `InsufficientPower`, if any attached devices have improperly attached external power cables
566    * `NoPermission`, if the user doesn't have permission to talk to this device
567    * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
568    * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
569    * `Unknown`, on any unexpected error
570    */
571    // Checked against local
572    // Tested
573    #[doc(alias = "nvmlDeviceGetHandleByIndex_v2")]
574    pub fn device_by_index(&self, index: u32) -> Result<Device<'_>, NvmlError> {
575        let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByIndex_v2.as_ref())?;
576
577        unsafe {
578            let mut device: nvmlDevice_t = mem::zeroed();
579            nvml_try(sym(index, &mut device))?;
580
581            Ok(Device::new(device, self))
582        }
583    }
584
585    /**
586    Acquire the handle for a particular device based on its PCI bus ID.
587
588    Usage of this function causes NVML to initialize the target GPU. Additional
589    GPUs may be initialized if the target GPU is an SLI slave.
590
591    The bus ID corresponds to the `bus_id` returned by `Device.pci_info()`.
592
593    # Errors
594
595    * `Uninitialized`, if the library has not been successfully initialized
596    * `InvalidArg`, if `pci_bus_id` is invalid
597    * `NotFound`, if `pci_bus_id` does not match a valid device on the system
598    * `InsufficientPower`, if any attached devices have improperly attached external power cables
599    * `NoPermission`, if the user doesn't have permission to talk to this device
600    * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
601    * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
602    * `NulError`, for which you can read the docs on `std::ffi::NulError`
603    * `Unknown`, on any unexpected error
604    */
605    // Checked against local
606    // Tested
607    #[doc(alias = "nvmlDeviceGetHandleByPciBusId_v2")]
608    pub fn device_by_pci_bus_id<S: AsRef<str>>(
609        &self,
610        pci_bus_id: S,
611    ) -> Result<Device<'_>, NvmlError>
612    where
613        Vec<u8>: From<S>,
614    {
615        let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByPciBusId_v2.as_ref())?;
616
617        unsafe {
618            let c_string = CString::new(pci_bus_id)?;
619            let mut device: nvmlDevice_t = mem::zeroed();
620
621            nvml_try(sym(c_string.as_ptr(), &mut device))?;
622
623            Ok(Device::new(device, self))
624        }
625    }
626
627    /// Not documenting this because it's deprecated and does not seem to work
628    /// anymore.
629    // Tested (for an error)
630    #[deprecated(note = "use `.device_by_uuid()`, this errors on dual GPU boards")]
631    #[doc(alias = "nvmlDeviceGetHandleBySerial")]
632    pub fn device_by_serial<S: AsRef<str>>(&self, board_serial: S) -> Result<Device<'_>, NvmlError>
633    where
634        Vec<u8>: From<S>,
635    {
636        let sym = nvml_sym(self.lib.nvmlDeviceGetHandleBySerial.as_ref())?;
637
638        unsafe {
639            let c_string = CString::new(board_serial)?;
640            let mut device: nvmlDevice_t = mem::zeroed();
641
642            nvml_try(sym(c_string.as_ptr(), &mut device))?;
643
644            Ok(Device::new(device, self))
645        }
646    }
647
648    /**
649    Acquire the handle for a particular device based on its globally unique immutable
650    UUID.
651
652    Usage of this function causes NVML to initialize the target GPU. Additional
653    GPUs may be initialized as the function called within searches for the target GPU.
654
655    # Errors
656
657    * `Uninitialized`, if the library has not been successfully initialized
658    * `InvalidArg`, if `uuid` is invalid
659    * `NotFound`, if `uuid` does not match a valid device on the system
660    * `InsufficientPower`, if any attached devices have improperly attached external power cables
661    * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
662    * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
663    * `NulError`, for which you can read the docs on `std::ffi::NulError`
664    * `Unknown`, on any unexpected error
665
666    NVIDIA doesn't mention `NoPermission` for this one. Strange!
667    */
668    // Checked against local
669    // Tested
670    #[doc(alias = "nvmlDeviceGetHandleByUUID")]
671    pub fn device_by_uuid<S: AsRef<str>>(&self, uuid: S) -> Result<Device<'_>, NvmlError>
672    where
673        Vec<u8>: From<S>,
674    {
675        let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByUUID.as_ref())?;
676
677        unsafe {
678            let c_string = CString::new(uuid)?;
679            let mut device: nvmlDevice_t = mem::zeroed();
680
681            nvml_try(sym(c_string.as_ptr(), &mut device))?;
682
683            Ok(Device::new(device, self))
684        }
685    }
686
687    /**
688    Gets the common ancestor for two devices.
689
690    Note: this is the same as `Device.topology_common_ancestor()`.
691
692    # Errors
693
694    * `InvalidArg`, if the device is invalid
695    * `NotSupported`, if this `Device` or the OS does not support this feature
696    * `UnexpectedVariant`, for which you can read the docs for
697    * `Unknown`, on any unexpected error
698
699    # Platform Support
700
701    Only supports Linux.
702    */
703    // Checked against local
704    // Tested
705    #[cfg(target_os = "linux")]
706    #[doc(alias = "nvmlDeviceGetTopologyCommonAncestor")]
707    pub fn topology_common_ancestor(
708        &self,
709        device1: &Device,
710        device2: &Device,
711    ) -> Result<TopologyLevel, NvmlError> {
712        let sym = nvml_sym(self.lib.nvmlDeviceGetTopologyCommonAncestor.as_ref())?;
713
714        unsafe {
715            let mut level: nvmlGpuTopologyLevel_t = mem::zeroed();
716
717            nvml_try(sym(device1.handle(), device2.handle(), &mut level))?;
718
719            TopologyLevel::try_from(level)
720        }
721    }
722
723    /**
724    Acquire the handle for a particular `Unit` based on its index.
725
726    Valid indices are derived from the count returned by `.unit_count()`.
727    For example, if `unit_count` is 2 the valid indices are 0 and 1, corresponding
728    to UNIT 0 and UNIT 1.
729
730    Note that the order in which NVML enumerates units has no guarantees of
731    consistency between reboots.
732
733    # Errors
734
735    * `Uninitialized`, if the library has not been successfully initialized
736    * `InvalidArg`, if `index` is invalid
737    * `Unknown`, on any unexpected error
738
739    # Device Support
740
741    For S-class products.
742    */
743    // Checked against local
744    // Tested (for an error)
745    #[doc(alias = "nvmlUnitGetHandleByIndex")]
746    pub fn unit_by_index(&self, index: u32) -> Result<Unit<'_>, NvmlError> {
747        let sym = nvml_sym(self.lib.nvmlUnitGetHandleByIndex.as_ref())?;
748
749        unsafe {
750            let mut unit: nvmlUnit_t = mem::zeroed();
751            nvml_try(sym(index as c_uint, &mut unit))?;
752
753            Ok(Unit::new(unit, self))
754        }
755    }
756
757    /**
758    Checks if the passed-in `Device`s are on the same physical board.
759
760    Note: this is the same as `Device.is_on_same_board_as()`.
761
762    # Errors
763
764    * `Uninitialized`, if the library has not been successfully initialized
765    * `InvalidArg`, if either `Device` is invalid
766    * `NotSupported`, if this check is not supported by this `Device`
767    * `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
768    * `Unknown`, on any unexpected error
769    */
770    // Checked against local
771    // Tested
772    #[doc(alias = "nvmlDeviceOnSameBoard")]
773    pub fn are_devices_on_same_board(
774        &self,
775        device1: &Device,
776        device2: &Device,
777    ) -> Result<bool, NvmlError> {
778        let sym = nvml_sym(self.lib.nvmlDeviceOnSameBoard.as_ref())?;
779
780        unsafe {
781            let mut bool_int: c_int = mem::zeroed();
782
783            nvml_try(sym(device1.handle(), device2.handle(), &mut bool_int))?;
784
785            match bool_int {
786                0 => Ok(false),
787                _ => Ok(true),
788            }
789        }
790    }
791
792    /**
793    Gets the set of GPUs that have a CPU affinity with the given CPU number.
794
795    # Errors
796
797    * `InvalidArg`, if `cpu_number` is invalid
798    * `NotSupported`, if this `Device` or the OS does not support this feature
799    * `Unknown`, an error has occurred in the underlying topology discovery
800
801    # Platform Support
802
803    Only supports Linux.
804    */
805    // Tested
806    #[cfg(target_os = "linux")]
807    #[doc(alias = "nvmlSystemGetTopologyGpuSet")]
808    pub fn topology_gpu_set(&self, cpu_number: u32) -> Result<Vec<Device<'_>>, NvmlError> {
809        let sym = nvml_sym(self.lib.nvmlSystemGetTopologyGpuSet.as_ref())?;
810
811        unsafe {
812            let mut count = match self.topology_gpu_set_count(cpu_number)? {
813                0 => return Ok(vec![]),
814                value => value,
815            };
816            let mut devices: Vec<nvmlDevice_t> = vec![mem::zeroed(); count as usize];
817
818            nvml_try(sym(cpu_number, &mut count, devices.as_mut_ptr()))?;
819
820            Ok(devices.into_iter().map(|d| Device::new(d, self)).collect())
821        }
822    }
823
824    // Helper function for the above.
825    #[cfg(target_os = "linux")]
826    fn topology_gpu_set_count(&self, cpu_number: u32) -> Result<c_uint, NvmlError> {
827        let sym = nvml_sym(self.lib.nvmlSystemGetTopologyGpuSet.as_ref())?;
828
829        unsafe {
830            // Indicates that we want the count
831            let mut count: c_uint = 0;
832
833            // Passing null doesn't indicate that we want the count, just allowed
834            nvml_try(sym(cpu_number, &mut count, ptr::null_mut()))?;
835
836            Ok(count)
837        }
838    }
839
840    /**
841    Gets the IDs and firmware versions for any Host Interface Cards in the system.
842
843    # Errors
844
845    * `Uninitialized`, if the library has not been successfully initialized
846
847    # Device Support
848
849    Supports S-class products.
850    */
851    // Checked against local
852    // Tested
853    #[doc(alias = "nvmlSystemGetHicVersion")]
854    pub fn hic_versions(&self) -> Result<Vec<HwbcEntry>, NvmlError> {
855        let sym = nvml_sym(self.lib.nvmlSystemGetHicVersion.as_ref())?;
856
857        unsafe {
858            let mut count: c_uint = match self.hic_count()? {
859                0 => return Ok(vec![]),
860                value => value,
861            };
862            let mut hics: Vec<nvmlHwbcEntry_t> = vec![mem::zeroed(); count as usize];
863
864            nvml_try(sym(&mut count, hics.as_mut_ptr()))?;
865
866            hics.into_iter().map(HwbcEntry::try_from).collect()
867        }
868    }
869
870    /**
871    Gets the count of Host Interface Cards in the system.
872
873    # Errors
874
875    * `Uninitialized`, if the library has not been successfully initialized
876
877    # Device Support
878
879    Supports S-class products.
880    */
881    // Tested as part of the above method
882    #[doc(alias = "nvmlSystemGetHicVersion")]
883    pub fn hic_count(&self) -> Result<u32, NvmlError> {
884        let sym = nvml_sym(self.lib.nvmlSystemGetHicVersion.as_ref())?;
885
886        unsafe {
887            /*
888            NVIDIA doesn't even say that `count` will be set to the count if
889            `InsufficientSize` is returned. But we can assume sanity, right?
890
891            The idea here is:
892            If there are 0 HICs, NVML_SUCCESS is returned, `count` is set
893              to 0. We return count, all good.
894            If there is 1 HIC, NVML_SUCCESS is returned, `count` is set to
895              1. We return count, all good.
896            If there are >= 2 HICs, NVML_INSUFFICIENT_SIZE is returned.
897             `count` is theoretically set to the actual count, and we
898              return it.
899            */
900            let mut count: c_uint = 1;
901            let mut hics: [nvmlHwbcEntry_t; 1] = [mem::zeroed()];
902
903            match sym(&mut count, hics.as_mut_ptr()) {
904                nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => {
905                    Ok(count)
906                }
907                // We know that this will be an error
908                other => nvml_try(other).map(|_| 0),
909            }
910        }
911    }
912
913    /**
914    Gets the number of units in the system.
915
916    # Errors
917
918    * `Uninitialized`, if the library has not been successfully initialized
919    * `Unknown`, on any unexpected error
920
921    # Device Support
922
923    Supports S-class products.
924    */
925    // Checked against local
926    // Tested
927    #[doc(alias = "nvmlUnitGetCount")]
928    pub fn unit_count(&self) -> Result<u32, NvmlError> {
929        let sym = nvml_sym(self.lib.nvmlUnitGetCount.as_ref())?;
930
931        unsafe {
932            let mut count: c_uint = mem::zeroed();
933            nvml_try(sym(&mut count))?;
934
935            Ok(count)
936        }
937    }
938
939    /**
940    Create an empty set of events.
941
942    # Errors
943
944    * `Uninitialized`, if the library has not been successfully initialized
945    * `Unknown`, on any unexpected error
946
947    # Device Support
948
949    Supports Fermi and newer fully supported devices.
950    */
951    // Checked against local
952    // Tested
953    #[doc(alias = "nvmlEventSetCreate")]
954    pub fn create_event_set(&self) -> Result<EventSet<'_>, NvmlError> {
955        let sym = nvml_sym(self.lib.nvmlEventSetCreate.as_ref())?;
956
957        unsafe {
958            let mut set: nvmlEventSet_t = mem::zeroed();
959            nvml_try(sym(&mut set))?;
960
961            Ok(EventSet::new(set, self))
962        }
963    }
964
965    /**
966    Request the OS and the NVIDIA kernel driver to rediscover a portion of the PCI
967    subsystem in search of GPUs that were previously removed.
968
969    The portion of the PCI tree can be narrowed by specifying a domain, bus, and
970    device in the passed-in `pci_info`. **If all of these fields are zeroes, the
971    entire PCI tree will be searched.** Note that for long-running NVML processes,
972    the enumeration of devices will change based on how many GPUs are discovered
973    and where they are inserted in bus order.
974
975    All newly discovered GPUs will be initialized and have their ECC scrubbed which
976    may take several seconds per GPU. **All device handles are no longer guaranteed
977    to be valid post discovery**. I am not sure if this means **all** device
978    handles, literally, or if NVIDIA is referring to handles that had previously
979    been obtained to devices that were then removed and have now been
980    re-discovered.
981
982    Must be run as administrator.
983
984    # Errors
985
986    * `Uninitialized`, if the library has not been successfully initialized
987    * `OperatingSystem`, if the operating system is denying this feature
988    * `NoPermission`, if the calling process has insufficient permissions to
989      perform this operation
990    * `NulError`, if an issue is encountered when trying to convert a Rust
991      `String` into a `CString`.
992    * `Unknown`, on any unexpected error
993
994    # Device Support
995
996    Supports Pascal and newer fully supported devices.
997
998    Some Kepler devices are also supported (that's all NVIDIA says, no specifics).
999
1000    # Platform Support
1001
1002    Only supports Linux.
1003    */
1004    // TODO: constructor for default pci_infos ^
1005    // Checked against local
1006    // Tested
1007    #[cfg(target_os = "linux")]
1008    #[doc(alias = "nvmlDeviceDiscoverGpus")]
1009    pub fn discover_gpus(&self, pci_info: PciInfo) -> Result<(), NvmlError> {
1010        let sym = nvml_sym(self.lib.nvmlDeviceDiscoverGpus.as_ref())?;
1011
1012        unsafe { nvml_try(sym(&mut pci_info.try_into()?)) }
1013    }
1014
1015    /**
1016    Gets the number of excluded GPU devices in the system.
1017
1018    # Device Support
1019
1020    Supports all devices.
1021    */
1022    #[doc(alias = "nvmlGetExcludedDeviceCount")]
1023    pub fn excluded_device_count(&self) -> Result<u32, NvmlError> {
1024        let sym = nvml_sym(self.lib.nvmlGetExcludedDeviceCount.as_ref())?;
1025
1026        unsafe {
1027            let mut count: c_uint = mem::zeroed();
1028
1029            nvml_try(sym(&mut count))?;
1030            Ok(count)
1031        }
1032    }
1033
1034    /**
1035    Gets information for the specified excluded device.
1036
1037    # Errors
1038
1039    * `InvalidArg`, if the given index is invalid
1040    * `Utf8Error`, if strings obtained from the C function are not valid Utf8
1041
1042    # Device Support
1043
1044    Supports all devices.
1045    */
1046    #[doc(alias = "nvmlGetExcludedDeviceInfoByIndex")]
1047    pub fn excluded_device_info(&self, index: u32) -> Result<ExcludedDeviceInfo, NvmlError> {
1048        let sym = nvml_sym(self.lib.nvmlGetExcludedDeviceInfoByIndex.as_ref())?;
1049
1050        unsafe {
1051            let mut info: nvmlExcludedDeviceInfo_t = mem::zeroed();
1052
1053            nvml_try(sym(index, &mut info))?;
1054            ExcludedDeviceInfo::try_from(info)
1055        }
1056    }
1057
1058    /**
1059    Gets the loaded vGPU list of capabilities
1060
1061    # Errors
1062
1063    * `Uninitialized`, if the library has not been successfully initialized
1064    * `Unknown`, on any unexpected error
1065
1066    # Device Support
1067
1068    Supports all devices.
1069    */
1070    #[doc(alias = "nvmlGetVgpuDriverCapabilities")]
1071    pub fn vgpu_driver_capabilities(
1072        &self,
1073        capability: nvmlVgpuDriverCapability_t,
1074    ) -> Result<u32, NvmlError> {
1075        let sym = nvml_sym(self.lib.nvmlGetVgpuDriverCapabilities.as_ref())?;
1076
1077        unsafe {
1078            let mut mask: u32 = mem::zeroed();
1079
1080            nvml_try(sym(capability, &mut mask))?;
1081            Ok(mask)
1082        }
1083    }
1084
1085    /**
1086    Get the supported and actual vGPU versions range.
1087
1088    # Errors
1089
1090    * `Uninitialized`, if the library has not been successfully initialized
1091    * `Unknown`, on any unexpected error
1092
1093    # Device Support
1094    */
1095    #[doc(alias = "nvmlGetVgpuVersion")]
1096    pub fn vgpu_version(&self) -> Result<(VgpuVersion, VgpuVersion), NvmlError> {
1097        let sym = nvml_sym(self.lib.nvmlGetVgpuVersion.as_ref())?;
1098
1099        unsafe {
1100            let mut supported: nvmlVgpuVersion_t = mem::zeroed();
1101            let mut current: nvmlVgpuVersion_t = mem::zeroed();
1102
1103            nvml_try(sym(&mut supported, &mut current))?;
1104            Ok((VgpuVersion::from(supported), VgpuVersion::from(current)))
1105        }
1106    }
1107
1108    #[doc(alias = "nvmlSetVgpuVersion")]
1109    pub fn set_vgpu_version(&self, version: VgpuVersion) -> Result<(), NvmlError> {
1110        let sym = nvml_sym(self.lib.nvmlSetVgpuVersion.as_ref())?;
1111
1112        unsafe { nvml_try(sym(&mut version.as_c())) }
1113    }
1114}
1115
1116/// This `Drop` implementation ignores errors! Use the `.shutdown()` method on
1117/// the `Nvml` struct
1118/// if you care about handling them.
1119impl Drop for Nvml {
1120    #[doc(alias = "nvmlShutdown")]
1121    fn drop(&mut self) {
1122        unsafe {
1123            self.lib.nvmlShutdown();
1124
1125            // SAFETY: called after the last usage of `self.lib`
1126            ManuallyDrop::drop(&mut self.lib);
1127        }
1128    }
1129}
1130
1131/**
1132A builder struct that provides further flexibility in how NVML is initialized.
1133
1134# Examples
1135
1136Initialize NVML with a non-default name for the shared object file:
1137
1138```
1139use nvml_wrapper::Nvml;
1140use std::ffi::OsStr;
1141
1142let init_result = Nvml::builder().lib_path(OsStr::new("libnvidia-ml-other-name.so")).init();
1143```
1144
1145Initialize NVML with a non-default path to the shared object file:
1146
1147```
1148use nvml_wrapper::Nvml;
1149use std::ffi::OsStr;
1150
1151let init_result = Nvml::builder().lib_path(OsStr::new("/some/path/to/libnvidia-ml.so")).init();
1152```
1153*/
1154#[derive(Debug, Clone, Eq, PartialEq, Default)]
1155pub struct NvmlBuilder<'a> {
1156    lib_path: Option<&'a OsStr>,
1157    flags: InitFlags,
1158}
1159
1160impl<'a> NvmlBuilder<'a> {
1161    /**
1162    Set the path to the NVML lib file.
1163
1164    See [`libloading`'s docs][libloading] for details about how this lib path is
1165    handled.
1166
1167    [libloading]: https://docs.rs/libloading/0.6.6/libloading/struct.Library.html#method.new
1168    */
1169    pub fn lib_path(&mut self, path: &'a OsStr) -> &mut Self {
1170        self.lib_path = Some(path);
1171        self
1172    }
1173
1174    /// Set the `InitFlags` to initialize NVML with.
1175    pub fn flags(&mut self, flags: InitFlags) -> &mut Self {
1176        self.flags = flags;
1177        self
1178    }
1179
1180    /// Perform initialization.
1181    pub fn init(&self) -> Result<Nvml, NvmlError> {
1182        let lib_path = self.lib_path.unwrap_or_else(|| LIB_PATH.as_ref());
1183
1184        if self.flags.is_empty() {
1185            Nvml::init_internal(lib_path)
1186        } else {
1187            Nvml::init_with_flags_internal(lib_path, self.flags)
1188        }
1189    }
1190}
1191
#[cfg(test)]
mod test {
    use super::*;
    use crate::{bitmasks::InitFlags, error::NvmlError, test_utils::*};

    // Initialization with explicit flags has to succeed.
    #[test]
    fn init_with_flags() {
        let initialized = Nvml::init_with_flags(InitFlags::NO_GPUS);
        initialized.unwrap();
    }

    #[test]
    fn shutdown() {
        test(3, || {
            let handle = nvml();
            handle.shutdown()
        })
    }

    #[test]
    fn device_count() {
        test(3, || {
            let handle = nvml();
            handle.device_count()
        })
    }

    #[test]
    fn sys_driver_version() {
        test(3, || {
            let handle = nvml();
            handle.sys_driver_version()
        })
    }

    #[test]
    fn sys_nvml_version() {
        test(3, || {
            let handle = nvml();
            handle.sys_nvml_version()
        })
    }

    #[test]
    fn sys_cuda_driver_version() {
        test(3, || {
            let handle = nvml();
            handle.sys_cuda_driver_version()
        })
    }

    #[test]
    fn sys_cuda_driver_version_major() {
        test(3, || {
            let version = nvml().sys_cuda_driver_version()?;
            Ok(cuda_driver_version_major(version))
        })
    }

    #[test]
    fn sys_cuda_driver_version_minor() {
        test(3, || {
            let version = nvml().sys_cuda_driver_version()?;
            Ok(cuda_driver_version_minor(version))
        })
    }

    // Looks up the name of the first running graphics process; a permission
    // error is reported as a placeholder string instead of failing.
    #[test]
    fn sys_process_name() {
        let handle = nvml();
        test_with_device(3, &handle, |device| {
            let processes = device.running_graphics_processes()?;
            let pid = processes[0].pid;

            handle.sys_process_name(pid, 64).or_else(|err| match err {
                NvmlError::NoPermission => Ok("No permission error".into()),
                other => Err(other),
            })
        })
    }

    #[test]
    fn device_by_index() {
        let handle = nvml();
        test(3, || handle.device_by_index(0))
    }

    #[test]
    fn device_by_pci_bus_id() {
        let handle = nvml();
        test_with_device(3, &handle, |device| {
            let pci = device.pci_info()?;
            handle.device_by_pci_bus_id(pci.bus_id)
        })
    }

    // Can't get serial on my machine
    #[ignore = "my machine does not support this call"]
    #[test]
    fn device_by_serial() {
        let handle = nvml();

        #[allow(deprecated)]
        test_with_device(3, &handle, |device| {
            let serial = device.serial()?;
            handle.device_by_serial(serial)
        })
    }

    #[test]
    fn device_by_uuid() {
        let handle = nvml();
        test_with_device(3, &handle, |device| {
            let uuid = device.uuid()?;
            handle.device_by_uuid(uuid)
        })
    }

    // I don't have 2 devices
    #[ignore = "my machine does not support this call"]
    #[cfg(target_os = "linux")]
    #[test]
    fn topology_common_ancestor() {
        let handle = nvml();
        let first = device(&handle);
        let second = handle.device_by_index(1).expect("device");

        let level = handle.topology_common_ancestor(&first, &second);
        level.expect("TopologyLevel");
    }

    // Errors on my machine

    #[test]
    #[ignore = "my machine does not support this call"]
    fn unit_by_index() {
        let handle = nvml();
        test(3, || handle.unit_by_index(0))
    }

    // I don't have 2 devices
    #[ignore = "my machine does not support this call"]
    #[test]
    fn are_devices_on_same_board() {
        let handle = nvml();
        let first = device(&handle);
        let second = handle.device_by_index(1).expect("device");

        let same_board = handle.are_devices_on_same_board(&first, &second);
        same_board.expect("bool");
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn topology_gpu_set() {
        let handle = nvml();
        test(3, || handle.topology_gpu_set(0))
    }

    #[test]
    fn hic_version() {
        let handle = nvml();
        test(3, || handle.hic_versions())
    }

    #[test]
    fn unit_count() {
        test(3, || {
            let handle = nvml();
            handle.unit_count()
        })
    }

    #[test]
    fn create_event_set() {
        let handle = nvml();
        test(3, || handle.create_event_set())
    }

    #[cfg(target_os = "linux")]
    #[should_panic(expected = "OperatingSystem")]
    #[test]
    fn discover_gpus() {
        let handle = nvml();
        test_with_device(3, &handle, |device| {
            let pci_info = device.pci_info()?;

            // We don't test with admin perms and therefore expect an error
            let outcome = handle.discover_gpus(pci_info);
            if matches!(outcome, Err(NvmlError::NoPermission)) {
                panic!("NoPermission");
            }

            outcome
        })
    }

    #[test]
    fn excluded_device_count() {
        let handle = nvml();
        test(3, || handle.excluded_device_count())
    }

    // Only queries index 0 when at least one excluded device is reported.
    #[test]
    fn excluded_device_info() {
        let handle = nvml();

        let excluded = handle.excluded_device_count().unwrap();
        if excluded == 0 {
            return;
        }

        test(3, || handle.excluded_device_info(0))
    }

    #[test]
    fn vgpu_driver_capabilities() {
        let handle = nvml();
        test(3, || handle.vgpu_driver_capabilities(1))
    }

    #[test]
    fn vgpu_version() {
        let handle = nvml();
        test(3, || handle.vgpu_version())
    }

    #[test]
    fn set_vgpu_version() {
        let handle = nvml();
        test(3, || {
            let version = VgpuVersion { min: 0, max: 0 };
            handle.set_vgpu_version(version)
        })
    }

    #[test]
    fn detect_field_id_scheme_v12_drivers() {
        let v12_drivers = ["575.51.03", "570.86.16", "580.65.06", "580.0.0"];

        for driver in v12_drivers {
            assert_eq!(detect_field_id_scheme(driver), FieldIdScheme::V12);
        }
    }

    #[test]
    fn detect_field_id_scheme_v13u1_drivers() {
        let v13u1_drivers = ["580.82.07", "580.95.05", "580.126.09", "581.0.0", "600.0.0"];

        for driver in v13u1_drivers {
            assert_eq!(detect_field_id_scheme(driver), FieldIdScheme::V13Update1);
        }
    }

    // Unparseable version strings are expected to yield the V12 scheme.
    #[test]
    fn detect_field_id_scheme_malformed() {
        for malformed in ["", "garbage"] {
            assert_eq!(detect_field_id_scheme(malformed), FieldIdScheme::V12);
        }
    }

    #[test]
    fn translate_field_id_v12_is_noop() {
        (0..300).for_each(|id| {
            assert_eq!(translate_field_id(FieldIdScheme::V12, id), id);
        });
    }

    #[test]
    fn translate_field_id_v13u1_remaps_affected_range() {
        use crate::ffi::bindings::field_id::*;

        // Exhaustive check: CLOCKS_EVENT_REASON/POWER_SYNC (v12: 251-255) → v13U1: 269-273
        let v12_clocks_event = [
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN, 269),
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN, 270),
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN, 271),
            (NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ, 272),
            (NVML_FI_DEV_POWER_SYNC_BALANCING_AF, 273),
        ];
        v12_clocks_event
            .iter()
            .for_each(|&(v12_id, expected_v13u1)| {
                let translated = translate_field_id(FieldIdScheme::V13Update1, v12_id);
                assert_eq!(
                    translated, expected_v13u1,
                    "v12 ID {v12_id} should map to v13U1 ID {expected_v13u1}"
                );
            });

        // Exhaustive check: PWR_SMOOTHING (v12: 256-273) → v13U1: 251-268
        let v12_pwr_smoothing = [
            (NVML_FI_PWR_SMOOTHING_ENABLED, 251),
            (NVML_FI_PWR_SMOOTHING_PRIV_LVL, 252),
            (NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED, 253),
            (NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL, 254),
            (NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR, 255),
            (NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING, 256),
            (NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING, 257),
            (
                NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING,
                258,
            ),
            (NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES, 259),
            (NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR, 260),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE, 261),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE, 262),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL, 263),
            (NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE, 264),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR, 265),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE, 266),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE, 267),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL, 268),
        ];
        v12_pwr_smoothing
            .iter()
            .for_each(|&(v12_id, expected_v13u1)| {
                let translated = translate_field_id(FieldIdScheme::V13Update1, v12_id);
                assert_eq!(
                    translated, expected_v13u1,
                    "v12 ID {v12_id} should map to v13U1 ID {expected_v13u1}"
                );
            });

        // Verify the mapping is bijective (no collisions) over the full 251-273 range:
        // once sorted, the translated IDs must be exactly the input range again
        let domain: Vec<u32> = (251..=273).collect();
        let mut images: Vec<u32> = (251..=273)
            .map(|id| translate_field_id(FieldIdScheme::V13Update1, id))
            .collect();
        images.sort_unstable();

        assert_eq!(
            images, domain,
            "remapping must be a bijection over 251-273"
        );
    }

    // IDs just outside the remapped range must come back unchanged.
    #[test]
    fn translate_field_id_v13u1_passthrough_outside_range() {
        for id in [0, 250, 274] {
            assert_eq!(translate_field_id(FieldIdScheme::V13Update1, id), id);
        }
    }
}