nvml_wrapper/lib.rs
1/*!
2A safe and ergonomic Rust wrapper for the [NVIDIA Management Library][nvml] (NVML),
3a C-based programmatic interface for monitoring and managing various states within
4NVIDIA GPUs.
5
6```
7use nvml_wrapper::Nvml;
8# use nvml_wrapper::error::*;
9# fn test() -> Result<(), NvmlError> {
10
11let nvml = Nvml::init()?;
12// Get the first `Device` (GPU) in the system
13let device = nvml.device_by_index(0)?;
14
15let brand = device.brand()?; // GeForce on my system
16let fan_speed = device.fan_speed(0)?; // Currently 17% on my system
17let power_limit = device.enforced_power_limit()?; // 275k milliwatts on my system
18let encoder_util = device.encoder_utilization()?; // Currently 0 on my system; Not encoding anything
19let memory_info = device.memory_info()?; // Currently 1.63/6.37 GB used on my system
20
21// ... and there's a whole lot more you can do. Most everything in NVML is wrapped and ready to go
22# Ok(())
23# }
24```
25
26NVML is intended to be a platform for building 3rd-party applications, and is
27also the underlying library for NVIDIA's nvidia-smi tool.
28
29## Usage
30
31`nvml-wrapper` builds on top of generated bindings for NVML that make use of the
32[`libloading`][libloading] crate. This means the NVML library gets loaded upon
33calling `Nvml::init` and can return an error if NVML isn't present, making it
34possible to drop NVIDIA-related features in your code at runtime on systems that
35don't have relevant hardware.
36
37Successful execution of `Nvml::init` means:
38
39* The NVML library was present on the system and able to be opened
40* The function symbol to initialize NVML was loaded and called successfully
41* An attempt has been made to load all other NVML function symbols
42
43Every function you call thereafter will individually return an error if it couldn't
44be loaded from the NVML library during the `Nvml::init` call.
45
46Note that it's not advised to repeatedly call `Nvml::init` as the constructor
47has to perform all the work of loading the function symbols from the library
48each time it gets called. Instead, call `Nvml::init` once and store the resulting
49`Nvml` instance somewhere to be accessed throughout the lifetime of your program
50(perhaps in a [`once_cell`][once_cell]).
51
52## NVML Support
53
54This wrapper is being developed against and currently supports NVML version
5511. Each new version of NVML is guaranteed to be backwards-compatible according
56to NVIDIA, so this wrapper should continue to work without issue regardless of
57NVML version bumps.
58
59### Legacy Functions
60
61Sometimes there will be function-level API version bumps in new NVML releases.
62For example:
63
64```text
65nvmlDeviceGetComputeRunningProcesses
66nvmlDeviceGetComputeRunningProcesses_v2
67nvmlDeviceGetComputeRunningProcesses_v3
68```
69
70The older versions of the functions will generally continue to work with the
71newer NVML releases; however, the newer function versions will not work with
72older NVML installs.
73
74By default this wrapper only provides access to the newest function versions.
75Enable the `legacy-functions` feature if you require the ability to call older
76functions.
77
78## MSRV
79
80The Minimum Supported Rust Version is currently 1.51.0. I will not go out of my
81way to avoid bumping this.
82
83## Cargo Features
84
85The `serde` feature can be toggled on in order to `#[derive(Serialize, Deserialize)]`
86for every NVML data structure.
87
88[nvml]: https://developer.nvidia.com/nvidia-management-library-nvml
89[libloading]: https://github.com/nagisa/rust_libloading
90[once_cell]: https://docs.rs/once_cell/latest/once_cell/sync/struct.Lazy.html
91*/
92
93#![recursion_limit = "1024"]
94#![allow(non_upper_case_globals)]
95
96extern crate libloading;
97extern crate nvml_wrapper_sys as ffi;
98
99pub mod bitmasks;
100pub mod device;
101pub mod enum_wrappers;
102pub mod enums;
103pub mod error;
104pub mod event;
105pub mod gpm;
106pub mod high_level;
107pub mod nv_link;
108pub mod struct_wrappers;
109pub mod structs;
110#[cfg(test)]
111mod test_utils;
112pub mod unit;
113pub mod vgpu;
114
115// Re-exports for convenience
116pub use crate::device::Device;
117pub use crate::event::EventSet;
118pub use crate::gpm::GpmSample;
119pub use crate::nv_link::NvLink;
120pub use crate::unit::Unit;
121
/// Re-exports from `nvml-wrapper-sys` that are necessary for use of this wrapper.
pub mod sys_exports {
    /// Use these constants to populate the `structs::device::FieldId` newtype.
    pub mod field_id {
        // Glob re-export of everything in the bindings' `field_id` module, so
        // callers do not need a direct dependency on the `-sys` crate for it.
        pub use crate::ffi::bindings::field_id::*;
    }
}
129
130#[cfg(target_os = "linux")]
131use std::convert::TryInto;
132#[cfg(target_os = "linux")]
133use std::ptr;
134use std::{
135 convert::TryFrom,
136 ffi::{CStr, CString, OsStr},
137 mem::{self, ManuallyDrop},
138 os::raw::{c_int, c_uint},
139};
140
141use static_assertions::assert_impl_all;
142
143#[cfg(target_os = "linux")]
144use crate::enum_wrappers::device::TopologyLevel;
145
146use crate::error::{nvml_sym, nvml_try, NvmlError};
147use crate::ffi::bindings::*;
148
149use crate::struct_wrappers::ExcludedDeviceInfo;
150
151#[cfg(target_os = "linux")]
152use crate::struct_wrappers::device::PciInfo;
153use crate::struct_wrappers::device::VgpuVersion;
154use crate::struct_wrappers::unit::HwbcEntry;
155
156use crate::bitmasks::InitFlags;
157
/// Default NVML library file name passed to the loader by `Nvml::init` and
/// `Nvml::init_with_flags` on every target other than Linux.
#[cfg(not(target_os = "linux"))]
const LIB_PATH: &str = "nvml.dll";

/// Default NVML library file name passed to the loader by `Nvml::init` and
/// `Nvml::init_with_flags` on Linux.
#[cfg(target_os = "linux")]
const LIB_PATH: &str = "libnvidia-ml.so.1";
163
/// Determines the major version of the CUDA driver given the full version.
///
/// The full version is treated as `major * 1000 + minor * 10`, so `12040`
/// yields `12`. Obtain the full version via `Nvml::sys_cuda_driver_version()`.
///
/// This is a `const fn`, so it can also be evaluated in constant contexts.
pub const fn cuda_driver_version_major(version: i32) -> i32 {
    version / 1000
}
170
/// Determines the minor version of the CUDA driver given the full version.
///
/// The full version is treated as `major * 1000 + minor * 10`, so `12040`
/// yields `4`. Obtain the full version via `Nvml::sys_cuda_driver_version()`.
///
/// This is a `const fn`, so it can also be evaluated in constant contexts.
pub const fn cuda_driver_version_minor(version: i32) -> i32 {
    (version % 1000) / 10
}
177
178/**
179The main struct that this library revolves around.
180
181According to NVIDIA's documentation, "It is the user's responsibility to call `nvmlInit()`
182before calling any other methods, and `nvmlShutdown()` once NVML is no longer being used."
183This struct is used to enforce those rules.
184
185Also according to NVIDIA's documentation, "NVML is thread-safe so it is safe to make
186simultaneous NVML calls from multiple threads." In the Rust world, this translates to `NVML`
187being `Send` + `Sync`. You can `.clone()` an `Arc` wrapped `NVML` and enjoy using it on any thread.
188
189NOTE: If you care about possible errors returned from `nvmlShutdown()`, use the `.shutdown()`
190method on this struct. **The `Drop` implementation ignores errors.**
191
192When reading documentation on this struct and its members, remember that a lot of it,
193especially in regards to errors returned, is copied from NVIDIA's docs. While they can be found
194online [here](http://docs.nvidia.com/deploy/nvml-api/index.html), the hosted docs sometimes outdated
195and may not accurately reflect the version of NVML that this library is written for; beware. You
196should ideally read the doc comments on an up-to-date NVML API header. Such a header can be
197downloaded as part of the [CUDA toolkit](https://developer.nvidia.com/cuda-downloads).
198*/
199/// Describes which field ID numbering scheme the loaded NVML driver uses for
200/// IDs 251-273. NVIDIA broke ABI compatibility for these IDs between the
201/// original CUDA 13.0 release and CUDA 13.0 Update 1 (driver >= 580.82).
202///
203/// See <https://docs.nvidia.com/deploy/nvml-api/known-issues.html>
204#[derive(Debug, Clone, Copy, PartialEq, Eq)]
205pub enum FieldIdScheme {
206 /// Used by drivers before 580.82 (CUDA 12.x and original CUDA 13.0).
207 /// IDs 251-255 are CLOCKS_EVENT_REASON/POWER_SYNC, 256-273 are PWR_SMOOTHING.
208 V12,
209 /// Used by drivers >= 580.82 (CUDA 13.0 Update 1+).
210 /// IDs 251-268 are PWR_SMOOTHING, 269-273 are CLOCKS_EVENT_REASON/POWER_SYNC.
211 V13Update1,
212}
213
214pub struct Nvml {
215 lib: ManuallyDrop<NvmlLib>,
216 field_id_scheme: FieldIdScheme,
217}
218
219assert_impl_all!(Nvml: Send, Sync);
220
221impl std::fmt::Debug for Nvml {
222 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
223 f.write_str("NVML")
224 }
225}
226
227/// Parse a driver version string (e.g. "580.82.07") and determine the field ID scheme.
228/// Returns `V13Update1` for driver >= 580.82, `V12` otherwise.
229fn detect_field_id_scheme(driver_version: &str) -> FieldIdScheme {
230 let mut parts = driver_version.split('.');
231 let major: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
232 let minor: u32 = parts.next().and_then(|s| s.parse().ok()).unwrap_or(0);
233
234 if major > 580 || (major == 580 && minor >= 82) {
235 FieldIdScheme::V13Update1
236 } else {
237 FieldIdScheme::V12
238 }
239}
240
241/// Translate a field ID from the canonical v12 numbering to the v13U1 numbering.
242/// Only affects IDs in the 251-273 range. IDs outside this range pass through unchanged.
243pub(crate) fn translate_field_id(scheme: FieldIdScheme, id: u32) -> u32 {
244 if scheme == FieldIdScheme::V12 {
245 return id;
246 }
247 // V13Update1 remapping:
248 // v12 251-255 (CLOCKS_EVENT_REASON/POWER_SYNC) → v13U1 269-273
249 // v12 256-273 (PWR_SMOOTHING) → v13U1 251-268
250 match id {
251 251..=255 => id + 18,
252 256..=273 => id - 5,
253 other => other,
254 }
255}
256
257impl Nvml {
258 /**
259 Handles NVML initialization and must be called before doing anything else.
260
261 While it is possible to initialize `NVML` multiple times (NVIDIA's docs state
262 that reference counting is used internally), you should strive to initialize
263 `NVML` once at the start of your program's execution; the constructors handle
264 dynamically loading function symbols from the `NVML` lib and are therefore
265 somewhat expensive.
266
267 Note that this will initialize NVML but not any GPUs. This means that NVML can
268 communicate with a GPU even when other GPUs in a system are bad or unstable.
269
    By default, initialization looks for "libnvidia-ml.so.1" on Linux and "nvml.dll"
    on all other platforms (i.e. Windows). These default names should work for default
    installs on those platforms; if further specification is required, use `Nvml::builder`.
273
274 # Errors
275
276 * `DriverNotLoaded`, if the NVIDIA driver is not running
277 * `NoPermission`, if NVML does not have permission to talk to the driver
278 * `Unknown`, on any unexpected error
279 */
280 // Checked against local
281 #[doc(alias = "nvmlInit_v2")]
    pub fn init() -> Result<Self, NvmlError> {
        // Delegate to the shared loader, using the platform's default library
        // file name (`LIB_PATH`).
        Self::init_internal(LIB_PATH)
    }
285
286 fn init_internal(path: impl AsRef<std::ffi::OsStr>) -> Result<Self, NvmlError> {
287 let lib = unsafe {
288 let lib = NvmlLib::new(path)?;
289 let sym = nvml_sym(lib.nvmlInit_v2.as_ref())?;
290
291 nvml_try(sym())?;
292 ManuallyDrop::new(lib)
293 };
294
295 let mut nvml = Self {
296 lib,
297 field_id_scheme: FieldIdScheme::V12,
298 };
299 nvml.field_id_scheme = nvml
300 .sys_driver_version()
301 .map(|v| detect_field_id_scheme(&v))
302 .unwrap_or(FieldIdScheme::V12);
303
304 Ok(nvml)
305 }
306
307 /**
308 An initialization function that allows you to pass flags to control certain behaviors.
309
310 This is the same as `init()` except for the addition of flags.
311
312 # Errors
313
314 * `DriverNotLoaded`, if the NVIDIA driver is not running
315 * `NoPermission`, if NVML does not have permission to talk to the driver
316 * `Unknown`, on any unexpected error
317
318 # Examples
319
320 ```
321 # use nvml_wrapper::Nvml;
322 # use nvml_wrapper::error::*;
323 use nvml_wrapper::bitmasks::InitFlags;
324
325 # fn main() -> Result<(), NvmlError> {
326 // Don't fail if the system doesn't have any NVIDIA GPUs
327 //
328 // Also, don't attach any GPUs during initialization
329 Nvml::init_with_flags(InitFlags::NO_GPUS | InitFlags::NO_ATTACH)?;
330 # Ok(())
331 # }
332 ```
333 */
334 #[doc(alias = "nvmlInitWithFlags")]
    pub fn init_with_flags(flags: InitFlags) -> Result<Self, NvmlError> {
        // Delegate to the shared loader, using the platform's default library
        // file name (`LIB_PATH`) and forwarding the caller's flags.
        Self::init_with_flags_internal(LIB_PATH, flags)
    }
338
339 fn init_with_flags_internal(
340 path: impl AsRef<std::ffi::OsStr>,
341 flags: InitFlags,
342 ) -> Result<Self, NvmlError> {
343 let lib = unsafe {
344 let lib = NvmlLib::new(path)?;
345 let sym = nvml_sym(lib.nvmlInitWithFlags.as_ref())?;
346
347 nvml_try(sym(flags.bits()))?;
348 ManuallyDrop::new(lib)
349 };
350
351 let mut nvml = Self {
352 lib,
353 field_id_scheme: FieldIdScheme::V12,
354 };
355 nvml.field_id_scheme = nvml
356 .sys_driver_version()
357 .map(|v| detect_field_id_scheme(&v))
358 .unwrap_or(FieldIdScheme::V12);
359
360 Ok(nvml)
361 }
362
    /// Create an `NvmlBuilder` for further flexibility in how NVML is initialized.
    ///
    /// The returned builder is the one produced by `NvmlBuilder::default()`.
    pub fn builder<'a>() -> NvmlBuilder<'a> {
        NvmlBuilder::default()
    }
367
    /// Get the underlying `NvmlLib` instance.
    ///
    /// This borrows the loaded bindings stored in this `Nvml`; the reference
    /// cannot outlive `self`.
    pub fn lib(&self) -> &NvmlLib {
        // Deref coercion turns `&ManuallyDrop<NvmlLib>` into `&NvmlLib`.
        &self.lib
    }
372
    /// Returns the detected field ID numbering scheme for the loaded driver.
    ///
    /// The value is determined once, while the `Nvml` instance is being
    /// initialized, and is returned by copy.
    pub fn field_id_scheme(&self) -> FieldIdScheme {
        self.field_id_scheme
    }
377
378 /**
379 Use this to shutdown NVML and release allocated resources if you care about handling
380 potential errors (*the `Drop` implementation ignores errors!*).
381
382 # Errors
383
384 * `Uninitialized`, if the library has not been successfully initialized
385 * `Unknown`, on any unexpected error
386 */
387 // Thanks to `sorear` on IRC for suggesting this approach
388 // Checked against local
389 // Tested
390 #[doc(alias = "nvmlShutdown")]
    pub fn shutdown(mut self) -> Result<(), NvmlError> {
        // Resolve the symbol first. An early return via `?` here, or from the
        // call below, happens before `self` is forgotten, so `self` is still
        // dropped in the usual way on those paths.
        let sym = nvml_sym(self.lib.nvmlShutdown.as_ref())?;

        unsafe {
            nvml_try(sym())?;
        }

        // SAFETY: we `mem::forget(self)` after this, so `self.lib` won't get
        // touched by our `Drop` impl
        let lib = unsafe { ManuallyDrop::take(&mut self.lib) };
        mem::forget(self);

        // Close the dynamically loaded library explicitly so that a failure to
        // do so is reported to the caller.
        Ok(lib.__library.close()?)
    }
405
406 /**
407 Get the number of compute devices in the system (compute device == one GPU).
408
409 Note that this count can include devices you do not have permission to access.
410
411 # Errors
412
413 * `Uninitialized`, if the library has not been successfully initialized
414 * `Unknown`, on any unexpected error
415 */
416 // Checked against local
417 // Tested
418 #[doc(alias = "nvmlDeviceGetCount_v2")]
419 pub fn device_count(&self) -> Result<u32, NvmlError> {
420 let sym = nvml_sym(self.lib.nvmlDeviceGetCount_v2.as_ref())?;
421
422 unsafe {
423 let mut count: c_uint = mem::zeroed();
424 nvml_try(sym(&mut count))?;
425
426 Ok(count)
427 }
428 }
429
430 /**
431 Gets the version of the system's graphics driver and returns it as an alphanumeric
432 string.
433
434 # Errors
435
436 * `Uninitialized`, if the library has not been successfully initialized
437 * `Utf8Error`, if the string obtained from the C function is not valid Utf8
438 */
439 // Checked against local
440 // Tested
441 #[doc(alias = "nvmlSystemGetDriverVersion")]
442 pub fn sys_driver_version(&self) -> Result<String, NvmlError> {
443 let sym = nvml_sym(self.lib.nvmlSystemGetDriverVersion.as_ref())?;
444
445 unsafe {
446 let mut version_vec = vec![0; NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE as usize];
447
448 nvml_try(sym(
449 version_vec.as_mut_ptr(),
450 NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE,
451 ))?;
452
453 let version_raw = CStr::from_ptr(version_vec.as_ptr());
454 Ok(version_raw.to_str()?.into())
455 }
456 }
457
458 /**
459 Gets the version of the system's NVML library and returns it as an alphanumeric
460 string.
461
462 # Errors
463
464 * `Utf8Error`, if the string obtained from the C function is not valid Utf8
465 */
466 // Checked against local
467 // Tested
468 #[doc(alias = "nvmlSystemGetNVMLVersion")]
469 pub fn sys_nvml_version(&self) -> Result<String, NvmlError> {
470 let sym = nvml_sym(self.lib.nvmlSystemGetNVMLVersion.as_ref())?;
471
472 unsafe {
473 let mut version_vec = vec![0; NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE as usize];
474
475 nvml_try(sym(
476 version_vec.as_mut_ptr(),
477 NVML_SYSTEM_NVML_VERSION_BUFFER_SIZE,
478 ))?;
479
480 // Thanks to `Amaranth` on IRC for help with this
481 let version_raw = CStr::from_ptr(version_vec.as_ptr());
482 Ok(version_raw.to_str()?.into())
483 }
484 }
485
486 /**
487 Gets the version of the system's CUDA driver.
488
489 Calls into the CUDA library (cuDriverGetVersion()).
490
491 You can use `cuda_driver_version_major` and `cuda_driver_version_minor`
492 to get the major and minor driver versions from this number.
493
494 # Errors
495
496 * `FunctionNotFound`, if cuDriverGetVersion() is not found in the shared library
497 * `LibraryNotFound`, if libcuda.so.1 or libcuda.dll cannot be found
498 */
499 #[doc(alias = "nvmlSystemGetCudaDriverVersion_v2")]
500 pub fn sys_cuda_driver_version(&self) -> Result<i32, NvmlError> {
501 let sym = nvml_sym(self.lib.nvmlSystemGetCudaDriverVersion_v2.as_ref())?;
502
503 unsafe {
504 let mut version: c_int = mem::zeroed();
505 nvml_try(sym(&mut version))?;
506
507 Ok(version)
508 }
509 }
510
511 /**
512 Gets the name of the process for the given process ID, cropped to the provided length.
513
514 # Errors
515
516 * `Uninitialized`, if the library has not been successfully initialized
517 * `InvalidArg`, if the length is 0 (if this is returned without length being 0, file an issue)
518 * `NotFound`, if the process does not exist
519 * `NoPermission`, if the user doesn't have permission to perform the operation
520 * `Utf8Error`, if the string obtained from the C function is not valid UTF-8. NVIDIA's docs say
521 that the string encoding is ANSI, so this may very well happen.
522 * `Unknown`, on any unexpected error
523 */
524 // TODO: The docs say the string is ANSI-encoded. Not sure if I should try
525 // to do anything about that
526 // Checked against local
527 // Tested
528 #[doc(alias = "nvmlSystemGetProcessName")]
529 pub fn sys_process_name(&self, pid: u32, length: usize) -> Result<String, NvmlError> {
530 let sym = nvml_sym(self.lib.nvmlSystemGetProcessName.as_ref())?;
531
532 unsafe {
533 let mut name_vec = vec![0; length];
534
535 nvml_try(sym(pid, name_vec.as_mut_ptr(), length as c_uint))?;
536
537 let name_raw = CStr::from_ptr(name_vec.as_ptr());
538 Ok(name_raw.to_str()?.into())
539 }
540 }
541
542 /**
543 Acquire the handle for a particular device based on its index (starts at 0).
544
545 Usage of this function causes NVML to initialize the target GPU. Additional
546 GPUs may be initialized if the target GPU is an SLI slave.
547
548 You can determine valid indices by using `.device_count()`. This
549 function doesn't call that for you, but the actual C function to get
550 the device handle will return an error in the case of an invalid index.
551 This means that the `InvalidArg` error will be returned if you pass in
552 an invalid index.
553
554 NVIDIA's docs state that "The order in which NVML enumerates devices has
555 no guarantees of consistency between reboots. For that reason it is recommended
556 that devices be looked up by their PCI ids or UUID." In this library, that translates
557 into usage of `.device_by_uuid()` and `.device_by_pci_bus_id()`.
558
559 The NVML index may not correlate with other APIs such as the CUDA device index.
560
561 # Errors
562
563 * `Uninitialized`, if the library has not been successfully initialized
564 * `InvalidArg`, if index is invalid
565 * `InsufficientPower`, if any attached devices have improperly attached external power cables
566 * `NoPermission`, if the user doesn't have permission to talk to this device
567 * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
568 * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
569 * `Unknown`, on any unexpected error
570 */
571 // Checked against local
572 // Tested
573 #[doc(alias = "nvmlDeviceGetHandleByIndex_v2")]
574 pub fn device_by_index(&self, index: u32) -> Result<Device<'_>, NvmlError> {
575 let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByIndex_v2.as_ref())?;
576
577 unsafe {
578 let mut device: nvmlDevice_t = mem::zeroed();
579 nvml_try(sym(index, &mut device))?;
580
581 Ok(Device::new(device, self))
582 }
583 }
584
585 /**
586 Acquire the handle for a particular device based on its PCI bus ID.
587
588 Usage of this function causes NVML to initialize the target GPU. Additional
589 GPUs may be initialized if the target GPU is an SLI slave.
590
591 The bus ID corresponds to the `bus_id` returned by `Device.pci_info()`.
592
593 # Errors
594
595 * `Uninitialized`, if the library has not been successfully initialized
596 * `InvalidArg`, if `pci_bus_id` is invalid
597 * `NotFound`, if `pci_bus_id` does not match a valid device on the system
598 * `InsufficientPower`, if any attached devices have improperly attached external power cables
599 * `NoPermission`, if the user doesn't have permission to talk to this device
600 * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
601 * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
602 * `NulError`, for which you can read the docs on `std::ffi::NulError`
603 * `Unknown`, on any unexpected error
604 */
605 // Checked against local
606 // Tested
607 #[doc(alias = "nvmlDeviceGetHandleByPciBusId_v2")]
608 pub fn device_by_pci_bus_id<S: AsRef<str>>(
609 &self,
610 pci_bus_id: S,
611 ) -> Result<Device<'_>, NvmlError>
612 where
613 Vec<u8>: From<S>,
614 {
615 let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByPciBusId_v2.as_ref())?;
616
617 unsafe {
618 let c_string = CString::new(pci_bus_id)?;
619 let mut device: nvmlDevice_t = mem::zeroed();
620
621 nvml_try(sym(c_string.as_ptr(), &mut device))?;
622
623 Ok(Device::new(device, self))
624 }
625 }
626
627 /// Not documenting this because it's deprecated and does not seem to work
628 /// anymore.
629 // Tested (for an error)
630 #[deprecated(note = "use `.device_by_uuid()`, this errors on dual GPU boards")]
631 #[doc(alias = "nvmlDeviceGetHandleBySerial")]
632 pub fn device_by_serial<S: AsRef<str>>(&self, board_serial: S) -> Result<Device<'_>, NvmlError>
633 where
634 Vec<u8>: From<S>,
635 {
636 let sym = nvml_sym(self.lib.nvmlDeviceGetHandleBySerial.as_ref())?;
637
638 unsafe {
639 let c_string = CString::new(board_serial)?;
640 let mut device: nvmlDevice_t = mem::zeroed();
641
642 nvml_try(sym(c_string.as_ptr(), &mut device))?;
643
644 Ok(Device::new(device, self))
645 }
646 }
647
648 /**
649 Acquire the handle for a particular device based on its globally unique immutable
650 UUID.
651
652 Usage of this function causes NVML to initialize the target GPU. Additional
653 GPUs may be initialized as the function called within searches for the target GPU.
654
655 # Errors
656
657 * `Uninitialized`, if the library has not been successfully initialized
658 * `InvalidArg`, if `uuid` is invalid
659 * `NotFound`, if `uuid` does not match a valid device on the system
660 * `InsufficientPower`, if any attached devices have improperly attached external power cables
661 * `IrqIssue`, if the NVIDIA kernel detected an interrupt issue with the attached GPUs
662 * `GpuLost`, if the target GPU has fallen off the bus or is otherwise inaccessible
663 * `NulError`, for which you can read the docs on `std::ffi::NulError`
664 * `Unknown`, on any unexpected error
665
666 NVIDIA doesn't mention `NoPermission` for this one. Strange!
667 */
668 // Checked against local
669 // Tested
670 #[doc(alias = "nvmlDeviceGetHandleByUUID")]
671 pub fn device_by_uuid<S: AsRef<str>>(&self, uuid: S) -> Result<Device<'_>, NvmlError>
672 where
673 Vec<u8>: From<S>,
674 {
675 let sym = nvml_sym(self.lib.nvmlDeviceGetHandleByUUID.as_ref())?;
676
677 unsafe {
678 let c_string = CString::new(uuid)?;
679 let mut device: nvmlDevice_t = mem::zeroed();
680
681 nvml_try(sym(c_string.as_ptr(), &mut device))?;
682
683 Ok(Device::new(device, self))
684 }
685 }
686
687 /**
688 Gets the common ancestor for two devices.
689
690 Note: this is the same as `Device.topology_common_ancestor()`.
691
692 # Errors
693
694 * `InvalidArg`, if the device is invalid
695 * `NotSupported`, if this `Device` or the OS does not support this feature
    * `UnexpectedVariant`, for which you can read the docs on `NvmlError::UnexpectedVariant`
697 * `Unknown`, on any unexpected error
698
699 # Platform Support
700
701 Only supports Linux.
702 */
703 // Checked against local
704 // Tested
705 #[cfg(target_os = "linux")]
706 #[doc(alias = "nvmlDeviceGetTopologyCommonAncestor")]
707 pub fn topology_common_ancestor(
708 &self,
709 device1: &Device,
710 device2: &Device,
711 ) -> Result<TopologyLevel, NvmlError> {
712 let sym = nvml_sym(self.lib.nvmlDeviceGetTopologyCommonAncestor.as_ref())?;
713
714 unsafe {
715 let mut level: nvmlGpuTopologyLevel_t = mem::zeroed();
716
717 nvml_try(sym(device1.handle(), device2.handle(), &mut level))?;
718
719 TopologyLevel::try_from(level)
720 }
721 }
722
723 /**
724 Acquire the handle for a particular `Unit` based on its index.
725
726 Valid indices are derived from the count returned by `.unit_count()`.
727 For example, if `unit_count` is 2 the valid indices are 0 and 1, corresponding
728 to UNIT 0 and UNIT 1.
729
730 Note that the order in which NVML enumerates units has no guarantees of
731 consistency between reboots.
732
733 # Errors
734
735 * `Uninitialized`, if the library has not been successfully initialized
736 * `InvalidArg`, if `index` is invalid
737 * `Unknown`, on any unexpected error
738
739 # Device Support
740
741 For S-class products.
742 */
743 // Checked against local
744 // Tested (for an error)
745 #[doc(alias = "nvmlUnitGetHandleByIndex")]
746 pub fn unit_by_index(&self, index: u32) -> Result<Unit<'_>, NvmlError> {
747 let sym = nvml_sym(self.lib.nvmlUnitGetHandleByIndex.as_ref())?;
748
749 unsafe {
750 let mut unit: nvmlUnit_t = mem::zeroed();
751 nvml_try(sym(index as c_uint, &mut unit))?;
752
753 Ok(Unit::new(unit, self))
754 }
755 }
756
757 /**
758 Checks if the passed-in `Device`s are on the same physical board.
759
760 Note: this is the same as `Device.is_on_same_board_as()`.
761
762 # Errors
763
764 * `Uninitialized`, if the library has not been successfully initialized
765 * `InvalidArg`, if either `Device` is invalid
766 * `NotSupported`, if this check is not supported by this `Device`
767 * `GpuLost`, if this `Device` has fallen off the bus or is otherwise inaccessible
768 * `Unknown`, on any unexpected error
769 */
770 // Checked against local
771 // Tested
772 #[doc(alias = "nvmlDeviceOnSameBoard")]
773 pub fn are_devices_on_same_board(
774 &self,
775 device1: &Device,
776 device2: &Device,
777 ) -> Result<bool, NvmlError> {
778 let sym = nvml_sym(self.lib.nvmlDeviceOnSameBoard.as_ref())?;
779
780 unsafe {
781 let mut bool_int: c_int = mem::zeroed();
782
783 nvml_try(sym(device1.handle(), device2.handle(), &mut bool_int))?;
784
785 match bool_int {
786 0 => Ok(false),
787 _ => Ok(true),
788 }
789 }
790 }
791
792 /**
793 Gets the set of GPUs that have a CPU affinity with the given CPU number.
794
795 # Errors
796
797 * `InvalidArg`, if `cpu_number` is invalid
798 * `NotSupported`, if this `Device` or the OS does not support this feature
799 * `Unknown`, an error has occurred in the underlying topology discovery
800
801 # Platform Support
802
803 Only supports Linux.
804 */
805 // Tested
806 #[cfg(target_os = "linux")]
807 #[doc(alias = "nvmlSystemGetTopologyGpuSet")]
808 pub fn topology_gpu_set(&self, cpu_number: u32) -> Result<Vec<Device<'_>>, NvmlError> {
809 let sym = nvml_sym(self.lib.nvmlSystemGetTopologyGpuSet.as_ref())?;
810
811 unsafe {
812 let mut count = match self.topology_gpu_set_count(cpu_number)? {
813 0 => return Ok(vec![]),
814 value => value,
815 };
816 let mut devices: Vec<nvmlDevice_t> = vec![mem::zeroed(); count as usize];
817
818 nvml_try(sym(cpu_number, &mut count, devices.as_mut_ptr()))?;
819
820 Ok(devices.into_iter().map(|d| Device::new(d, self)).collect())
821 }
822 }
823
824 // Helper function for the above.
825 #[cfg(target_os = "linux")]
826 fn topology_gpu_set_count(&self, cpu_number: u32) -> Result<c_uint, NvmlError> {
827 let sym = nvml_sym(self.lib.nvmlSystemGetTopologyGpuSet.as_ref())?;
828
829 unsafe {
830 // Indicates that we want the count
831 let mut count: c_uint = 0;
832
833 // Passing null doesn't indicate that we want the count, just allowed
834 nvml_try(sym(cpu_number, &mut count, ptr::null_mut()))?;
835
836 Ok(count)
837 }
838 }
839
840 /**
841 Gets the IDs and firmware versions for any Host Interface Cards in the system.
842
843 # Errors
844
845 * `Uninitialized`, if the library has not been successfully initialized
846
847 # Device Support
848
849 Supports S-class products.
850 */
851 // Checked against local
852 // Tested
853 #[doc(alias = "nvmlSystemGetHicVersion")]
854 pub fn hic_versions(&self) -> Result<Vec<HwbcEntry>, NvmlError> {
855 let sym = nvml_sym(self.lib.nvmlSystemGetHicVersion.as_ref())?;
856
857 unsafe {
858 let mut count: c_uint = match self.hic_count()? {
859 0 => return Ok(vec![]),
860 value => value,
861 };
862 let mut hics: Vec<nvmlHwbcEntry_t> = vec![mem::zeroed(); count as usize];
863
864 nvml_try(sym(&mut count, hics.as_mut_ptr()))?;
865
866 hics.into_iter().map(HwbcEntry::try_from).collect()
867 }
868 }
869
870 /**
871 Gets the count of Host Interface Cards in the system.
872
873 # Errors
874
875 * `Uninitialized`, if the library has not been successfully initialized
876
877 # Device Support
878
879 Supports S-class products.
880 */
881 // Tested as part of the above method
882 #[doc(alias = "nvmlSystemGetHicVersion")]
883 pub fn hic_count(&self) -> Result<u32, NvmlError> {
884 let sym = nvml_sym(self.lib.nvmlSystemGetHicVersion.as_ref())?;
885
886 unsafe {
887 /*
888 NVIDIA doesn't even say that `count` will be set to the count if
889 `InsufficientSize` is returned. But we can assume sanity, right?
890
891 The idea here is:
892 If there are 0 HICs, NVML_SUCCESS is returned, `count` is set
893 to 0. We return count, all good.
894 If there is 1 HIC, NVML_SUCCESS is returned, `count` is set to
895 1. We return count, all good.
896 If there are >= 2 HICs, NVML_INSUFFICIENT_SIZE is returned.
897 `count` is theoretically set to the actual count, and we
898 return it.
899 */
900 let mut count: c_uint = 1;
901 let mut hics: [nvmlHwbcEntry_t; 1] = [mem::zeroed()];
902
903 match sym(&mut count, hics.as_mut_ptr()) {
904 nvmlReturn_enum_NVML_SUCCESS | nvmlReturn_enum_NVML_ERROR_INSUFFICIENT_SIZE => {
905 Ok(count)
906 }
907 // We know that this will be an error
908 other => nvml_try(other).map(|_| 0),
909 }
910 }
911 }
912
913 /**
914 Gets the number of units in the system.
915
916 # Errors
917
918 * `Uninitialized`, if the library has not been successfully initialized
919 * `Unknown`, on any unexpected error
920
921 # Device Support
922
923 Supports S-class products.
924 */
925 // Checked against local
926 // Tested
927 #[doc(alias = "nvmlUnitGetCount")]
928 pub fn unit_count(&self) -> Result<u32, NvmlError> {
929 let sym = nvml_sym(self.lib.nvmlUnitGetCount.as_ref())?;
930
931 unsafe {
932 let mut count: c_uint = mem::zeroed();
933 nvml_try(sym(&mut count))?;
934
935 Ok(count)
936 }
937 }
938
939 /**
940 Create an empty set of events.
941
942 # Errors
943
944 * `Uninitialized`, if the library has not been successfully initialized
945 * `Unknown`, on any unexpected error
946
947 # Device Support
948
949 Supports Fermi and newer fully supported devices.
950 */
951 // Checked against local
952 // Tested
953 #[doc(alias = "nvmlEventSetCreate")]
954 pub fn create_event_set(&self) -> Result<EventSet<'_>, NvmlError> {
955 let sym = nvml_sym(self.lib.nvmlEventSetCreate.as_ref())?;
956
957 unsafe {
958 let mut set: nvmlEventSet_t = mem::zeroed();
959 nvml_try(sym(&mut set))?;
960
961 Ok(EventSet::new(set, self))
962 }
963 }
964
965 /**
966 Request the OS and the NVIDIA kernel driver to rediscover a portion of the PCI
967 subsystem in search of GPUs that were previously removed.
968
969 The portion of the PCI tree can be narrowed by specifying a domain, bus, and
970 device in the passed-in `pci_info`. **If all of these fields are zeroes, the
971 entire PCI tree will be searched.** Note that for long-running NVML processes,
972 the enumeration of devices will change based on how many GPUs are discovered
973 and where they are inserted in bus order.
974
975 All newly discovered GPUs will be initialized and have their ECC scrubbed which
976 may take several seconds per GPU. **All device handles are no longer guaranteed
977 to be valid post discovery**. I am not sure if this means **all** device
978 handles, literally, or if NVIDIA is referring to handles that had previously
979 been obtained to devices that were then removed and have now been
980 re-discovered.
981
982 Must be run as administrator.
983
984 # Errors
985
986 * `Uninitialized`, if the library has not been successfully initialized
987 * `OperatingSystem`, if the operating system is denying this feature
988 * `NoPermission`, if the calling process has insufficient permissions to
989 perform this operation
990 * `NulError`, if an issue is encountered when trying to convert a Rust
991 `String` into a `CString`.
992 * `Unknown`, on any unexpected error
993
994 # Device Support
995
996 Supports Pascal and newer fully supported devices.
997
998 Some Kepler devices are also supported (that's all NVIDIA says, no specifics).
999
1000 # Platform Support
1001
1002 Only supports Linux.
1003 */
1004 // TODO: constructor for default pci_infos ^
1005 // Checked against local
1006 // Tested
1007 #[cfg(target_os = "linux")]
1008 #[doc(alias = "nvmlDeviceDiscoverGpus")]
1009 pub fn discover_gpus(&self, pci_info: PciInfo) -> Result<(), NvmlError> {
1010 let sym = nvml_sym(self.lib.nvmlDeviceDiscoverGpus.as_ref())?;
1011
1012 unsafe { nvml_try(sym(&mut pci_info.try_into()?)) }
1013 }
1014
1015 /**
1016 Gets the number of excluded GPU devices in the system.
1017
1018 # Device Support
1019
1020 Supports all devices.
1021 */
1022 #[doc(alias = "nvmlGetExcludedDeviceCount")]
1023 pub fn excluded_device_count(&self) -> Result<u32, NvmlError> {
1024 let sym = nvml_sym(self.lib.nvmlGetExcludedDeviceCount.as_ref())?;
1025
1026 unsafe {
1027 let mut count: c_uint = mem::zeroed();
1028
1029 nvml_try(sym(&mut count))?;
1030 Ok(count)
1031 }
1032 }
1033
1034 /**
1035 Gets information for the specified excluded device.
1036
1037 # Errors
1038
1039 * `InvalidArg`, if the given index is invalid
1040 * `Utf8Error`, if strings obtained from the C function are not valid Utf8
1041
1042 # Device Support
1043
1044 Supports all devices.
1045 */
1046 #[doc(alias = "nvmlGetExcludedDeviceInfoByIndex")]
1047 pub fn excluded_device_info(&self, index: u32) -> Result<ExcludedDeviceInfo, NvmlError> {
1048 let sym = nvml_sym(self.lib.nvmlGetExcludedDeviceInfoByIndex.as_ref())?;
1049
1050 unsafe {
1051 let mut info: nvmlExcludedDeviceInfo_t = mem::zeroed();
1052
1053 nvml_try(sym(index, &mut info))?;
1054 ExcludedDeviceInfo::try_from(info)
1055 }
1056 }
1057
1058 /**
1059 Gets the loaded vGPU list of capabilities
1060
1061 # Errors
1062
1063 * `Uninitialized`, if the library has not been successfully initialized
1064 * `Unknown`, on any unexpected error
1065
1066 # Device Support
1067
1068 Supports all devices.
1069 */
1070 #[doc(alias = "nvmlGetVgpuDriverCapabilities")]
1071 pub fn vgpu_driver_capabilities(
1072 &self,
1073 capability: nvmlVgpuDriverCapability_t,
1074 ) -> Result<u32, NvmlError> {
1075 let sym = nvml_sym(self.lib.nvmlGetVgpuDriverCapabilities.as_ref())?;
1076
1077 unsafe {
1078 let mut mask: u32 = mem::zeroed();
1079
1080 nvml_try(sym(capability, &mut mask))?;
1081 Ok(mask)
1082 }
1083 }
1084
1085 /**
1086 Get the supported and actual vGPU versions range.
1087
1088 # Errors
1089
1090 * `Uninitialized`, if the library has not been successfully initialized
1091 * `Unknown`, on any unexpected error
1092
1093 # Device Support
1094 */
1095 #[doc(alias = "nvmlGetVgpuVersion")]
1096 pub fn vgpu_version(&self) -> Result<(VgpuVersion, VgpuVersion), NvmlError> {
1097 let sym = nvml_sym(self.lib.nvmlGetVgpuVersion.as_ref())?;
1098
1099 unsafe {
1100 let mut supported: nvmlVgpuVersion_t = mem::zeroed();
1101 let mut current: nvmlVgpuVersion_t = mem::zeroed();
1102
1103 nvml_try(sym(&mut supported, &mut current))?;
1104 Ok((VgpuVersion::from(supported), VgpuVersion::from(current)))
1105 }
1106 }
1107
1108 #[doc(alias = "nvmlSetVgpuVersion")]
1109 pub fn set_vgpu_version(&self, version: VgpuVersion) -> Result<(), NvmlError> {
1110 let sym = nvml_sym(self.lib.nvmlSetVgpuVersion.as_ref())?;
1111
1112 unsafe { nvml_try(sym(&mut version.as_c())) }
1113 }
1114}
1115
1116/// This `Drop` implementation ignores errors! Use the `.shutdown()` method on
1117/// the `Nvml` struct
1118/// if you care about handling them.
1119impl Drop for Nvml {
1120 #[doc(alias = "nvmlShutdown")]
1121 fn drop(&mut self) {
1122 unsafe {
1123 self.lib.nvmlShutdown();
1124
1125 // SAFETY: called after the last usage of `self.lib`
1126 ManuallyDrop::drop(&mut self.lib);
1127 }
1128 }
1129}
1130
1131/**
1132A builder struct that provides further flexibility in how NVML is initialized.
1133
1134# Examples
1135
1136Initialize NVML with a non-default name for the shared object file:
1137
1138```
1139use nvml_wrapper::Nvml;
1140use std::ffi::OsStr;
1141
1142let init_result = Nvml::builder().lib_path(OsStr::new("libnvidia-ml-other-name.so")).init();
1143```
1144
1145Initialize NVML with a non-default path to the shared object file:
1146
1147```
1148use nvml_wrapper::Nvml;
1149use std::ffi::OsStr;
1150
1151let init_result = Nvml::builder().lib_path(OsStr::new("/some/path/to/libnvidia-ml.so")).init();
1152```
1153*/
1154#[derive(Debug, Clone, Eq, PartialEq, Default)]
1155pub struct NvmlBuilder<'a> {
1156 lib_path: Option<&'a OsStr>,
1157 flags: InitFlags,
1158}
1159
1160impl<'a> NvmlBuilder<'a> {
1161 /**
1162 Set the path to the NVML lib file.
1163
1164 See [`libloading`'s docs][libloading] for details about how this lib path is
1165 handled.
1166
1167 [libloading]: https://docs.rs/libloading/0.6.6/libloading/struct.Library.html#method.new
1168 */
1169 pub fn lib_path(&mut self, path: &'a OsStr) -> &mut Self {
1170 self.lib_path = Some(path);
1171 self
1172 }
1173
1174 /// Set the `InitFlags` to initialize NVML with.
1175 pub fn flags(&mut self, flags: InitFlags) -> &mut Self {
1176 self.flags = flags;
1177 self
1178 }
1179
1180 /// Perform initialization.
1181 pub fn init(&self) -> Result<Nvml, NvmlError> {
1182 let lib_path = self.lib_path.unwrap_or_else(|| LIB_PATH.as_ref());
1183
1184 if self.flags.is_empty() {
1185 Nvml::init_internal(lib_path)
1186 } else {
1187 Nvml::init_with_flags_internal(lib_path, self.flags)
1188 }
1189 }
1190}
1191
// Tests for the `Nvml` struct's methods and for the field ID translation
// helpers. Everything that goes through `nvml()` talks to the real NVML
// library; tests marked `#[ignore]` need hardware the original author's
// machine lacks.
#[cfg(test)]
mod test {
    use super::*;
    use crate::bitmasks::InitFlags;
    use crate::error::NvmlError;
    use crate::test_utils::*;

    #[test]
    fn init_with_flags() {
        Nvml::init_with_flags(InitFlags::NO_GPUS).unwrap();
    }

    #[test]
    fn shutdown() {
        test(3, || nvml().shutdown())
    }

    #[test]
    fn device_count() {
        test(3, || nvml().device_count())
    }

    #[test]
    fn sys_driver_version() {
        test(3, || nvml().sys_driver_version())
    }

    #[test]
    fn sys_nvml_version() {
        test(3, || nvml().sys_nvml_version())
    }

    #[test]
    fn sys_cuda_driver_version() {
        test(3, || nvml().sys_cuda_driver_version())
    }

    #[test]
    fn sys_cuda_driver_version_major() {
        test(3, || {
            Ok(cuda_driver_version_major(nvml().sys_cuda_driver_version()?))
        })
    }

    #[test]
    fn sys_cuda_driver_version_minor() {
        test(3, || {
            Ok(cuda_driver_version_minor(nvml().sys_cuda_driver_version()?))
        })
    }

    #[test]
    fn sys_process_name() {
        let nvml = nvml();
        test_with_device(3, &nvml, |device| {
            let processes = device.running_graphics_processes()?;

            // A machine without any running graphics processes (e.g. a headless
            // one) must not fail this test with an out-of-bounds panic
            match processes.first() {
                None => Ok("No running graphics processes".into()),
                Some(process) => match nvml.sys_process_name(process.pid, 64) {
                    Err(NvmlError::NoPermission) => Ok("No permission error".into()),
                    v => v,
                },
            }
        })
    }

    #[test]
    fn device_by_index() {
        let nvml = nvml();
        test(3, || nvml.device_by_index(0))
    }

    #[test]
    fn device_by_pci_bus_id() {
        let nvml = nvml();
        test_with_device(3, &nvml, |device| {
            let id = device.pci_info()?.bus_id;
            nvml.device_by_pci_bus_id(id)
        })
    }

    // Can't get serial on my machine
    #[ignore = "my machine does not support this call"]
    #[test]
    fn device_by_serial() {
        let nvml = nvml();

        #[allow(deprecated)]
        test_with_device(3, &nvml, |device| {
            let serial = device.serial()?;
            nvml.device_by_serial(serial)
        })
    }

    #[test]
    fn device_by_uuid() {
        let nvml = nvml();
        test_with_device(3, &nvml, |device| {
            let uuid = device.uuid()?;
            nvml.device_by_uuid(uuid)
        })
    }

    // I don't have 2 devices
    #[ignore = "my machine does not support this call"]
    #[cfg(target_os = "linux")]
    #[test]
    fn topology_common_ancestor() {
        let nvml = nvml();
        let device1 = device(&nvml);
        let device2 = nvml.device_by_index(1).expect("device");

        nvml.topology_common_ancestor(&device1, &device2)
            .expect("TopologyLevel");
    }

    // Errors on my machine

    #[test]
    #[ignore = "my machine does not support this call"]
    fn unit_by_index() {
        let nvml = nvml();
        test(3, || nvml.unit_by_index(0))
    }

    // I don't have 2 devices
    #[ignore = "my machine does not support this call"]
    #[test]
    fn are_devices_on_same_board() {
        let nvml = nvml();
        let device1 = device(&nvml);
        let device2 = nvml.device_by_index(1).expect("device");

        nvml.are_devices_on_same_board(&device1, &device2)
            .expect("bool");
    }

    #[cfg(target_os = "linux")]
    #[test]
    fn topology_gpu_set() {
        let nvml = nvml();
        test(3, || nvml.topology_gpu_set(0))
    }

    #[test]
    fn hic_version() {
        let nvml = nvml();
        test(3, || nvml.hic_versions())
    }

    #[test]
    fn unit_count() {
        test(3, || nvml().unit_count())
    }

    #[test]
    fn create_event_set() {
        let nvml = nvml();
        test(3, || nvml.create_event_set())
    }

    #[cfg(target_os = "linux")]
    #[should_panic(expected = "OperatingSystem")]
    #[test]
    fn discover_gpus() {
        let nvml = nvml();
        test_with_device(3, &nvml, |device| {
            let pci_info = device.pci_info()?;

            // We don't test with admin perms and therefore expect an error
            match nvml.discover_gpus(pci_info) {
                Err(NvmlError::NoPermission) => panic!("NoPermission"),
                other => other,
            }
        })
    }

    #[test]
    fn excluded_device_count() {
        let nvml = nvml();
        test(3, || nvml.excluded_device_count())
    }

    #[test]
    fn excluded_device_info() {
        let nvml = nvml();

        // Index 0 only exists when at least one device is excluded
        if nvml.excluded_device_count().unwrap() > 0 {
            test(3, || nvml.excluded_device_info(0))
        }
    }

    #[test]
    fn vgpu_driver_capabilities() {
        let nvml = nvml();
        test(3, || nvml.vgpu_driver_capabilities(1))
    }

    #[test]
    fn vgpu_version() {
        let nvml = nvml();
        test(3, || nvml.vgpu_version())
    }

    #[test]
    fn set_vgpu_version() {
        let nvml = nvml();
        test(3, || nvml.set_vgpu_version(VgpuVersion { min: 0, max: 0 }))
    }

    #[test]
    fn detect_field_id_scheme_v12_drivers() {
        assert_eq!(detect_field_id_scheme("575.51.03"), FieldIdScheme::V12);
        assert_eq!(detect_field_id_scheme("570.86.16"), FieldIdScheme::V12);
        assert_eq!(detect_field_id_scheme("580.65.06"), FieldIdScheme::V12);
        assert_eq!(detect_field_id_scheme("580.0.0"), FieldIdScheme::V12);
    }

    #[test]
    fn detect_field_id_scheme_v13u1_drivers() {
        assert_eq!(
            detect_field_id_scheme("580.82.07"),
            FieldIdScheme::V13Update1
        );
        assert_eq!(
            detect_field_id_scheme("580.95.05"),
            FieldIdScheme::V13Update1
        );
        assert_eq!(
            detect_field_id_scheme("580.126.09"),
            FieldIdScheme::V13Update1
        );
        assert_eq!(detect_field_id_scheme("581.0.0"), FieldIdScheme::V13Update1);
        assert_eq!(detect_field_id_scheme("600.0.0"), FieldIdScheme::V13Update1);
    }

    // Unparseable driver versions are expected to fall back to the v12 scheme
    #[test]
    fn detect_field_id_scheme_malformed() {
        assert_eq!(detect_field_id_scheme(""), FieldIdScheme::V12);
        assert_eq!(detect_field_id_scheme("garbage"), FieldIdScheme::V12);
    }

    #[test]
    fn translate_field_id_v12_is_noop() {
        for id in 0..300 {
            assert_eq!(translate_field_id(FieldIdScheme::V12, id), id);
        }
    }

    #[test]
    fn translate_field_id_v13u1_remaps_affected_range() {
        use crate::ffi::bindings::field_id::*;

        // Exhaustive check: CLOCKS_EVENT_REASON/POWER_SYNC (v12: 251-255) → v13U1: 269-273
        let v12_clocks_event = [
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_SW_THERM_SLOWDOWN, 269),
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_THERM_SLOWDOWN, 270),
            (NVML_FI_DEV_CLOCKS_EVENT_REASON_HW_POWER_BRAKE_SLOWDOWN, 271),
            (NVML_FI_DEV_POWER_SYNC_BALANCING_FREQ, 272),
            (NVML_FI_DEV_POWER_SYNC_BALANCING_AF, 273),
        ];
        for (v12_id, expected_v13u1) in v12_clocks_event {
            assert_eq!(
                translate_field_id(FieldIdScheme::V13Update1, v12_id),
                expected_v13u1,
                "v12 ID {v12_id} should map to v13U1 ID {expected_v13u1}"
            );
        }

        // Exhaustive check: PWR_SMOOTHING (v12: 256-273) → v13U1: 251-268
        let v12_pwr_smoothing = [
            (NVML_FI_PWR_SMOOTHING_ENABLED, 251),
            (NVML_FI_PWR_SMOOTHING_PRIV_LVL, 252),
            (NVML_FI_PWR_SMOOTHING_IMM_RAMP_DOWN_ENABLED, 253),
            (NVML_FI_PWR_SMOOTHING_APPLIED_TMP_CEIL, 254),
            (NVML_FI_PWR_SMOOTHING_APPLIED_TMP_FLOOR, 255),
            (NVML_FI_PWR_SMOOTHING_MAX_PERCENT_TMP_FLOOR_SETTING, 256),
            (NVML_FI_PWR_SMOOTHING_MIN_PERCENT_TMP_FLOOR_SETTING, 257),
            (
                NVML_FI_PWR_SMOOTHING_HW_CIRCUITRY_PERCENT_LIFETIME_REMAINING,
                258,
            ),
            (NVML_FI_PWR_SMOOTHING_MAX_NUM_PRESET_PROFILES, 259),
            (NVML_FI_PWR_SMOOTHING_PROFILE_PERCENT_TMP_FLOOR, 260),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_UP_RATE, 261),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_RATE, 262),
            (NVML_FI_PWR_SMOOTHING_PROFILE_RAMP_DOWN_HYST_VAL, 263),
            (NVML_FI_PWR_SMOOTHING_ACTIVE_PRESET_PROFILE, 264),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_PERCENT_TMP_FLOOR, 265),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_UP_RATE, 266),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_RATE, 267),
            (NVML_FI_PWR_SMOOTHING_ADMIN_OVERRIDE_RAMP_DOWN_HYST_VAL, 268),
        ];
        for (v12_id, expected_v13u1) in v12_pwr_smoothing {
            assert_eq!(
                translate_field_id(FieldIdScheme::V13Update1, v12_id),
                expected_v13u1,
                "v12 ID {v12_id} should map to v13U1 ID {expected_v13u1}"
            );
        }

        // Verify the mapping is bijective (no collisions) over the full 251-273 range
        let mut mapped: Vec<u32> = (251..=273)
            .map(|id| translate_field_id(FieldIdScheme::V13Update1, id))
            .collect();
        // Stability is irrelevant for plain integers
        mapped.sort_unstable();
        let expected: Vec<u32> = (251..=273).collect();
        assert_eq!(
            mapped, expected,
            "remapping must be a bijection over 251-273"
        );
    }

    #[test]
    fn translate_field_id_v13u1_passthrough_outside_range() {
        assert_eq!(translate_field_id(FieldIdScheme::V13Update1, 0), 0);
        assert_eq!(translate_field_id(FieldIdScheme::V13Update1, 250), 250);
        assert_eq!(translate_field_id(FieldIdScheme::V13Update1, 274), 274);
    }
}