singe_nvml/device.rs
1#![allow(deprecated)]
2
3use std::{
4 mem::{self, MaybeUninit},
5 ptr,
6};
7
8use singe_core::string_from_c_chars;
9use singe_nvml_sys as sys;
10
11use crate::{
12 error::{Error, Result, Status},
13 gpu_instance::{GpuInstance, OwnedGpuInstance},
14 library::GpmSample,
15 try_ffi,
16 types::{
17 AccountingStats, AdaptiveClockInfoStatus, AffinityScope, Architecture, AutoBoostClocks,
18 Bar1MemoryInfo, BbxFlushTime, Brand, BridgeChipHierarchy, BusType, C2cModeInfo,
19 ClkMonStatus, ClockId, ClockOffset, ClockRange, ClockRangeI32, ClockType,
20 ComputeCapability, ComputeMode, ConfComputeGpuAttestationReport, ConfComputeGpuCertificate,
21 ConfComputeMemSizeInfo, CoolerInfo, CurrentClockFreqs, CurrentPending,
22 DeviceAddressingMode, DeviceAttributes, DeviceCapabilities, DeviceVgpuCapability,
23 DramEncryptionInfo, DriverModel, DriverModelFlags, DynamicPstatesInfo, EccCounterType,
24 EccErrorCounts, EccSramErrorStatus, EccSramUniqueUncorrectedErrorCounts, EnableState,
25 EncoderSessionInfo, EncoderStats, EncoderType, EventTypes, FanPolicy, FanSpeedInfo,
26 FbcSessionInfo, FbcStats, FieldId, FieldQuery, FieldSample, FieldValue, GpmSupport,
27 GpuFabricInfo, GpuInstancePlacement, GpuInstanceProfileInfo, GpuOperationMode,
28 GspFirmwareMode, HostVgpuMode, InforomObject, MarginTemperature, MemoryErrorType,
29 MemoryInfo, MemoryLocation, MigMode, MigModeActivation, MinMaxFanSpeed, NvLinkBwMode,
30 NvLinkCapability, NvLinkErrorCounter, NvLinkInfo, NvLinkRemoteDeviceType,
31 NvLinkSupportedBwModes, NvLinkVersion, P2pCapabilityIndex, P2pStatus, PageRetirementCause,
32 PciInfo, PciInfoExt, PcieUtilCounter, Pdi, PerfPolicyType, PerformanceModes,
33 PerformanceState, PgpuMetadata, PlatformInfo, PowerLimits, PowerMizerMode, PowerMizerModes,
34 PowerSource, ProcessDetail, ProcessInfo, ProcessMode, ProcessUtilizationInfo,
35 ProcessUtilizationSample, RemappedRows, RepairStatus, RestrictedApi, RetiredPage,
36 RowRemapperHistogram, Sample, Samples, SamplingType, TemperatureInfo, TemperatureSensor,
37 TemperatureThreshold, ThermalSettings, TopologyLevel, Utilization, UtilizationCounter,
38 VgpuInstanceId, VgpuPlacementId, VgpuPlacementMode, VgpuTypeId, ViolationTime,
39 VirtualizationMode, WorkloadPowerCurrentProfiles, WorkloadPowerProfilesInfo,
40 try_from_nvml_enum,
41 },
42 utility::{
43 device_clock_offset_range, device_string_query, device_ulong_bitmask_list,
44 device_utilization_counter, query_process_info_list, query_sized_raw, query_u32_list,
45 struct_version,
46 },
47 vgpu_instance::VgpuInstance,
48};
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
51#[repr(transparent)]
52pub struct Device(sys::nvmlDevice_t);
53
54impl Device {
55 pub const unsafe fn from_raw(handle: sys::nvmlDevice_t) -> Self {
56 Self(handle)
57 }
58
59 pub const fn as_raw(self) -> sys::nvmlDevice_t {
60 self.0
61 }
62
63 pub const fn is_null(self) -> bool {
64 self.0.is_null()
65 }
66
67 /// Returns the NVML index of this device.
68 ///
69 /// For all products.
70 ///
71 /// Valid indices are derived from the accessible device count returned by [`Library::device_count`](crate::library::Library::device_count).
72 /// For example, if the count is 2 the valid indices are 0 and 1, corresponding to GPU 0 and GPU 1.
73 ///
74 /// The order in which NVML enumerates devices has no guarantees of consistency between reboots.
75 /// Prefer PCI bus IDs or GPU UUIDs for stable device lookup.
76 /// See [`Library::device_by_pci_bus_id`](crate::library::Library::device_by_pci_bus_id) and [`Library::device_by_uuid`](crate::library::Library::device_by_uuid).
77 ///
78 /// With MIG device handles, this returns indices that can be passed to [`Device::mig_device`] to retrieve an identical handle.
79 /// MIG device indices are unique within a device.
80 ///
81 /// The NVML index may not correlate with other APIs, such as the CUDA device index.
82 ///
83 /// # Errors
84 ///
85 /// Returns an error if the device is inaccessible, if NVML rejects the
86 /// handle or index output, if NVML has not been initialized, or if NVML
87 /// reports an unexpected failure.
88 pub fn index(self) -> Result<u32> {
89 let mut index = 0;
90 unsafe {
91 try_ffi!(sys::nvmlDeviceGetIndex(self.0, &raw mut index))?;
92 }
93 Ok(index)
94 }
95
96 /// Returns the name of this device.
97 ///
98 /// For all products.
99 ///
100 /// The name is an alphanumeric product identifier such as `Tesla C2070`.
101 /// It does not exceed 96 bytes including the terminating NUL byte.
102 /// This wrapper allocates the required NVML buffer internally.
103 ///
104 /// With MIG device handles, this returns MIG device names that can identify devices based on their attributes.
105 ///
106 /// # Errors
107 ///
108 /// Returns an error if the device is inaccessible, if the internal name
109 /// buffer is too small, if NVML rejects the handle or output buffer, if NVML
110 /// has not been initialized, or if NVML reports an unexpected failure.
111 pub fn name(self) -> Result<String> {
112 let mut buffer = [0i8; sys::NVML_DEVICE_NAME_BUFFER_SIZE as usize];
113 unsafe {
114 try_ffi!(sys::nvmlDeviceGetName(
115 self.0,
116 buffer.as_mut_ptr(),
117 buffer.len() as u32,
118 ))?;
119 }
120 Ok(string_from_c_chars(&buffer))
121 }
122
123 /// Returns the hostname for the device.
124 ///
125 /// For Blackwell or newer fully supported devices.
126 /// Supported on Linux only.
127 ///
128 /// Returns the hostname string for the GPU device that was set using [`sys::nvmlDeviceSetHostname_v1`].
129 ///
130 /// # Errors
131 ///
132 /// Returns an error if the device is inaccessible, if NVML rejects the
133 /// handle or hostname output, if the device does not support hostnames, if
134 /// NVML has not been initialized, or if NVML reports an unexpected failure.
135 pub fn hostname(self) -> Result<String> {
136 let mut hostname = sys::nvmlHostname_v1_t::default();
137 unsafe {
138 try_ffi!(sys::nvmlDeviceGetHostname_v1(self.0, &raw mut hostname))?;
139 }
140 Ok(string_from_c_chars(&hostname.value))
141 }
142
143 /// Returns the brand of this device.
144 ///
145 /// For all products.
146 ///
/// The brand is reported as a [`Brand`] value.
148 ///
149 /// # Errors
150 ///
151 /// Returns an error if the device is inaccessible, if NVML rejects the
152 /// handle or brand output, if NVML has not been initialized, or if NVML
153 /// reports an unexpected failure.
154 pub fn brand(self) -> Result<Brand> {
155 let mut brand = sys::nvmlBrandType_t::NVML_BRAND_UNKNOWN as u32;
156 unsafe {
157 try_ffi!(sys::nvmlDeviceGetBrand(
158 self.0,
159 (&raw mut brand).cast::<sys::nvmlBrandType_t>(),
160 ))?;
161 }
162 Ok(Brand::from_raw(brand))
163 }
164
165 /// Returns architecture for the device.
166 ///
167 /// # Errors
168 ///
169 /// Returns an error if NVML rejects the handle or architecture output, or if
170 /// NVML has not been initialized.
171 pub fn architecture(self) -> Result<Architecture> {
172 let mut architecture = 0;
173 unsafe {
174 try_ffi!(sys::nvmlDeviceGetArchitecture(
175 self.0,
176 &raw mut architecture
177 ))?;
178 }
179 Ok(Architecture::from_raw(architecture))
180 }
181
182 pub fn serial(self) -> Result<String> {
183 device_string_query(
184 self,
185 sys::NVML_DEVICE_SERIAL_BUFFER_SIZE as usize,
186 sys::nvmlDeviceGetSerial,
187 )
188 }
189
190 pub fn uuid(self) -> Result<String> {
191 device_string_query(
192 self,
193 sys::NVML_DEVICE_UUID_V2_BUFFER_SIZE as usize,
194 sys::nvmlDeviceGetUUID,
195 )
196 }
197
198 pub fn vbios_version(self) -> Result<String> {
199 // From docs: "It will not exceed 32 characters in length (including the terminating NUL byte)"
200 // https://docs.nvidia.com/deploy/nvml-api/group__nvmlDeviceQueries.html#group__nvmlDeviceQueries_1g4a02015969489ad8a28d8c56b34c825e
201 device_string_query(self, 32, sys::nvmlDeviceGetVbiosVersion)
202 }
203
204 pub fn board_part_number(self) -> Result<String> {
205 device_string_query(
206 self,
207 sys::NVML_DEVICE_PART_NUMBER_BUFFER_SIZE as usize,
208 sys::nvmlDeviceGetBoardPartNumber,
209 )
210 }
211
212 /// Returns a unique identifier for the device module on the baseboard.
213 ///
214 /// Returns a unique identifier for each GPU module on a given baseboard.
215 /// For non-baseboard products, this ID would always be 0.
216 ///
217 /// # Errors
218 ///
219 /// Returns an error if NVML rejects the handle or module ID output, if NVML
220 /// has not been initialized, or if NVML reports an unexpected failure.
221 pub fn module_id(self) -> Result<u32> {
222 let mut module_id = 0;
223 unsafe {
224 try_ffi!(sys::nvmlDeviceGetModuleId(self.0, &raw mut module_id))?;
225 }
226 Ok(module_id)
227 }
228
229 /// Returns whether the device is on a multi-GPU board.
230 ///
231 /// For Fermi or newer fully supported devices.
232 ///
233 /// # Errors
234 ///
235 /// Returns an error if the device is inaccessible, if NVML rejects the
236 /// handle or output, if the device does not support this query, if NVML has
237 /// not been initialized, or if NVML reports an unexpected failure.
238 pub fn is_multi_gpu_board(self) -> Result<bool> {
239 let mut multi_gpu = 0;
240 unsafe {
241 try_ffi!(sys::nvmlDeviceGetMultiGpuBoard(self.0, &raw mut multi_gpu))?;
242 }
243 Ok(multi_gpu != 0)
244 }
245
246 /// Returns attributes (engine counts etc.) for the given NVML device handle.
247 ///
248 /// This currently supports only MIG device handles.
249 ///
250 /// For Ampere or newer fully supported devices.
251 /// Supported on Linux only.
252 ///
253 /// # Errors
254 ///
255 /// Returns an error if NVML rejects the device handle, if the device does
256 /// not support this query, if NVML has not been initialized, or if NVML
257 /// reports an unexpected failure.
258 pub fn attributes(self) -> Result<DeviceAttributes> {
259 unsafe {
260 let mut attributes = MaybeUninit::<sys::nvmlDeviceAttributes_t>::uninit();
261 try_ffi!(sys::nvmlDeviceGetAttributes_v2(
262 self.0,
263 attributes.as_mut_ptr()
264 ))?;
265 Ok(attributes.assume_init().into())
266 }
267 }
268
269 /// Returns the root/admin permissions for the target NVML operation.
270 /// See [`RestrictedApi`] for the list of supported operations.
271 /// If an operation is restricted, only callers with root privileges can call it.
272 /// See [`sys::nvmlDeviceSetAPIRestriction`] to change current permissions.
273 ///
274 /// For all fully supported products.
275 ///
276 /// # Errors
277 ///
278 /// Returns an error if the device is inaccessible, if NVML rejects the
279 /// handle, API restriction, or output, if the device or queried feature does
280 /// not support API restriction reporting, if NVML has not been initialized,
281 /// or if NVML reports an unexpected failure.
282 pub fn is_api_restricted(self, api: RestrictedApi) -> Result<EnableState> {
283 let mut state = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
284 unsafe {
285 try_ffi!(sys::nvmlDeviceGetAPIRestriction(
286 self.0,
287 api.into(),
288 &raw mut state,
289 ))?;
290 }
291 Ok(state.into())
292 }
293
294 /// Returns platform information of this device.
295 ///
296 /// For Blackwell or newer fully supported devices.
297 ///
298 /// Returns the platform information reported by NVML for this device.
299 ///
300 /// # Errors
301 ///
302 /// Returns an error if the installed NVML version does not support the
303 /// request layout, if NVML rejects the request, if system memory is
304 /// insufficient, if the device does not support this query, or if NVML
305 /// reports an unexpected failure.
306 pub fn platform_info(self) -> Result<PlatformInfo> {
307 let mut info = sys::nvmlPlatformInfo_t {
308 version: struct_version::<sys::nvmlPlatformInfo_t>(2),
309 ..Default::default()
310 };
311 unsafe {
312 try_ffi!(sys::nvmlDeviceGetPlatformInfo(self.0, &raw mut info))?;
313 }
314 Ok(info.into())
315 }
316
317 /// Returns the Per Device Identifier (PDI) associated with this device.
318 ///
319 /// For Pascal or newer fully supported devices.
320 ///
321 /// Returns the per-device identifier reported by NVML.
322 ///
323 /// # Errors
324 ///
325 /// Returns an error if the installed NVML version does not support the
326 /// request layout, if the device is inaccessible, if NVML rejects the handle
327 /// or output, if the device does not support PDI reporting, if NVML has not
328 /// been initialized, or if NVML reports an unexpected failure.
329 pub fn pdi(self) -> Result<Pdi> {
330 let mut pdi = sys::nvmlPdi_t {
331 version: struct_version::<sys::nvmlPdi_t>(1),
332 ..Default::default()
333 };
334 unsafe {
335 try_ffi!(sys::nvmlDeviceGetPdi(self.0, &raw mut pdi))?;
336 }
337 Ok(pdi.into())
338 }
339
340 /// Returns the Device's C2C Mode information.
341 ///
342 /// # Errors
343 ///
344 /// Returns an error if the device is inaccessible, if NVML rejects the
345 /// handle or output, if the device does not support C2C mode reporting, or
346 /// if NVML reports an unexpected failure.
347 pub fn c2c_mode_info(self) -> Result<C2cModeInfo> {
348 unsafe {
349 let mut info = MaybeUninit::<sys::nvmlC2cModeInfo_v1_t>::uninit();
350 try_ffi!(sys::nvmlDeviceGetC2cModeInfoV(self.0, info.as_mut_ptr()))?;
351 Ok(info.assume_init().into())
352 }
353 }
354
355 /// Returns the current Auto Boosted clocks state for this device.
356 ///
357 /// For Kepler or newer fully supported devices.
358 ///
359 /// Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates to maximize performance as thermal limits allow.
360 ///
361 /// On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks.
362 /// Use [`sys::nvmlDeviceSetApplicationsClocks`] and [`sys::nvmlDeviceResetApplicationsClocks`] to control Auto Boost behavior.
363 ///
364 /// # Errors
365 ///
366 /// Returns an error if the device is inaccessible, if NVML rejects the
367 /// handle or output, if the device does not support Auto Boosted clocks, if
368 /// NVML has not been initialized, or if NVML reports an unexpected failure.
369 pub fn auto_boosted_clocks(self) -> Result<AutoBoostClocks> {
370 let mut enabled = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
371 let mut default_enabled = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
372 unsafe {
373 try_ffi!(sys::nvmlDeviceGetAutoBoostedClocksEnabled(
374 self.0,
375 &raw mut enabled,
376 &raw mut default_enabled,
377 ))?;
378 }
379 Ok(AutoBoostClocks {
380 enabled: enabled.into(),
381 default_enabled: default_enabled.into(),
382 })
383 }
384
385 /// Tries to set the default state of Auto Boosted clocks on a device.
386 /// Auto Boosted clocks return to this default state when no compute
387 /// processes, such as CUDA applications with active contexts, are running.
388 ///
389 /// For Kepler or newer non-GeForce fully supported devices and Maxwell or newer GeForce devices.
390 /// Requires root/admin permissions.
391 ///
392 /// Auto Boosted clocks are enabled by default on some hardware, allowing the GPU to run at higher clock rates to maximize performance as thermal limits allow.
393 /// Disable Auto Boosted clocks when fixed clock rates are required.
394 ///
395 /// On Pascal and newer hardware, Auto Boosted clocks are controlled through application clocks.
396 /// Use [`sys::nvmlDeviceSetApplicationsClocks`] and [`sys::nvmlDeviceResetApplicationsClocks`] to control Auto Boost behavior.
397 ///
398 /// # Errors
399 ///
400 /// Returns an error if the device is inaccessible, if NVML rejects the
401 /// handle, if the device does not support Auto Boosted clocks, if the
402 /// current process lacks permission to change the default state, if NVML has
403 /// not been initialized, or if NVML reports an unexpected failure.
404 pub fn set_default_auto_boosted_clocks_enabled(
405 self,
406 enabled: EnableState,
407 flags: DriverModelFlags,
408 ) -> Result<()> {
409 unsafe {
410 try_ffi!(sys::nvmlDeviceSetDefaultAutoBoostedClocksEnabled(
411 self.0,
412 enabled.into(),
413 flags.bits(),
414 ))?;
415 }
416 Ok(())
417 }
418
419 /// Returns the PCI attributes of this device.
420 ///
421 /// For all products.
422 ///
423 /// Returns the PCI information reported by NVML.
424 ///
425 /// # Errors
426 ///
427 /// Returns an error if the device is inaccessible, if NVML rejects the
428 /// handle or PCI info output, if NVML has not been initialized, or if NVML
429 /// reports an unexpected failure.
430 pub fn pci_info(self) -> Result<PciInfo> {
431 unsafe {
432 let mut pci = MaybeUninit::<sys::nvmlPciInfo_t>::uninit();
433 try_ffi!(sys::nvmlDeviceGetPciInfo_v3(self.0, pci.as_mut_ptr()))?;
434 Ok(pci.assume_init().into())
435 }
436 }
437
438 /// Returns PCI attributes of this device.
439 ///
440 /// For all products.
441 ///
442 /// Returns the extended PCI information reported by NVML.
443 ///
444 /// # Errors
445 ///
446 /// Returns an error if the device is inaccessible, if NVML rejects the
447 /// handle or PCI info output, if NVML has not been initialized, or if NVML
448 /// reports an unexpected failure.
449 pub fn pci_info_ext(self) -> Result<PciInfoExt> {
450 let mut pci = sys::nvmlPciInfoExt_t {
451 version: struct_version::<sys::nvmlPciInfoExt_t>(1),
452 ..Default::default()
453 };
454 unsafe {
455 try_ffi!(sys::nvmlDeviceGetPciInfoExt(self.0, &raw mut pci))?;
456 }
457 Ok(pci.into())
458 }
459
460 /// Returns bridge chip information for all the bridge chips on the board.
461 ///
462 /// For all fully supported products.
463 /// Only applicable to multi-GPU products.
464 ///
465 /// # Errors
466 ///
467 /// Returns an error if the device is inaccessible, if NVML rejects the
468 /// handle or output, if bridge-chip reporting is not supported for the
469 /// device, if NVML has not been initialized, or if NVML reports an
470 /// unexpected failure.
471 pub fn bridge_chip_hierarchy(self) -> Result<BridgeChipHierarchy> {
472 unsafe {
473 let mut hierarchy = MaybeUninit::<sys::nvmlBridgeChipHierarchy_t>::uninit();
474 try_ffi!(sys::nvmlDeviceGetBridgeChipInfo(
475 self.0,
476 hierarchy.as_mut_ptr()
477 ))?;
478 Ok(hierarchy.assume_init().into())
479 }
480 }
481
482 /// Returns the current PCIe link generation.
483 ///
484 /// For Fermi or newer fully supported devices.
485 ///
486 /// # Errors
487 ///
488 /// Returns an error if the device is inaccessible, if NVML rejects the
489 /// query, if PCIe link information is unavailable, if NVML has not been
490 /// initialized, or if NVML reports an unexpected failure.
491 pub fn current_pcie_link_generation(self) -> Result<u32> {
492 let mut generation = 0;
493 unsafe {
494 try_ffi!(sys::nvmlDeviceGetCurrPcieLinkGeneration(
495 self.0,
496 &raw mut generation,
497 ))?;
498 }
499 Ok(generation)
500 }
501
502 /// Returns the current PCIe link width.
503 ///
504 /// For Fermi or newer fully supported devices.
505 ///
506 /// # Errors
507 ///
508 /// Returns an error if the device is inaccessible, if NVML rejects the
509 /// query, if PCIe link information is unavailable, if NVML has not been
510 /// initialized, or if NVML reports an unexpected failure.
511 pub fn current_pcie_link_width(self) -> Result<u32> {
512 let mut width = 0;
513 unsafe {
514 try_ffi!(sys::nvmlDeviceGetCurrPcieLinkWidth(self.0, &raw mut width))?;
515 }
516 Ok(width)
517 }
518
519 /// Returns the maximum PCIe link generation possible with this device and system.
520 ///
521 /// For example, a generation 2 PCIe device attached to a generation 1 PCIe bus reports generation 1.
522 ///
523 /// For Fermi or newer fully supported devices.
524 ///
525 /// # Errors
526 ///
527 /// Returns an error if the device is inaccessible, if NVML rejects the
528 /// query, if PCIe link information is unavailable, if NVML has not been
529 /// initialized, or if NVML reports an unexpected failure.
530 pub fn max_pcie_link_generation(self) -> Result<u32> {
531 let mut generation = 0;
532 unsafe {
533 try_ffi!(sys::nvmlDeviceGetMaxPcieLinkGeneration(
534 self.0,
535 &raw mut generation,
536 ))?;
537 }
538 Ok(generation)
539 }
540
541 /// Returns the maximum PCIe link generation supported by this device.
542 ///
543 /// For Fermi or newer fully supported devices.
544 ///
545 /// # Errors
546 ///
547 /// Returns an error if the device is inaccessible, if NVML rejects the
548 /// query, if PCIe link information is unavailable, if NVML has not been
549 /// initialized, or if NVML reports an unexpected failure.
550 pub fn gpu_max_pcie_link_generation(self) -> Result<u32> {
551 let mut generation = 0;
552 unsafe {
553 try_ffi!(sys::nvmlDeviceGetGpuMaxPcieLinkGeneration(
554 self.0,
555 &raw mut generation,
556 ))?;
557 }
558 Ok(generation)
559 }
560
561 /// Returns the maximum PCIe link width possible with this device and system.
562 ///
563 /// For example, a device with a 16x PCIe bus width attached to an 8x PCIe
564 /// system bus reports a maximum link width of 8.
565 ///
566 /// For Fermi or newer fully supported devices.
567 ///
568 /// # Errors
569 ///
570 /// Returns an error if the device is inaccessible, if NVML rejects the
571 /// handle or output, if PCIe link information is unavailable, if NVML has
572 /// not been initialized, or if NVML reports an unexpected failure.
573 pub fn max_pcie_link_width(self) -> Result<u32> {
574 let mut width = 0;
575 unsafe {
576 try_ffi!(sys::nvmlDeviceGetMaxPcieLinkWidth(self.0, &raw mut width))?;
577 }
578 Ok(width)
579 }
580
581 /// Returns PCIe utilization information.
582 /// Queries a byte counter over a 20 ms interval to report PCIe throughput.
583 ///
584 /// For Maxwell or newer fully supported devices.
585 ///
586 /// Not supported in virtual machines running virtual GPU (vGPU).
587 ///
588 /// # Errors
589 ///
590 /// Returns an error if the device is inaccessible, if NVML rejects the
591 /// handle, counter, or output, if the device does not support PCIe
592 /// utilization queries, if NVML has not been initialized, or if NVML reports
593 /// an unexpected failure.
594 pub fn pcie_throughput(self, counter: PcieUtilCounter) -> Result<u32> {
595 let mut throughput = 0;
596 unsafe {
597 try_ffi!(sys::nvmlDeviceGetPcieThroughput(
598 self.0,
599 counter.into(),
600 &raw mut throughput,
601 ))?;
602 }
603 Ok(throughput)
604 }
605
606 /// Returns the PCIe replay counter.
607 ///
608 /// For Kepler or newer fully supported devices.
609 ///
610 /// # Errors
611 ///
612 /// Returns an error if the device is inaccessible, if NVML rejects the
613 /// handle or output, if the device does not support replay-counter queries,
614 /// if NVML has not been initialized, or if NVML reports an unexpected
615 /// failure.
616 pub fn pcie_replay_counter(self) -> Result<u32> {
617 let mut counter = 0;
618 unsafe {
619 try_ffi!(sys::nvmlDeviceGetPcieReplayCounter(
620 self.0,
621 &raw mut counter
622 ))?;
623 }
624 Ok(counter)
625 }
626
627 /// Returns the device's PCIe Max Link speed in MB/s.
628 ///
629 /// # Errors
630 ///
631 /// Returns an error if the device is inaccessible, if NVML rejects the
632 /// handle or output, if the device does not support this query, or if NVML
633 /// has not been initialized.
634 pub fn pcie_link_max_speed(self) -> Result<u32> {
635 let mut speed = 0;
636 unsafe {
637 try_ffi!(sys::nvmlDeviceGetPcieLinkMaxSpeed(self.0, &raw mut speed))?;
638 }
639 Ok(speed)
640 }
641
642 /// Returns the device's PCIe Link speed in Mbps.
643 ///
644 /// # Errors
645 ///
646 /// Returns an error if NVML rejects the handle or output, if the device does
647 /// not support PCIe speed queries, if NVML has not been initialized, or if
648 /// NVML reports an unexpected failure.
649 pub fn pcie_speed(self) -> Result<u32> {
650 let mut speed = 0;
651 unsafe {
652 try_ffi!(sys::nvmlDeviceGetPcieSpeed(self.0, &raw mut speed))?;
653 }
654 Ok(speed)
655 }
656
657 /// Indicates whether the supplied device supports GPM.
658 ///
659 /// For Hopper or newer fully supported devices.
660 ///
661 /// This supports device handles and MIG device handles.
662 ///
663 /// # Errors
664 ///
665 /// Returns an error if NVML rejects the handle or cannot query GPM support.
666 pub fn gpm_support(self) -> Result<GpmSupport> {
667 let mut support = sys::nvmlGpmSupport_t {
668 version: sys::NVML_GPM_SUPPORT_VERSION,
669 ..Default::default()
670 };
671 unsafe {
672 try_ffi!(sys::nvmlGpmQueryDeviceSupport(self.0, &raw mut support))?;
673 }
674 Ok(support.into())
675 }
676
677 /// Returns GPM stream state.
678 ///
679 /// For Hopper or newer fully supported devices.
680 /// Supported on Linux, Windows TCC.
681 ///
682 /// # Errors
683 ///
684 /// Returns an error if NVML rejects the handle or output, if the device does
685 /// not support GPM streaming-state queries, or if NVML has not been
686 /// initialized.
687 pub fn gpm_streaming_enabled(self) -> Result<EnableState> {
688 let mut state = 0;
689 unsafe {
690 try_ffi!(sys::nvmlGpmQueryIfStreamingEnabled(self.0, &raw mut state))?;
691 }
692 try_from_nvml_enum("enable state", state)
693 }
694
695 /// Read a sample of GPM metrics into the provided `sample` buffer.
696 /// After two samples are gathered, you can call [`Library::gpm_metrics`](crate::library::Library::gpm_metrics) on those samples to retrieve metrics.
697 ///
698 /// For Hopper or newer fully supported devices.
699 ///
700 /// * The interval between two [`Device::gpm_sample`] calls must be greater than 100 ms due to the internal sample refresh rate.
701 /// * Supports device handles and MIG device handles.
702 ///
703 /// # Errors
704 ///
705 /// Returns an error if NVML rejects the handle or sample buffer, if the
706 /// device does not support GPM sampling, or if samples are requested too
707 /// quickly.
708 pub fn gpm_sample(self, sample: &GpmSample) -> Result<()> {
709 unsafe { try_ffi!(sys::nvmlGpmSampleGet(self.0, sample.as_raw())) }
710 }
711
712 /// Read a sample of GPM metrics into the provided `sample` buffer for a MIG GPU instance.
713 ///
714 /// After two samples are gathered, you can call [`Library::gpm_metrics`](crate::library::Library::gpm_metrics) on those samples to retrieve metrics.
715 ///
716 /// For Hopper or newer fully supported devices.
717 ///
718 /// The interval between two [`Device::gpm_mig_sample`] calls must be greater than 100 ms due to the internal sample refresh rate.
719 ///
720 /// # Errors
721 ///
722 /// Returns an error if NVML rejects the handle, GPU instance id, or sample
723 /// buffer, if the device does not support GPM MIG sampling, or if samples
724 /// are requested too quickly.
725 pub fn gpm_mig_sample(self, gpu_instance_id: u32, sample: &GpmSample) -> Result<()> {
726 unsafe {
727 try_ffi!(sys::nvmlGpmMigSampleGet(
728 self.0,
729 gpu_instance_id,
730 sample.as_raw(),
731 ))
732 }
733 }
734
735 pub fn memory_info(self) -> Result<MemoryInfo> {
736 let mut memory = sys::nvmlMemory_v2_t {
737 version: struct_version::<sys::nvmlMemory_v2_t>(2),
738 ..Default::default()
739 };
740 unsafe {
741 try_ffi!(sys::nvmlDeviceGetMemoryInfoV(self.0, &raw mut memory))?;
742 }
743 Ok(memory.into())
744 }
745
746 /// Returns the current utilization rates for the device's major subsystems.
747 ///
748 /// For Fermi or newer fully supported devices.
749 ///
750 /// * During driver initialization when ECC is enabled, GPU and memory utilization readings can be high.
751 /// ECC memory scrubbing during driver initialization causes this.
752 /// * On MIG-enabled GPUs, querying device utilization rates is not currently supported.
753 ///
754 /// # Errors
755 ///
756 /// Returns an error if the device is inaccessible, if NVML rejects the
757 /// handle or output, if the device does not support utilization queries, if
758 /// NVML has not been initialized, or if NVML reports an unexpected failure.
759 pub fn utilization(self) -> Result<Utilization> {
760 unsafe {
761 let mut utilization = MaybeUninit::<sys::nvmlUtilization_t>::uninit();
762 try_ffi!(sys::nvmlDeviceGetUtilizationRates(
763 self.0,
764 utilization.as_mut_ptr(),
765 ))?;
766 Ok(utilization.assume_init().into())
767 }
768 }
769
770 pub fn encoder_utilization(self) -> Result<UtilizationCounter> {
771 device_utilization_counter(self, sys::nvmlDeviceGetEncoderUtilization)
772 }
773
774 pub fn decoder_utilization(self) -> Result<UtilizationCounter> {
775 device_utilization_counter(self, sys::nvmlDeviceGetDecoderUtilization)
776 }
777
778 pub fn jpg_utilization(self) -> Result<UtilizationCounter> {
779 device_utilization_counter(self, sys::nvmlDeviceGetJpgUtilization)
780 }
781
782 pub fn ofa_utilization(self) -> Result<UtilizationCounter> {
783 device_utilization_counter(self, sys::nvmlDeviceGetOfaUtilization)
784 }
785
786 /// Returns the current capacity of the device's encoder, as a percentage of maximum encoder capacity with valid values in the range 0-100.
787 ///
788 /// For Maxwell or newer fully supported devices.
789 ///
790 /// # Errors
791 ///
792 /// Returns an error if the device is inaccessible, if NVML rejects the
793 /// query, if the device does not support the requested encoder, if NVML has
794 /// not been initialized, or if NVML reports an unexpected failure.
795 pub fn encoder_capacity(self, encoder: EncoderType) -> Result<u32> {
796 let mut capacity = 0;
797 unsafe {
798 try_ffi!(sys::nvmlDeviceGetEncoderCapacity(
799 self.0,
800 encoder.into(),
801 &raw mut capacity,
802 ))?;
803 }
804 Ok(capacity)
805 }
806
807 /// Returns the current encoder statistics for the given device.
808 ///
809 /// For Maxwell or newer fully supported devices.
810 ///
811 /// # Errors
812 ///
813 /// Returns an error if the device is inaccessible, if NVML rejects the
814 /// query, if NVML has not been initialized, or if NVML reports an
815 /// unexpected failure.
816 pub fn encoder_stats(self) -> Result<EncoderStats> {
817 let mut session_count = 0;
818 let mut average_fps = 0;
819 let mut average_latency_us = 0;
820 unsafe {
821 try_ffi!(sys::nvmlDeviceGetEncoderStats(
822 self.0,
823 &raw mut session_count,
824 &raw mut average_fps,
825 &raw mut average_latency_us,
826 ))?;
827 }
828 Ok(EncoderStats {
829 session_count,
830 average_fps,
831 average_latency_us,
832 })
833 }
834
835 /// Returns information about active encoder sessions on a target device.
836 ///
837 /// This wrapper queries the required session count first, then returns the active encoder sessions as a [`Vec`].
838 ///
839 /// For Maxwell or newer fully supported devices.
840 ///
841 /// # Errors
842 ///
843 /// Returns an error if the device is inaccessible, if the active-session
844 /// count changes while the wrapper is fetching sessions, if NVML reports an
845 /// invalid session count, if the device does not support this query, if NVML
846 /// has not been initialized, or if NVML reports an unexpected failure.
847 pub fn encoder_sessions(self) -> Result<Vec<EncoderSessionInfo>> {
848 let mut count = 0;
849 let status = unsafe {
850 sys::nvmlDeviceGetEncoderSessions(self.as_raw(), &raw mut count, ptr::null_mut())
851 };
852 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
853 return Ok(Vec::new());
854 }
855 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
856 return Err(status.into());
857 }
858
859 let mut sessions = vec![sys::nvmlEncoderSessionInfo_t::default(); count as usize];
860 unsafe {
861 try_ffi!(sys::nvmlDeviceGetEncoderSessions(
862 self.as_raw(),
863 &raw mut count,
864 sessions.as_mut_ptr(),
865 ))?;
866 }
867 sessions.truncate(count as usize);
868 Ok(sessions.into_iter().map(Into::into).collect())
869 }
870
871 /// Returns total, available, and used size of BAR1 memory.
872 ///
873 /// BAR1 maps framebuffer memory so the CPU or third-party PCIe peer devices can access it directly.
874 ///
875 /// In MIG mode, a device handle returns aggregate information only if the caller has appropriate privileges.
876 /// Per-instance information can be queried by using specific MIG device handles.
877 ///
878 /// For Kepler or newer fully supported devices.
879 ///
880 /// # Errors
881 ///
882 /// Returns an error if the device is inaccessible, if NVML rejects the
883 /// query, if the device does not support BAR1 memory reporting, if NVML has
884 /// not been initialized, or if NVML reports an unexpected failure.
885 pub fn bar1_memory_info(self) -> Result<Bar1MemoryInfo> {
886 unsafe {
887 let mut memory = MaybeUninit::<sys::nvmlBAR1Memory_t>::uninit();
888 try_ffi!(sys::nvmlDeviceGetBAR1MemoryInfo(
889 self.0,
890 memory.as_mut_ptr()
891 ))?;
892 Ok(memory.assume_init().into())
893 }
894 }
895
896 /// Returns the current clock speeds for the device.
897 ///
898 /// For Fermi or newer fully supported devices.
899 ///
900 /// See [`ClockType`] for details on available clock information.
901 ///
902 /// # Errors
903 ///
904 /// Returns an error if the device is inaccessible, if NVML rejects the
905 /// handle, clock type, or output, if the device cannot report the requested
906 /// clock, if NVML has not been initialized, or if NVML reports an
907 /// unexpected failure.
908 pub fn clock(self, kind: ClockType) -> Result<u32> {
909 let mut clock = 0;
910 unsafe {
911 try_ffi!(sys::nvmlDeviceGetClockInfo(
912 self.0,
913 kind.into(),
914 &raw mut clock
915 ))?;
916 }
917 Ok(clock)
918 }
919
920 /// Returns the clock speed for the clock specified by the clock type and clock ID.
921 ///
922 /// For Kepler or newer fully supported devices.
923 ///
924 /// # Errors
925 ///
926 /// Returns an error if the device is inaccessible, if NVML rejects the
927 /// handle, clock type, clock ID, or output, if the device does not support
928 /// this clock query, if NVML has not been initialized, or if NVML reports an
929 /// unexpected failure.
930 pub fn clock_with_id(self, kind: ClockType, clock_id: ClockId) -> Result<u32> {
931 let mut clock = 0;
932 unsafe {
933 try_ffi!(sys::nvmlDeviceGetClock(
934 self.0,
935 kind.into(),
936 clock_id.into(),
937 &raw mut clock,
938 ))?;
939 }
940 Ok(clock)
941 }
942
943 /// Returns the maximum clock speeds for the device.
944 ///
945 /// For Fermi or newer fully supported devices.
946 ///
947 /// See [`ClockType`] for details on available clock information.
948 ///
949 /// Current P0 clocks (reported by [`Device::clock`]) can differ from max clocks by a few MHz.
950 ///
951 /// # Errors
952 ///
953 /// Returns an error if the device is inaccessible, if NVML rejects the
954 /// handle, clock type, or output, if the device cannot report the requested
955 /// maximum clock, if NVML has not been initialized, or if NVML reports an
956 /// unexpected failure.
957 pub fn max_clock(self, kind: ClockType) -> Result<u32> {
958 let mut clock = 0;
959 unsafe {
960 try_ffi!(sys::nvmlDeviceGetMaxClockInfo(
961 self.0,
962 kind.into(),
963 &raw mut clock,
964 ))?;
965 }
966 Ok(clock)
967 }
968
969 /// Returns a string with the associated current GPU Clock and Memory Clock values.
970 ///
971 /// Not all tokens are reported on all GPUs, and additional tokens may be added in the future.
972 ///
973 /// These clock values include the offset set by clients through [`Device::set_clock_offsets`].
974 ///
975 /// Clock values are returned as a comma-separated list of "token=value" pairs.
976 /// Valid tokens:
977 ///
978 /// - `perf`: performance level.
979 /// - `nvclock`: GPU clock in MHz for the performance level.
980 /// - `nvclockmin`: minimum GPU clock in MHz for the performance level.
981 /// - `nvclockmax`: maximum GPU clock in MHz for the performance level.
982 /// - `nvclockeditable`: whether the GPU clock domain is editable for the performance level.
983 /// - `memclock`: memory clock in MHz for the performance level.
984 /// - `memclockmin`: minimum memory clock in MHz for the performance level.
985 /// - `memclockmax`: maximum memory clock in MHz for the performance level.
986 /// - `memclockeditable`: whether the memory clock domain is editable for the performance level.
987 /// - `memtransferrate`: memory transfer rate in MHz for the performance level.
988 /// - `memtransferratemin`: minimum memory transfer rate in MHz for the performance level.
989 /// - `memtransferratemax`: maximum memory transfer rate in MHz for the performance level.
990 /// - `memtransferrateeditable`: whether the memory transfer rate is editable for the performance level.
991 ///
992 /// Example:
993 ///
994 /// `nvclock=324, nvclockmin=324, nvclockmax=324, nvclockeditable=0, memclock=324, memclockmin=324, memclockmax=324, memclockeditable=0, memtransferrate=648, memtransferratemin=648, memtransferratemax=648, memtransferrateeditable=0;`
995 ///
996 /// # Errors
997 ///
998 /// Returns an error if the device is inaccessible, if the internal
999 /// clock-frequency buffer is too small, if NVML rejects the handle or
1000 /// output, if NVML has not been initialized, or if NVML reports an
1001 /// unexpected failure.
1002 pub fn current_clock_freqs(self) -> Result<CurrentClockFreqs> {
1003 let mut current = sys::nvmlDeviceCurrentClockFreqs_t {
1004 version: struct_version::<sys::nvmlDeviceCurrentClockFreqs_t>(1),
1005 ..Default::default()
1006 };
1007 unsafe {
1008 try_ffi!(sys::nvmlDeviceGetCurrentClockFreqs(
1009 self.0,
1010 &raw mut current
1011 ))?;
1012 }
1013 Ok(current.into())
1014 }
1015
1016 /// Returns a performance mode string with all the performance modes defined for this device along with their associated GPU Clock and Memory Clock values.
1017 /// Not all tokens are reported on all GPUs, and additional tokens may be added in the future.
1018 /// For backward compatibility, NVML still provides `nvclock` and `memclock`; those are the same as `nvclockmin` and `memclockmin`.
1019 ///
1020 /// These clock values include the offset set by clients through [`Device::set_clock_offsets`].
1021 ///
1022 /// Maximum available Pstate (P15) shows the minimum performance level (0) and vice versa.
1023 ///
1024 /// Each performance mode is returned as a comma-separated list of "token=value" pairs.
1025 /// Performance-mode token sets are separated by semicolons.
1026 /// Valid tokens:
1027 ///
1028 /// - `perf`: performance level.
1029 /// - `nvclock`: GPU clock in MHz for the performance level.
1030 /// - `nvclockmin`: minimum GPU clock in MHz for the performance level.
1031 /// - `nvclockmax`: maximum GPU clock in MHz for the performance level.
1032 /// - `nvclockeditable`: whether the GPU clock domain is editable for the performance level.
1033 /// - `memclock`: memory clock in MHz for the performance level.
1034 /// - `memclockmin`: minimum memory clock in MHz for the performance level.
1035 /// - `memclockmax`: maximum memory clock in MHz for the performance level.
1036 /// - `memclockeditable`: whether the memory clock domain is editable for the performance level.
1037 /// - `memtransferrate`: memory transfer rate in MHz for the performance level.
1038 /// - `memtransferratemin`: minimum memory transfer rate in MHz for the performance level.
1039 /// - `memtransferratemax`: maximum memory transfer rate in MHz for the performance level.
1040 /// - `memtransferrateeditable`: whether the memory transfer rate is editable for the performance level.
1041 ///
1042 /// Example:
1043 ///
    /// `perf=0, nvclock=324, nvclockmin=324, nvclockmax=324, nvclockeditable=0, memclock=324, memclockmin=324, memclockmax=324, memclockeditable=0, memtransferrate=648, memtransferratemin=648, memtransferratemax=648, memtransferrateeditable=0; perf=1, nvclock=324, nvclockmin=324, nvclockmax=640, nvclockeditable=0, memclock=810, memclockmin=810, memclockmax=810, memclockeditable=0, memtransferrate=1620, memtransferratemin=1620, memtransferratemax=1620, memtransferrateeditable=0;`
1045 ///
1046 /// # Errors
1047 ///
1048 /// Returns an error if the device is inaccessible, if the internal
1049 /// performance-mode buffer is too small, if NVML rejects the handle or
1050 /// output, if NVML has not been initialized, or if NVML reports an
1051 /// unexpected failure.
1052 pub fn performance_modes(self) -> Result<PerformanceModes> {
1053 let mut modes = sys::nvmlDevicePerfModes_t {
1054 version: struct_version::<sys::nvmlDevicePerfModes_t>(1),
1055 ..Default::default()
1056 };
1057 unsafe {
1058 try_ffi!(sys::nvmlDeviceGetPerformanceModes(self.0, &raw mut modes))?;
1059 }
1060 Ok(modes.into())
1061 }
1062
1063 pub fn applications_clock(self, kind: ClockType) -> Result<u32> {
1064 self.clock_with_id(kind, ClockId::AppClockTarget)
1065 }
1066
1067 pub fn default_applications_clock(self, kind: ClockType) -> Result<u32> {
1068 self.clock_with_id(kind, ClockId::AppClockDefault)
1069 }
1070
1071 /// Returns the customer-defined maximum boost clock speed specified by `kind`.
1072 ///
1073 /// For Pascal or newer fully supported devices.
1074 ///
1075 /// # Errors
1076 ///
1077 /// Returns an error if the device is inaccessible, if NVML rejects the
1078 /// handle, clock type, or output, if the device or requested clock type does
1079 /// not support customer boost clocks, if NVML has not been initialized, or
1080 /// if NVML reports an unexpected failure.
1081 pub fn max_customer_boost_clock(self, kind: ClockType) -> Result<u32> {
1082 let mut clock = 0;
1083 unsafe {
1084 try_ffi!(sys::nvmlDeviceGetMaxCustomerBoostClock(
1085 self.0,
1086 kind.into(),
1087 &raw mut clock,
1088 ))?;
1089 }
1090 Ok(clock)
1091 }
1092
1093 /// Returns the minimum and maximum clocks of a clock domain for a P-state.
1094 ///
1095 /// # Errors
1096 ///
1097 /// Returns an error if NVML rejects the handle, clock type, P-state, or
1098 /// outputs, if the device does not support this query, if NVML has not been
1099 /// initialized, or if NVML reports an unexpected failure.
1100 pub fn min_max_clock_of_pstate(
1101 self,
1102 kind: ClockType,
1103 pstate: PerformanceState,
1104 ) -> Result<ClockRange> {
1105 let mut min = 0;
1106 let mut max = 0;
1107 unsafe {
1108 try_ffi!(sys::nvmlDeviceGetMinMaxClockOfPState(
1109 self.0,
1110 kind.into(),
1111 pstate.into(),
1112 &raw mut min,
1113 &raw mut max,
1114 ))?;
1115 }
1116 Ok(ClockRange { min, max })
1117 }
1118
1119 /// Returns the minimum, maximum, and current clock offset for a clock domain and P-state.
1120 ///
1121 /// For Maxwell or newer fully supported devices.
1122 ///
1123 /// [`Device::gpc_clock_vf_offset`], [`Device::memory_clock_vf_offset`],
1124 /// [`sys::nvmlDeviceGetGpcClkMinMaxVfOffset`], and
1125 /// [`sys::nvmlDeviceGetMemClkMinMaxVfOffset`] are deprecated and are planned
1126 /// for removal in a future release.
1127 /// Use [`Device::clock_offsets`] instead.
1128 ///
1129 /// # Errors
1130 ///
1131 /// Returns an error if the installed NVML version does not support the
1132 /// request layout, if NVML rejects the device, clock type, P-state, or
1133 /// outputs, if the device does not support clock-offset queries, or if NVML
1134 /// has not been initialized.
1135 pub fn clock_offsets(self, kind: ClockType, pstate: PerformanceState) -> Result<ClockOffset> {
1136 let mut info = sys::nvmlClockOffset_t {
1137 version: struct_version::<sys::nvmlClockOffset_t>(1),
1138 type_: kind.into(),
1139 pstate: pstate.into(),
1140 ..Default::default()
1141 };
1142 unsafe {
1143 try_ffi!(sys::nvmlDeviceGetClockOffsets(self.0, &raw mut info))?;
1144 }
1145 Ok(info.into())
1146 }
1147
    /// Sets the current clock offset of a clock domain for the given P-state.
1149 ///
1150 /// For Maxwell or newer fully supported devices.
1151 ///
1152 /// Requires privileged access.
1153 ///
1154 /// # Errors
1155 ///
1156 /// Returns an error if the installed NVML version does not support the
1157 /// request layout, if NVML rejects the handle, clock type, P-state, or
1158 /// offset, if the device does not support clock-offset control, if the
1159 /// current process lacks permission, or if NVML has not been initialized.
1160 pub fn set_clock_offsets(self, offset: ClockOffset) -> Result<()> {
1161 let mut info = sys::nvmlClockOffset_t {
1162 version: struct_version::<sys::nvmlClockOffset_t>(1),
1163 type_: offset.kind.into(),
1164 pstate: offset.pstate.into(),
1165 clockOffsetMHz: offset.clock_offset_mhz,
1166 minClockOffsetMHz: offset.min_clock_offset_mhz,
1167 maxClockOffsetMHz: offset.max_clock_offset_mhz,
1168 };
1169 unsafe { try_ffi!(sys::nvmlDeviceSetClockOffsets(self.0, &raw mut info)) }
1170 }
1171
1172 /// Returns the GPCCLK VF offset value.
1173 ///
1174 /// # Errors
1175 ///
1176 /// Returns an error if NVML rejects the handle or output, if the device does
1177 /// not support this deprecated offset query, if NVML has not been
1178 /// initialized, or if NVML reports an unexpected failure.
1179 pub fn gpc_clock_vf_offset(self) -> Result<i32> {
1180 let mut offset = 0;
1181 unsafe {
1182 try_ffi!(sys::nvmlDeviceGetGpcClkVfOffset(self.0, &raw mut offset))?;
1183 }
1184 Ok(offset)
1185 }
1186
1187 /// Returns the MemClk (Memory Clock) VF offset value.
1188 ///
1189 /// # Errors
1190 ///
1191 /// Returns an error if NVML rejects the handle or output, if the device does
1192 /// not support this deprecated offset query, if NVML has not been
1193 /// initialized, or if NVML reports an unexpected failure.
1194 pub fn memory_clock_vf_offset(self) -> Result<i32> {
1195 let mut offset = 0;
1196 unsafe {
1197 try_ffi!(sys::nvmlDeviceGetMemClkVfOffset(self.0, &raw mut offset))?;
1198 }
1199 Ok(offset)
1200 }
1201
1202 pub fn gpc_clock_vf_offset_range(self) -> Result<ClockRangeI32> {
1203 device_clock_offset_range(self, sys::nvmlDeviceGetGpcClkMinMaxVfOffset)
1204 }
1205
1206 pub fn memory_clock_vf_offset_range(self) -> Result<ClockRangeI32> {
1207 device_clock_offset_range(self, sys::nvmlDeviceGetMemClkMinMaxVfOffset)
1208 }
1209
1210 /// Returns the list of possible memory clocks that can be used as an argument for [`sys::nvmlDeviceSetMemoryLockedClocks`].
1211 ///
1212 /// For Kepler or newer fully supported devices.
1213 ///
1214 /// # Errors
1215 ///
1216 /// Returns an error if the device is inaccessible, if the clock list changes
1217 /// while the wrapper is fetching it, if NVML rejects the handle or count
1218 /// output, if the device does not support this query, if NVML has not been
1219 /// initialized, or if NVML reports an unexpected failure.
1220 pub fn supported_memory_clocks(self) -> Result<Vec<u32>> {
1221 query_u32_list(|count, values| unsafe {
1222 sys::nvmlDeviceGetSupportedMemoryClocks(self.0, count, values)
1223 })
1224 }
1225
1226 /// Returns the list of possible graphics clocks that can be used as an argument for [`sys::nvmlDeviceSetGpuLockedClocks`].
1227 ///
1228 /// For Kepler or newer fully supported devices.
1229 ///
1230 /// # Errors
1231 ///
1232 /// Returns an error if the device is inaccessible, if the clock list changes
1233 /// while the wrapper is fetching it, if NVML rejects the handle, memory
1234 /// clock, or output, if `memory_clock_mhz` is unsupported, if the device does
1235 /// not support this query, if NVML has not been initialized, or if NVML
1236 /// reports an unexpected failure.
1237 pub fn supported_graphics_clocks(self, memory_clock_mhz: u32) -> Result<Vec<u32>> {
1238 query_u32_list(|count, values| unsafe {
1239 sys::nvmlDeviceGetSupportedGraphicsClocks(self.0, memory_clock_mhz, count, values)
1240 })
1241 }
1242
1243 pub fn temperature_reading(self, sensor: TemperatureSensor) -> Result<u32> {
1244 let value = self.temperature(sensor)?.temperature;
1245 u32::try_from(value).map_err(|_| Error::NegativeValue {
1246 name: "temperature".into(),
1247 value: i64::from(value),
1248 })
1249 }
1250
1251 /// Returns the temperature threshold for the GPU with the specified threshold type in degrees C.
1252 ///
1253 /// For Kepler or newer fully supported devices.
1254 ///
1255 /// See [`TemperatureThreshold`] for details on available temperature thresholds.
1256 ///
1257 /// This is no longer the preferred interface for retrieving the following temperature thresholds on Ada and later architectures: [`TemperatureThreshold::Shutdown`], [`TemperatureThreshold::Slowdown`], [`TemperatureThreshold::MemoryMax`] and [`TemperatureThreshold::GpuMax`].
1258 ///
1259 /// Support for reading these temperature thresholds for Ada and later architectures may be removed in future releases.
1260 /// Use [`Device::field_values`] with `NVML_FI_DEV_TEMPERATURE_*` fields to retrieve temperature thresholds on these architectures.
1261 ///
1262 /// # Errors
1263 ///
1264 /// Returns an error if the device is inaccessible, if NVML rejects the
1265 /// handle, threshold type, or output, if the device does not support the
1266 /// requested temperature threshold, if NVML has not been initialized, or if
1267 /// NVML reports an unexpected failure.
1268 pub fn temperature_threshold(self, threshold: TemperatureThreshold) -> Result<u32> {
1269 let mut temperature = 0;
1270 unsafe {
1271 try_ffi!(sys::nvmlDeviceGetTemperatureThreshold(
1272 self.0,
1273 threshold.into(),
1274 &raw mut temperature,
1275 ))?;
1276 }
1277 Ok(temperature)
1278 }
1279
1280 /// Sets the temperature threshold for the GPU with the specified threshold type in degrees C.
1281 ///
1282 /// For Maxwell or newer fully supported devices.
1283 ///
1284 /// See [`TemperatureThreshold`] for details on available temperature thresholds.
1285 ///
1286 /// # Errors
1287 ///
1288 /// Returns an error if the device is inaccessible, if NVML rejects the
1289 /// handle, threshold type, or temperature value, if the device does not
1290 /// support setting this threshold, if NVML has not been initialized, or if
1291 /// NVML reports an unexpected failure.
1292 pub fn set_temperature_threshold(
1293 self,
1294 threshold: TemperatureThreshold,
1295 temperature: i32,
1296 ) -> Result<i32> {
1297 let mut temperature = temperature;
1298 unsafe {
1299 try_ffi!(sys::nvmlDeviceSetTemperatureThreshold(
1300 self.0,
1301 threshold.into(),
1302 &raw mut temperature,
1303 ))?;
1304 }
1305 Ok(temperature)
1306 }
1307
1308 /// Returns the thermal margin temperature (distance to nearest slowdown threshold).
1309 ///
1310 /// # Errors
1311 ///
1312 /// Returns an error if the installed NVML version does not support the
1313 /// request layout, if the device is inaccessible, if NVML rejects the handle
1314 /// or output, if the platform does not support this query, or if NVML
1315 /// reports an unexpected failure.
1316 pub fn margin_temperature(self) -> Result<MarginTemperature> {
1317 let mut margin = sys::nvmlMarginTemperature_t {
1318 version: struct_version::<sys::nvmlMarginTemperature_t>(1),
1319 ..Default::default()
1320 };
1321 unsafe {
1322 try_ffi!(sys::nvmlDeviceGetMarginTemperature(self.0, &raw mut margin))?;
1323 }
1324 Ok(margin.into())
1325 }
1326
    /// Queries the thermal settings for the sensor at `sensor_index` by executing a list of thermal system instructions.
1328 ///
1329 /// # Errors
1330 ///
1331 /// Returns an error if the device is inaccessible, if NVML rejects the
1332 /// handle, sensor index, or output, if the device does not support thermal
1333 /// settings, if NVML has not been initialized, or if NVML reports an
1334 /// unexpected failure.
1335 pub fn thermal_settings(self, sensor_index: u32) -> Result<ThermalSettings> {
1336 unsafe {
1337 let mut settings = MaybeUninit::<sys::nvmlGpuThermalSettings_t>::uninit();
1338 try_ffi!(sys::nvmlDeviceGetThermalSettings(
1339 self.0,
1340 sensor_index,
1341 settings.as_mut_ptr(),
1342 ))?;
1343 Ok(settings.assume_init().into())
1344 }
1345 }
1346
1347 /// Returns power usage for this GPU and its associated circuitry, such as memory, in milliwatts.
1348 ///
1349 /// For Fermi or newer fully supported devices.
1350 ///
1351 /// On Fermi and Kepler GPUs the reading is accurate to within +/- 5% of current power draw.
1352 /// On Ampere (except GA100) or newer GPUs, this returns power averaged over a one-second interval.
1353 /// On GA100 and older architectures, instantaneous power is returned.
1354 ///
1355 /// See `NVML_FI_DEV_POWER_AVERAGE` on newer architectures and `NVML_FI_DEV_POWER_INSTANT` to query specific power values.
1356 ///
1357 /// It is only available if power management mode is supported.
1358 /// See [`sys::nvmlDeviceGetPowerManagementMode`].
1359 ///
1360 /// # Errors
1361 ///
1362 /// Returns an error if the device is inaccessible, if NVML rejects the
1363 /// handle or output, if the device does not support power readings, if NVML
1364 /// has not been initialized, or if NVML reports an unexpected failure.
1365 pub fn power_usage(self) -> Result<u32> {
1366 let mut power = 0;
1367 unsafe {
1368 try_ffi!(sys::nvmlDeviceGetPowerUsage(self.0, &raw mut power))?;
1369 }
1370 Ok(power)
1371 }
1372
1373 pub fn power_management_mode(self) -> Result<EnableState> {
1374 let sample = self.single_field_value(FieldId::POWER_CURRENT_LIMIT, 0)?;
1375 match sample.result {
1376 Ok(_) => Ok(EnableState::Enabled),
1377 Err(Error::Nvml {
1378 code: Status::NotSupported,
1379 ..
1380 }) => Ok(EnableState::Disabled),
1381 Err(err) => Err(err),
1382 }
1383 }
1384
1385 /// Returns the power management limit associated with this device.
1386 ///
1387 /// For Fermi or newer fully supported devices.
1388 ///
1389 /// The power limit defines the upper boundary for the card's power draw.
1390 /// If the card's total power draw reaches this limit the power management algorithm kicks in.
1391 ///
1392 /// This reading is only available if power management mode is supported.
1393 /// See [`sys::nvmlDeviceGetPowerManagementMode`].
1394 ///
1395 /// # Errors
1396 ///
1397 /// Returns an error if the device is inaccessible, if NVML rejects the
1398 /// handle or output, if the device does not support power limits, if NVML
1399 /// has not been initialized, or if NVML reports an unexpected failure.
1400 pub fn power_management_limit(self) -> Result<u32> {
1401 let mut limit = 0;
1402 unsafe {
1403 try_ffi!(sys::nvmlDeviceGetPowerManagementLimit(
1404 self.0,
1405 &raw mut limit
1406 ))?;
1407 }
1408 Ok(limit)
1409 }
1410
1411 /// Sets a new power limit for this device.
1412 ///
1413 /// For Kepler or newer fully supported devices.
1414 /// Requires root/admin permissions.
1415 ///
1416 /// See [`Device::power_management_limit_constraints`] to check the allowed ranges of values.
1417 ///
1418 /// Limit is not persistent across reboots or driver unloads.
1419 /// Enable persistent mode to prevent driver from unloading when no application is using the device.
1420 ///
1421 /// # Errors
1422 ///
1423 /// Returns an error if the device is inaccessible, if NVML rejects the
1424 /// handle or power limit, if the device does not support power-limit
1425 /// control, if NVML has not been initialized, or if NVML reports an
1426 /// unexpected failure.
1427 pub fn set_power_management_limit(self, limit: u32) -> Result<()> {
1428 unsafe {
1429 try_ffi!(sys::nvmlDeviceSetPowerManagementLimit(self.0, limit))?;
1430 }
1431 Ok(())
1432 }
1433
1434 /// Returns default power management limit on this device, in milliwatts.
1435 /// Default power management limit is a power management limit that the device boots with.
1436 ///
1437 /// For Kepler or newer fully supported devices.
1438 ///
1439 /// # Errors
1440 ///
1441 /// Returns an error if the device is inaccessible, if NVML rejects the
1442 /// handle or output, if the device does not support power limits, if NVML
1443 /// has not been initialized, or if NVML reports an unexpected failure.
1444 pub fn power_management_default_limit(self) -> Result<u32> {
1445 let mut limit = 0;
1446 unsafe {
1447 try_ffi!(sys::nvmlDeviceGetPowerManagementDefaultLimit(
1448 self.0,
1449 &raw mut limit,
1450 ))?;
1451 }
1452 Ok(limit)
1453 }
1454
1455 /// Returns the effective power limit that the driver enforces after taking into account all limiters.
1456 ///
1457 /// This can differ from [`Device::power_management_limit`] if other limits are set elsewhere.
1458 /// This includes the out-of-band power-limit interface.
1459 ///
1460 /// For Kepler or newer fully supported devices.
1461 ///
1462 /// # Errors
1463 ///
1464 /// Returns an error if the device is inaccessible, if NVML rejects the
1465 /// handle or output, if the device does not support power limits, if NVML
1466 /// has not been initialized, or if NVML reports an unexpected failure.
1467 pub fn enforced_power_limit(self) -> Result<u32> {
1468 let mut limit = 0;
1469 unsafe {
1470 try_ffi!(sys::nvmlDeviceGetEnforcedPowerLimit(self.0, &raw mut limit))?;
1471 }
1472 Ok(limit)
1473 }
1474
1475 /// Returns information about possible values of power management limits on this device.
1476 ///
1477 /// For Kepler or newer fully supported devices.
1478 ///
1479 /// # Errors
1480 ///
1481 /// Returns an error if the device is inaccessible, if NVML rejects the
1482 /// handle or outputs, if the device does not support power-limit ranges, if
1483 /// NVML has not been initialized, or if NVML reports an unexpected failure.
1484 pub fn power_management_limit_constraints(self) -> Result<PowerLimits> {
1485 let mut min = 0;
1486 let mut max = 0;
1487 unsafe {
1488 try_ffi!(sys::nvmlDeviceGetPowerManagementLimitConstraints(
1489 self.0,
1490 &raw mut min,
1491 &raw mut max,
1492 ))?;
1493 }
1494 Ok(PowerLimits { min, max })
1495 }
1496
1497 /// Returns current power mizer mode on this device.
1498 ///
1499 /// PowerMizerMode provides a hint to the driver as to how to manage the performance of the GPU.
1500 ///
1501 /// For Maxwell or newer fully supported devices.
1502 ///
1503 /// # Errors
1504 ///
1505 /// Returns an error if the device is inaccessible, if NVML rejects the
1506 /// handle or output, if the device does not support PowerMizer mode
1507 /// readings, if NVML has not been initialized, or if NVML reports an
1508 /// unexpected failure.
1509 pub fn power_mizer_mode(self) -> Result<PowerMizerModes> {
1510 unsafe {
1511 let mut modes = MaybeUninit::<sys::nvmlDevicePowerMizerModes_v1_t>::uninit();
1512 try_ffi!(sys::nvmlDeviceGetPowerMizerMode_v1(
1513 self.0,
1514 modes.as_mut_ptr()
1515 ))?;
1516 Ok(modes.assume_init().into())
1517 }
1518 }
1519
1520 /// Sets the new power mizer mode.
1521 ///
1522 /// For Maxwell or newer fully supported devices.
1523 ///
1524 /// # Errors
1525 ///
1526 /// Returns an error if the device is inaccessible, if NVML rejects the
1527 /// handle or mode, if the device does not support PowerMizer mode changes,
1528 /// if NVML has not been initialized, or if NVML reports an unexpected
1529 /// failure.
1530 pub fn set_power_mizer_mode(self, mode: PowerMizerMode) -> Result<()> {
1531 let mut power_mizer = sys::nvmlDevicePowerMizerModes_v1_t {
1532 mode: mode.into(),
1533 ..Default::default()
1534 };
1535 unsafe {
1536 try_ffi!(sys::nvmlDeviceSetPowerMizerMode_v1(
1537 self.0,
1538 &raw mut power_mizer
1539 ))
1540 }
1541 }
1542
1543 /// Returns total energy consumption for this GPU in millijoules (mJ) since the driver was last reloaded.
1544 ///
1545 /// For Volta or newer fully supported devices.
1546 ///
1547 /// # Errors
1548 ///
1549 /// Returns an error if the device is inaccessible, if NVML rejects the
1550 /// handle or output, if the device does not support energy readings, if NVML
1551 /// has not been initialized, or if NVML reports an unexpected failure.
1552 pub fn total_energy_consumption(self) -> Result<u64> {
1553 let mut energy = 0;
1554 unsafe {
1555 try_ffi!(sys::nvmlDeviceGetTotalEnergyConsumption(
1556 self.0,
1557 &raw mut energy
1558 ))?;
1559 }
1560 Ok(energy)
1561 }
1562
1563 /// Returns the CUDA compute capability of the device.
1564 ///
1565 /// For all products.
1566 ///
1567 /// Returns the major and minor compute capability version numbers of the device.
    /// The major and minor versions are equivalent to the `CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR` and `CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR` attributes returned by `cuDeviceGetAttribute`.
1569 ///
1570 /// # Errors
1571 ///
1572 /// Returns an error if the device is inaccessible, if NVML rejects the
1573 /// handle or outputs, if NVML has not been initialized, or if NVML reports
1574 /// an unexpected failure.
1575 pub fn compute_capability(self) -> Result<ComputeCapability> {
1576 let mut major = 0;
1577 let mut minor = 0;
1578 unsafe {
1579 try_ffi!(sys::nvmlDeviceGetCudaComputeCapability(
1580 self.0,
1581 &raw mut major,
1582 &raw mut minor,
1583 ))?;
1584 }
1585 Ok(ComputeCapability { major, minor })
1586 }
1587
1588 /// Returns the current performance state for the device.
1589 ///
1590 /// For Fermi or newer fully supported devices.
1591 ///
1592 /// See [`PerformanceState`] for details on allowed performance states.
1593 ///
1594 /// # Errors
1595 ///
1596 /// Returns an error if the device is inaccessible, if NVML rejects the
1597 /// handle or output, if the device does not support performance-state
1598 /// queries, if NVML has not been initialized, or if NVML reports an
1599 /// unexpected failure.
1600 pub fn performance_state(self) -> Result<PerformanceState> {
1601 let mut state = sys::nvmlPstates_t::NVML_PSTATE_UNKNOWN;
1602 unsafe {
1603 try_ffi!(sys::nvmlDeviceGetPerformanceState(self.0, &raw mut state))?;
1604 }
1605 Ok(state.into())
1606 }
1607
1608 pub fn power_state(self) -> Result<PerformanceState> {
1609 self.performance_state()
1610 }
1611
1612 /// Returns all supported performance states (P-states) for the device.
1613 ///
1614 /// The returned array contains a contiguous list of valid P-states supported
1615 /// by the device.
1616 /// If the number of supported P-states is smaller than the supplied array,
1617 /// missing elements contain [`PerformanceState::Unknown`].
1618 ///
1619 /// The number of returned elements never exceeds `NVML_MAX_GPU_PERF_PSTATES`.
1620 ///
1621 /// # Errors
1622 ///
1623 /// Returns an error if the fixed internal P-state buffer is too small, if
1624 /// NVML rejects the query, if the device does not support performance-state
1625 /// readings, if NVML has not been initialized, or if NVML reports an
1626 /// unexpected failure.
1627 pub fn supported_performance_states(self) -> Result<Vec<PerformanceState>> {
1628 let mut states =
1629 [sys::nvmlPstates_t::NVML_PSTATE_UNKNOWN; sys::NVML_MAX_GPU_PERF_PSTATES as usize];
1630 unsafe {
1631 try_ffi!(sys::nvmlDeviceGetSupportedPerformanceStates(
1632 self.0,
1633 states.as_mut_ptr(),
1634 states.len() as u32,
1635 ))?;
1636 }
1637 Ok(states
1638 .into_iter()
1639 .filter(|state| *state != sys::nvmlPstates_t::NVML_PSTATE_UNKNOWN)
1640 .map(Into::into)
1641 .collect())
1642 }
1643
1644 /// Returns current clocks event reasons.
1645 ///
1646 /// For all fully supported products.
1647 ///
1648 /// More than one bit can be enabled at the same time.
1649 /// Multiple reasons can be affecting clocks at once.
1650 ///
1651 /// # Errors
1652 ///
1653 /// Returns an error if the device is inaccessible, if NVML rejects the
1654 /// handle or output, if the device does not support clocks-event reasons, if
1655 /// NVML has not been initialized, or if NVML reports an unexpected failure.
1656 pub fn current_clocks_event_reasons(self) -> Result<u64> {
1657 let mut reasons = 0;
1658 unsafe {
1659 try_ffi!(sys::nvmlDeviceGetCurrentClocksEventReasons(
1660 self.0,
1661 &raw mut reasons,
1662 ))?;
1663 }
1664 Ok(reasons)
1665 }
1666
1667 pub fn current_clocks_throttle_reasons(self) -> Result<u64> {
1668 self.current_clocks_event_reasons()
1669 }
1670
1671 /// Returns bitmask of supported clocks event reasons that can be returned by [`Device::current_clocks_event_reasons`].
1672 ///
1673 /// For all fully supported products.
1674 ///
1675 /// Not supported in virtual machines running virtual GPU (vGPU).
1676 ///
1677 /// # Errors
1678 ///
1679 /// Returns an error if the device is inaccessible, if NVML rejects the
1680 /// handle or output, if NVML has not been initialized, or if NVML reports an
1681 /// unexpected failure.
1682 pub fn supported_clocks_event_reasons(self) -> Result<u64> {
1683 let mut reasons = 0;
1684 unsafe {
1685 try_ffi!(sys::nvmlDeviceGetSupportedClocksEventReasons(
1686 self.0,
1687 &raw mut reasons,
1688 ))?;
1689 }
1690 Ok(reasons)
1691 }
1692
1693 pub fn supported_clocks_throttle_reasons(self) -> Result<u64> {
1694 self.supported_clocks_event_reasons()
1695 }
1696
1697 /// Returns the event types supported by this device.
1698 ///
1699 /// For Fermi or newer fully supported devices.
1700 ///
1701 /// Events are not supported on Windows.
1702 /// Therefore, this call returns an empty event mask on Windows.
1703 ///
1704 /// # Errors
1705 ///
1706 /// Returns an error if the device is inaccessible, if NVML rejects the event
1707 /// mask output, if NVML has not been initialized, or if NVML reports an
1708 /// unexpected failure.
1709 pub fn supported_event_types(self) -> Result<EventTypes> {
1710 let mut event_types = 0;
1711 unsafe {
1712 try_ffi!(sys::nvmlDeviceGetSupportedEventTypes(
1713 self.0,
1714 &raw mut event_types,
1715 ))?;
1716 }
1717 Ok(EventTypes::from_bits_retain(event_types))
1718 }
1719
1720 /// Returns the device board ID in the range `0..N`.
1721 /// Devices with the same board ID indicate GPUs connected to the same PLX.
1722 /// Use in conjunction with [`Device::is_multi_gpu_board`] to decide if they are on the same board as well.
1723 /// The returned board ID is unique for the current configuration.
1724 /// Uniqueness and ordering across reboots and system configurations are not
1725 /// guaranteed, but IDs remain distinct within one configuration.
1726 ///
1727 /// For Fermi or newer fully supported devices.
1728 ///
1729 /// # Errors
1730 ///
1731 /// Returns an error if the device is inaccessible, if NVML rejects the
1732 /// handle or output, if the device does not support board IDs, if NVML has
1733 /// not been initialized, or if NVML reports an unexpected failure.
1734 pub fn board_id(self) -> Result<u32> {
1735 let mut board_id = 0;
1736 unsafe {
1737 try_ffi!(sys::nvmlDeviceGetBoardId(self.0, &raw mut board_id))?;
1738 }
1739 Ok(board_id)
1740 }
1741
1742 /// Returns the display mode for the device.
1743 ///
1744 /// For all products.
1745 ///
1746 /// Indicates whether a physical display, such as a monitor, is currently connected to any of the device's connectors.
1747 ///
1748 /// See [`EnableState`] for details on allowed modes.
1749 ///
1750 /// # Errors
1751 ///
1752 /// Returns an error if the device is inaccessible, if NVML rejects the
1753 /// handle or output, if the device does not support display-mode reporting,
1754 /// if NVML has not been initialized, or if NVML reports an unexpected
1755 /// failure.
1756 pub fn display_mode(self) -> Result<EnableState> {
1757 let mut mode = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
1758 unsafe {
1759 try_ffi!(sys::nvmlDeviceGetDisplayMode(self.0, &raw mut mode))?;
1760 }
1761 try_from_nvml_enum("enable state", mode as u32)
1762 }
1763
1764 /// Returns the display active state for the device.
1765 ///
1766 /// For all products.
1767 ///
1768 /// Indicates whether a display is initialized on the device.
1769 /// For example whether X Server is attached to this device and has allocated memory for the screen.
1770 ///
1771 /// Display can be active even when no monitor is physically attached.
1772 ///
1773 /// See [`EnableState`] for details on allowed modes.
1774 ///
1775 /// # Errors
1776 ///
1777 /// Returns an error if the device is inaccessible, if NVML rejects the
1778 /// query, if the device does not support display-active reporting, if NVML
1779 /// has not been initialized, or if NVML reports an unexpected failure.
1780 pub fn display_active(self) -> Result<EnableState> {
1781 let mut active = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
1782 unsafe {
1783 try_ffi!(sys::nvmlDeviceGetDisplayActive(self.0, &raw mut active))?;
1784 }
1785 Ok(active.into())
1786 }
1787
1788 /// Returns the current and pending ECC modes for the device.
1789 ///
1790 /// For Fermi or newer fully supported devices.
1791 /// Only applicable to devices with ECC.
1792 /// Requires [`InforomObject::Ecc`] version 1.0 or higher.
1793 ///
1794 /// Changing ECC modes requires a reboot.
1795 /// The "pending" ECC mode refers to the target mode following the next reboot.
1796 ///
1797 /// See [`EnableState`] for details on allowed modes.
1798 ///
1799 /// # Errors
1800 ///
1801 /// Returns an error if the device is inaccessible, if NVML rejects the
1802 /// handle or outputs, if the device does not support ECC mode reporting, if
1803 /// NVML has not been initialized, or if NVML reports an unexpected failure.
1804 pub fn ecc_mode(self) -> Result<CurrentPending<EnableState>> {
1805 let mut current = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
1806 let mut pending = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
1807 unsafe {
1808 try_ffi!(sys::nvmlDeviceGetEccMode(
1809 self.0,
1810 &raw mut current,
1811 &raw mut pending,
1812 ))?;
1813 }
1814 Ok(CurrentPending {
1815 current: current.into(),
1816 pending: pending.into(),
1817 })
1818 }
1819
1820 /// Returns the default ECC modes for the device.
1821 ///
1822 /// For Fermi or newer fully supported devices.
1823 /// Only applicable to devices with ECC.
1824 /// Requires [`InforomObject::Ecc`] version 1.0 or higher.
1825 ///
1826 /// See [`EnableState`] for details on allowed modes.
1827 ///
1828 /// # Errors
1829 ///
1830 /// Returns an error if the device is inaccessible, if NVML rejects the
1831 /// handle or output, if the device does not support ECC mode reporting, if
1832 /// NVML has not been initialized, or if NVML reports an unexpected failure.
1833 pub fn default_ecc_mode(self) -> Result<EnableState> {
1834 let mut default_mode = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
1835 unsafe {
1836 try_ffi!(sys::nvmlDeviceGetDefaultEccMode(
1837 self.0,
1838 &raw mut default_mode
1839 ))?;
1840 }
1841 Ok(default_mode.into())
1842 }
1843
1844 /// Returns the current GPU operation mode and the pending mode that will
1845 /// take effect after reboot.
1846 ///
1847 /// For GK110 M-class and X-class Tesla products from the Kepler family.
1848 /// Modes [`GpuOperationMode::LowDp`] and [`GpuOperationMode::AllOn`] are supported on fully supported GeForce products.
1849 /// Not supported on Quadro and Tesla C-class products.
1850 ///
1851 /// # Errors
1852 ///
1853 /// Returns an error if the device is inaccessible, if NVML rejects the
1854 /// handle or outputs, if the device does not support GPU operation modes, if
1855 /// NVML has not been initialized, or if NVML reports an unexpected failure.
1856 pub fn gpu_operation_mode(self) -> Result<CurrentPending<GpuOperationMode>> {
1857 let mut current = sys::nvmlGpuOperationMode_t::NVML_GOM_ALL_ON;
1858 let mut pending = sys::nvmlGpuOperationMode_t::NVML_GOM_ALL_ON;
1859 unsafe {
1860 try_ffi!(sys::nvmlDeviceGetGpuOperationMode(
1861 self.0,
1862 &raw mut current,
1863 &raw mut pending,
1864 ))?;
1865 }
1866 Ok(CurrentPending {
1867 current: current.into(),
1868 pending: pending.into(),
1869 })
1870 }
1871
1872 /// Returns the current and pending driver model for the device.
1873 ///
1874 /// For Kepler or newer fully supported devices.
1875 /// For windows only.
1876 ///
1877 /// On Windows platforms the device driver can run in either WDDM, MCDM or WDM (TCC) modes.
1878 /// If a display is attached to the device it must run in WDDM mode.
1879 /// MCDM mode is preferred if a display is not attached.
1880 /// TCC mode is deprecated.
1881 ///
1882 /// See [`DriverModel`] for details on available driver models.
1883 ///
1884 /// # Errors
1885 ///
1886 /// Returns an error if the device is inaccessible, if NVML rejects the
1887 /// handle or outputs, if the platform is not Windows, if NVML has not been
1888 /// initialized, or if NVML reports an unexpected failure.
1889 pub fn driver_model(self) -> Result<CurrentPending<DriverModel>> {
1890 let mut current = sys::nvmlDriverModel_t::NVML_DRIVER_WDDM;
1891 let mut pending = sys::nvmlDriverModel_t::NVML_DRIVER_WDDM;
1892 unsafe {
1893 try_ffi!(sys::nvmlDeviceGetDriverModel_v2(
1894 self.0,
1895 &raw mut current,
1896 &raw mut pending,
1897 ))?;
1898 }
1899 Ok(CurrentPending {
1900 current: current.into(),
1901 pending: pending.into(),
1902 })
1903 }
1904
1905 /// Sets the driver model for the device.
1906 ///
1907 /// For Fermi or newer fully supported devices.
1908 /// For windows only.
1909 /// Requires root/admin permissions.
1910 ///
1911 /// On Windows platforms the device driver can run in either WDDM or WDM (TCC) mode.
1912 /// If a display is attached to the device it must run in WDDM mode.
1913 ///
1914 /// It is possible to force the change to WDM (TCC) while the display is still attached with a force flag (nvmlFlagForce).
1915 /// Use the force flag only if the host is powered down afterward and the display is detached from the device before the next reboot.
1916 ///
1917 /// This operation takes effect after the next reboot.
1918 ///
1919 /// Windows driver model may only be set to WDDM when running in DEFAULT compute mode.
1920 ///
1921 /// Changing the driver model to WDDM is not supported when the GPU does not
1922 /// support graphics acceleration, or would not support it after reboot.
1923 /// See [`Device::set_gpu_operation_mode`].
1924 ///
1925 /// See [`DriverModel`] for details on available driver models.
1926 /// See [`DriverModelFlags`] for the available flag values.
1927 ///
1928 /// # Errors
1929 ///
1930 /// Returns an error if the device is inaccessible, if NVML rejects the
1931 /// handle, driver model, or flags, if the platform is not Windows or the
1932 /// device does not support driver-model changes, if the current process
1933 /// lacks permission, if NVML has not been initialized, or if NVML reports an
1934 /// unexpected failure.
1935 pub fn set_driver_model(self, model: DriverModel, flags: DriverModelFlags) -> Result<()> {
1936 unsafe {
1937 try_ffi!(sys::nvmlDeviceSetDriverModel(
1938 self.0,
1939 model.into(),
1940 flags.bits(),
1941 ))?;
1942 }
1943 Ok(())
1944 }
1945
1946 /// Sets the GPU operation mode.
1947 /// See [`GpuOperationMode`] for details.
1948 ///
1949 /// For GK110 M-class and X-class Tesla products from the Kepler family.
1950 /// Modes [`GpuOperationMode::LowDp`] and [`GpuOperationMode::AllOn`] are supported on fully supported GeForce products.
1951 /// Not supported on Quadro and Tesla C-class products.
1952 /// Requires root/admin permissions.
1953 ///
1954 /// Changing GOMs requires a reboot.
1955 /// The reboot requirement might be removed in the future.
1956 ///
1957 /// Compute-only GOMs do not support graphics acceleration.
1958 /// On Windows, switching to these GOMs is not supported when the pending driver model is WDDM.
1959 /// See [`Device::set_driver_model`].
1960 ///
1961 /// # Errors
1962 ///
1963 /// Returns an error if the device is inaccessible, if NVML rejects the
1964 /// handle or mode, if the device does not support GPU operation mode changes
1965 /// or the requested mode, if the current process lacks permission, if NVML
1966 /// has not been initialized, or if NVML reports an unexpected failure.
1967 pub fn set_gpu_operation_mode(self, mode: GpuOperationMode) -> Result<()> {
1968 unsafe {
1969 try_ffi!(sys::nvmlDeviceSetGpuOperationMode(self.0, mode.into()))?;
1970 }
1971 Ok(())
1972 }
1973
1974 pub fn inforom_image_version(self) -> Result<String> {
1975 device_string_query(self, 16, sys::nvmlDeviceGetInforomImageVersion)
1976 }
1977
1978 /// Returns the version information for the device's infoROM object.
1979 ///
1980 /// For all products with an inforom.
1981 ///
1982 /// Fermi and higher parts have non-volatile on-board memory for persisting device info, such as aggregate ECC counts.
1983 /// The version of the data structures in this memory may change from time to time.
1984 /// It does not exceed 16 bytes including the terminating NUL byte.
1985 /// This wrapper allocates the required NVML buffer internally.
1986 ///
1987 /// See [`InforomObject`] for details on the available infoROM objects.
1988 ///
1989 /// # Errors
1990 ///
1991 /// Returns an error if the device is inaccessible, if the internal infoROM
1992 /// version buffer is too small, if NVML rejects the output, if the device
1993 /// does not have an infoROM, if NVML has not been initialized, or if NVML
1994 /// reports an unexpected failure.
1995 pub fn inforom_version(self, object: InforomObject) -> Result<String> {
1996 let mut buffer = vec![0i8; sys::NVML_DEVICE_INFOROM_VERSION_BUFFER_SIZE as usize];
1997 unsafe {
1998 try_ffi!(sys::nvmlDeviceGetInforomVersion(
1999 self.as_raw(),
2000 object.into(),
2001 buffer.as_mut_ptr(),
2002 buffer.len() as u32,
2003 ))?;
2004 }
2005 Ok(string_from_c_chars(&buffer))
2006 }
2007
2008 /// Returns the checksum of the configuration stored in the device's infoROM.
2009 ///
2010 /// For all products with an inforom.
2011 ///
2012 /// Can be used to make sure that two GPUs have the exact same configuration.
2013 /// Current checksum takes into account configuration stored in PWR and ECC infoROM objects.
2014 /// The checksum can change between driver releases or when configuration changes, such as disabling or enabling ECC.
2015 ///
2016 /// # Errors
2017 ///
2018 /// Returns an error if the infoROM checksum cannot be read because the
2019 /// infoROM is corrupted, if the device is inaccessible, if NVML rejects the
2020 /// output, if the device does not support checksums, if NVML has not been
2021 /// initialized, or if NVML reports an unexpected failure.
2022 pub fn inforom_configuration_checksum(self) -> Result<u32> {
2023 let mut checksum = 0;
2024 unsafe {
2025 try_ffi!(sys::nvmlDeviceGetInforomConfigurationChecksum(
2026 self.0,
2027 &raw mut checksum,
2028 ))?;
2029 }
2030 Ok(checksum)
2031 }
2032
2033 /// Reads the infoROM from the flash and verifies the checksums.
2034 ///
2035 /// For all products with an inforom.
2036 ///
2037 /// # Errors
2038 ///
2039 /// Returns an error if the infoROM is corrupted, if the device is
2040 /// inaccessible, if the device does not support infoROM validation, if NVML
2041 /// has not been initialized, or if NVML reports an unexpected failure.
2042 pub fn validate_inforom(self) -> Result<()> {
2043 unsafe { try_ffi!(sys::nvmlDeviceValidateInforom(self.0)) }
2044 }
2045
2046 /// Returns the timestamp and the duration of the last flush of the BBX (blackbox) infoROM object during the current run.
2047 ///
2048 /// For all products with an inforom.
2049 ///
2050 /// # Errors
2051 ///
2052 /// Returns an error if the device is inaccessible, if the BBX object has not
2053 /// been flushed yet, if the device does not have an infoROM, or if NVML
2054 /// reports an unexpected failure.
2055 pub fn last_bbx_flush_time(self) -> Result<BbxFlushTime> {
2056 let mut timestamp = 0;
2057 let mut duration_us = 0;
2058 unsafe {
2059 try_ffi!(sys::nvmlDeviceGetLastBBXFlushTime(
2060 self.0,
2061 &raw mut timestamp,
2062 &raw mut duration_us,
2063 ))?;
2064 }
2065 Ok(BbxFlushTime {
2066 timestamp,
2067 duration_us,
2068 })
2069 }
2070
2071 /// Returns the device's Adaptive Clock status.
2072 ///
2073 /// # Errors
2074 ///
2075 /// Returns an error if the device is inaccessible, if NVML rejects the
2076 /// handle or output, if the device does not support adaptive-clock status,
2077 /// or if NVML has not been initialized.
2078 pub fn adaptive_clock_info_status(self) -> Result<AdaptiveClockInfoStatus> {
2079 let mut status = 0;
2080 unsafe {
2081 try_ffi!(sys::nvmlDeviceGetAdaptiveClockInfoStatus(
2082 self.0,
2083 &raw mut status
2084 ))?;
2085 }
2086 try_from_nvml_enum("adaptive clock info status", status)
2087 }
2088
2089 /// Returns the frequency monitor fault status for the device.
2090 ///
2091 /// For Ampere or newer fully supported devices.
2092 /// Requires root privileges.
2093 ///
2094 /// Returns the decoded frequency-monitor fault status reported by NVML.
2095 ///
2096 /// # Errors
2097 ///
2098 /// Returns an error if the device is inaccessible, if NVML rejects the
2099 /// handle or output, if the device does not support frequency-monitor
2100 /// status, if NVML has not been initialized, or if NVML reports an
2101 /// unexpected failure.
2102 pub fn clk_mon_status(self) -> Result<ClkMonStatus> {
2103 unsafe {
2104 let mut status = MaybeUninit::<sys::nvmlClkMonStatus_t>::uninit();
2105 try_ffi!(sys::nvmlDeviceGetClkMonStatus(self.0, status.as_mut_ptr()))?;
2106 Ok(status.assume_init().into())
2107 }
2108 }
2109
2110 /// Returns SRAM ECC error status of this device.
2111 ///
2112 /// For Ampere or newer fully supported devices.
2113 /// Requires root/admin permissions.
2114 ///
2115 /// Returns the SRAM ECC error status reported by NVML.
2116 ///
2117 /// # Errors
2118 ///
2119 /// Returns an error if the installed NVML version does not support the
2120 /// request layout, if the device is inaccessible, if NVML rejects the handle
2121 /// or output, if the device does not support SRAM ECC status, if NVML has
2122 /// not been initialized, or if NVML reports an unexpected failure.
2123 pub fn sram_ecc_error_status(self) -> Result<EccSramErrorStatus> {
2124 let mut status = sys::nvmlEccSramErrorStatus_t {
2125 version: struct_version::<sys::nvmlEccSramErrorStatus_t>(1),
2126 ..Default::default()
2127 };
2128 unsafe {
2129 try_ffi!(sys::nvmlDeviceGetSramEccErrorStatus(
2130 self.0,
2131 &raw mut status,
2132 ))?;
2133 }
2134 Ok(status.into())
2135 }
2136
2137 pub fn sram_unique_uncorrected_ecc_error_counts(
2138 self,
2139 ) -> Result<EccSramUniqueUncorrectedErrorCounts> {
2140 let mut counts = sys::nvmlEccSramUniqueUncorrectedErrorCounts_t {
2141 version: struct_version::<sys::nvmlEccSramUniqueUncorrectedErrorCounts_t>(1),
2142 ..Default::default()
2143 };
2144
2145 unsafe {
2146 try_ffi!(sys::nvmlDeviceGetSramUniqueUncorrectedEccErrorCounts(
2147 self.0,
2148 &raw mut counts,
2149 ))?;
2150
2151 let entries = if counts.entryCount == 0 || counts.entries.is_null() {
2152 Vec::new()
2153 } else {
2154 std::slice::from_raw_parts(counts.entries, counts.entryCount as usize)
2155 .iter()
2156 .copied()
2157 .map(Into::into)
2158 .collect()
2159 };
2160
2161 Ok(EccSramUniqueUncorrectedErrorCounts { entries })
2162 }
2163 }
2164
2165 /// Returns the device's interrupt number.
2166 ///
2167 /// # Errors
2168 ///
2169 /// Returns an error if the device is inaccessible, if NVML rejects the
2170 /// handle or output, if the device does not support IRQ-number reporting, or
2171 /// if NVML has not been initialized.
2172 pub fn irq_number(self) -> Result<u32> {
2173 let mut irq = 0;
2174 unsafe {
2175 try_ffi!(sys::nvmlDeviceGetIrqNum(self.0, &raw mut irq))?;
2176 }
2177 Ok(irq)
2178 }
2179
2180 /// Returns the device's core count.
2181 ///
2182 /// On MIG-enabled GPUs, querying the device's core count is currently not supported by this operation.
2183 /// Use [`sys::nvmlDeviceGetGpuInstanceProfileInfo`] to fetch the MIG device's core count.
2184 ///
2185 /// # Errors
2186 ///
2187 /// Returns an error if the device is inaccessible, if NVML rejects the
2188 /// handle or output, if the device or MIG device does not support core-count
2189 /// reporting, or if NVML has not been initialized.
2190 pub fn num_gpu_cores(self) -> Result<u32> {
2191 let mut cores = 0;
2192 unsafe {
2193 try_ffi!(sys::nvmlDeviceGetNumGpuCores(self.0, &raw mut cores))?;
2194 }
2195 Ok(cores)
2196 }
2197
2198 /// Returns the device's memory bus width.
2199 ///
2200 /// # Errors
2201 ///
2202 /// Returns an error if the device is inaccessible, if NVML rejects the
2203 /// handle or output, if the device does not support memory-bus-width
2204 /// reporting, or if NVML has not been initialized.
2205 pub fn memory_bus_width(self) -> Result<u32> {
2206 let mut width = 0;
2207 unsafe {
2208 try_ffi!(sys::nvmlDeviceGetMemoryBusWidth(self.0, &raw mut width))?;
2209 }
2210 Ok(width)
2211 }
2212
2213 /// Returns the GPU bus type, such as PCIe or PCI.
2214 ///
2215 /// # Errors
2216 ///
2217 /// Returns an error if NVML has not been initialized, if NVML rejects the
2218 /// handle or output, or if NVML reports an unexpected failure.
2219 pub fn bus_type(self) -> Result<BusType> {
2220 let mut bus_type = 0;
2221 unsafe {
2222 try_ffi!(sys::nvmlDeviceGetBusType(self.0, &raw mut bus_type))?;
2223 }
2224 Ok(BusType::from_raw(bus_type))
2225 }
2226
2227 /// Returns the device power source.
2228 ///
2229 /// # Errors
2230 ///
2231 /// Returns an error if the device is inaccessible, if NVML rejects the
2232 /// handle or output, if the device does not support power-source reporting,
2233 /// or if NVML has not been initialized.
2234 pub fn power_source(self) -> Result<PowerSource> {
2235 let mut power_source = 0;
2236 unsafe {
2237 try_ffi!(sys::nvmlDeviceGetPowerSource(self.0, &raw mut power_source))?;
2238 }
2239 Ok(PowerSource::from_raw(power_source))
2240 }
2241
2242 /// Returns the total ECC error counts for the device.
2243 ///
2244 /// For Fermi or newer fully supported devices.
2245 /// Only applicable to devices with ECC.
2246 /// Requires [`InforomObject::Ecc`] version 1.0 or higher.
2247 /// Requires ECC Mode to be enabled.
2248 ///
2249 /// The total error count is the sum of errors across each separate memory system, that is, the total set of errors across the entire device.
2250 ///
2251 /// See [`MemoryErrorType`] for a description of available error types.
2252 /// See [`EccCounterType`] for a description of available counter types.
2253 ///
2254 /// # Errors
2255 ///
2256 /// Returns an error if the device is inaccessible, if NVML rejects the
2257 /// handle, error type, counter type, or output, if the device does not
2258 /// support ECC error reporting, if NVML has not been initialized, or if NVML
2259 /// reports an unexpected failure.
2260 pub fn total_ecc_errors(
2261 self,
2262 error_type: MemoryErrorType,
2263 counter_type: EccCounterType,
2264 ) -> Result<u64> {
2265 let mut count = 0;
2266 unsafe {
2267 try_ffi!(sys::nvmlDeviceGetTotalEccErrors(
2268 self.0,
2269 error_type.into(),
2270 counter_type.into(),
2271 &raw mut count,
2272 ))?;
2273 }
2274 Ok(count)
2275 }
2276
2277 pub fn detailed_ecc_errors(
2278 self,
2279 error_type: MemoryErrorType,
2280 counter_type: EccCounterType,
2281 ) -> Result<EccErrorCounts> {
2282 Ok(EccErrorCounts {
2283 l1_cache: self.memory_error_counter(
2284 error_type,
2285 counter_type,
2286 MemoryLocation::L1Cache,
2287 )?,
2288 l2_cache: self.memory_error_counter(
2289 error_type,
2290 counter_type,
2291 MemoryLocation::L2Cache,
2292 )?,
2293 device_memory: self.memory_error_counter(
2294 error_type,
2295 counter_type,
2296 MemoryLocation::Dram,
2297 )?,
2298 register_file: self.memory_error_counter(
2299 error_type,
2300 counter_type,
2301 MemoryLocation::RegisterFile,
2302 )?,
2303 })
2304 }
2305
2306 /// Returns the requested memory error counter for the device.
2307 ///
2308 /// For Fermi or newer fully supported devices.
2309 /// Requires [`InforomObject::Ecc`] version 2.0 or higher to report aggregate location-based memory error counts.
2310 /// Requires [`InforomObject::Ecc`] version 1.0 or higher to report all other memory error counts.
2311 ///
2312 /// Only applicable to devices with ECC.
2313 ///
2314 /// Requires ECC Mode to be enabled.
2315 ///
2316 /// On MIG-enabled GPUs, per instance information can be queried using specific MIG device handles.
2317 /// Per instance information is currently only supported for non-DRAM uncorrectable volatile errors.
2318 /// Querying volatile errors using device handles is currently not supported.
2319 ///
2320 /// See [`MemoryErrorType`] for a description of available memory error types.
2321 /// See [`EccCounterType`] for a description of available counter types.
2322 /// See [`MemoryLocation`] for a description of available counter locations.
2323 ///
2324 /// # Errors
2325 ///
2326 /// Returns an error if the device is inaccessible, if NVML rejects the
2327 /// handle, error type, counter type, memory location, or output, if the
2328 /// device does not support ECC error reporting for the requested memory
2329 /// location, if NVML has not been initialized, or if NVML reports an
2330 /// unexpected failure.
2331 pub fn memory_error_counter(
2332 self,
2333 error_type: MemoryErrorType,
2334 counter_type: EccCounterType,
2335 location: MemoryLocation,
2336 ) -> Result<u64> {
2337 let mut count = 0;
2338 unsafe {
2339 try_ffi!(sys::nvmlDeviceGetMemoryErrorCounter(
2340 self.0,
2341 error_type.into(),
2342 counter_type.into(),
2343 location.into(),
2344 &raw mut count,
2345 ))?;
2346 }
2347 Ok(count)
2348 }
2349
2350 pub fn violation_status(self, perf_policy: PerfPolicyType) -> Result<ViolationTime> {
2351 let sample = self.single_field_value(field_id_for_perf_policy(perf_policy), 0)?;
2352 Ok(ViolationTime {
2353 reference_time: u64::try_from(sample.timestamp).unwrap_or_default(),
2354 violation_time: field_sample_as_u64(sample)?,
2355 })
2356 }
2357
2358 /// Returns the list of retired pages by source, including pages that are pending retirement.
2359 /// The returned address information is the hardware address of the retired page.
2360 /// This does not match the virtual address used in CUDA, but it matches the
2361 /// address information in Xid 63.
2362 ///
2363 /// [`Device::retired_pages`] adds an additional timestamps parameter to return the time of each page's retirement.
2364 /// This is supported for Pascal and newer architecture.
2365 ///
2366 /// For Kepler or newer fully supported devices.
2367 ///
2368 /// # Errors
2369 ///
2370 /// Returns an error if the device is inaccessible, if the retired-page list
2371 /// changes while the wrapper is fetching it, if NVML rejects the handle,
2372 /// cause, count, or outputs, if the device does not support page retirement,
2373 /// if NVML has not been initialized, or if NVML reports an unexpected
2374 /// failure.
2375 pub fn retired_pages(self, cause: PageRetirementCause) -> Result<Vec<RetiredPage>> {
2376 let mut count = 0;
2377 let status = unsafe {
2378 sys::nvmlDeviceGetRetiredPages_v2(
2379 self.0,
2380 cause.into(),
2381 &raw mut count,
2382 ptr::null_mut(),
2383 ptr::null_mut(),
2384 )
2385 };
2386 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
2387 return Ok(Vec::new());
2388 }
2389 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
2390 return Err(status.into());
2391 }
2392
2393 let mut addresses = vec![0u64; count as usize];
2394 let mut timestamps = vec![0u64; count as usize];
2395 unsafe {
2396 try_ffi!(sys::nvmlDeviceGetRetiredPages_v2(
2397 self.0,
2398 cause.into(),
2399 &raw mut count,
2400 addresses.as_mut_ptr(),
2401 timestamps.as_mut_ptr(),
2402 ))?;
2403 }
2404 addresses.truncate(count as usize);
2405 timestamps.truncate(count as usize);
2406 Ok(addresses
2407 .into_iter()
2408 .zip(timestamps)
2409 .map(|(address, timestamp)| RetiredPage {
2410 address,
2411 timestamp: Some(timestamp),
2412 })
2413 .collect())
2414 }
2415
2416 /// Check if any pages are pending retirement and need a reboot to fully retire.
2417 ///
2418 /// For Kepler or newer fully supported devices.
2419 ///
2420 /// # Errors
2421 ///
2422 /// Returns an error if the device is inaccessible, if NVML rejects the
2423 /// handle or output, if the device does not support page-retirement status,
2424 /// if NVML has not been initialized, or if NVML reports an unexpected
2425 /// failure.
2426 pub fn retired_pages_pending_status(self) -> Result<EnableState> {
2427 let mut pending = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
2428 unsafe {
2429 try_ffi!(sys::nvmlDeviceGetRetiredPagesPendingStatus(
2430 self.0,
2431 &raw mut pending,
2432 ))?;
2433 }
2434 Ok(pending.into())
2435 }
2436
2437 /// Returns the number of remapped rows.
2438 /// The number of rows reported is based on the remapping cause.
2439 /// `is_pending` indicates whether there are pending remappings.
2440 /// A reset is required to actually remap the row.
2441 /// `failure_occurred` is set if a row remapping ever failed in the past.
2442 /// A pending remapping does not affect future GPU work because error
2443 /// containment and dynamic page blacklisting handle it.
2444 ///
2445 /// On MIG-enabled GPUs with active instances, querying the number of remapped rows is not supported.
2446 ///
2447 /// For Ampere or newer fully supported devices.
2448 ///
2449 /// # Errors
2450 ///
2451 /// Returns an error if NVML rejects the handle or outputs, if MIG is enabled
2452 /// or the device does not support remapped-row reporting, or if NVML reports
2453 /// an unexpected failure.
2454 pub fn remapped_rows(self) -> Result<RemappedRows> {
2455 let mut corrected = 0;
2456 let mut uncorrected = 0;
2457 let mut pending = 0;
2458 let mut failure_occurred = 0;
2459 unsafe {
2460 try_ffi!(sys::nvmlDeviceGetRemappedRows(
2461 self.0,
2462 &raw mut corrected,
2463 &raw mut uncorrected,
2464 &raw mut pending,
2465 &raw mut failure_occurred,
2466 ))?;
2467 }
2468 Ok(RemappedRows {
2469 corrected,
2470 uncorrected,
2471 pending: pending != 0,
2472 failure_occurred: failure_occurred != 0,
2473 })
2474 }
2475
2476 /// Returns the row remapper histogram.
2477 /// Returns the remap availability for each bank on the GPU.
2478 ///
2479 /// # Errors
2480 ///
2481 /// Returns an error if NVML reports an unexpected row-remapper query
2482 /// failure.
2483 pub fn row_remapper_histogram(self) -> Result<RowRemapperHistogram> {
2484 unsafe {
2485 let mut histogram = MaybeUninit::<sys::nvmlRowRemapperHistogramValues_t>::uninit();
2486 try_ffi!(sys::nvmlDeviceGetRowRemapperHistogram(
2487 self.0,
2488 histogram.as_mut_ptr(),
2489 ))?;
2490 Ok(histogram.assume_init().into())
2491 }
2492 }
2493
2494 /// Requests values for a list of device fields.
2495 /// Allows multiple fields to be queried at once.
2496 /// If multiple field IDs are populated by the same driver call, NVML
2497 /// populates those results from one call rather than one call per field ID.
2498 ///
2499 /// # Errors
2500 ///
2501 /// Returns an error if NVML rejects the device handle or field-value buffer.
2502 pub fn field_values(self, queries: &[FieldQuery]) -> Result<Vec<FieldSample>> {
2503 let mut values = queries
2504 .iter()
2505 .map(|query| sys::nvmlFieldValue_t {
2506 fieldId: query.field.0,
2507 scopeId: query.scope_id,
2508 ..Default::default()
2509 })
2510 .collect::<Vec<_>>();
2511 unsafe {
2512 try_ffi!(sys::nvmlDeviceGetFieldValues(
2513 self.0,
2514 values.len() as i32,
2515 values.as_mut_ptr(),
2516 ))?;
2517 }
2518 Ok(values.into_iter().map(FieldSample::from_raw).collect())
2519 }
2520
2521 fn single_field_value(self, field: FieldId, scope_id: u32) -> Result<FieldSample> {
2522 self.field_values(&[FieldQuery { field, scope_id }])?
2523 .into_iter()
2524 .next()
2525 .ok_or(Error::EmptyOutput {
2526 name: "nvmlDeviceGetFieldValues".into(),
2527 })
2528 }
2529
2530 /// Clear values for a list of fields for a device.
2531 /// Allows multiple fields to be cleared at once.
2532 ///
2533 /// # Errors
2534 ///
2535 /// Returns an error if NVML rejects the device handle or field-value buffer.
2536 pub fn clear_field_values(self, queries: &[FieldQuery]) -> Result<Vec<FieldSample>> {
2537 let mut values = queries
2538 .iter()
2539 .map(|query| sys::nvmlFieldValue_t {
2540 fieldId: query.field.0,
2541 scopeId: query.scope_id,
2542 ..Default::default()
2543 })
2544 .collect::<Vec<_>>();
2545 unsafe {
2546 try_ffi!(sys::nvmlDeviceClearFieldValues(
2547 self.0,
2548 values.len() as i32,
2549 values.as_mut_ptr(),
2550 ))?;
2551 }
2552 Ok(values.into_iter().map(FieldSample::from_raw).collect())
2553 }
2554
2555 /// Returns recent samples for the GPU.
2556 ///
2557 /// For Kepler or newer fully supported devices.
2558 ///
2559 /// Fetches power, utilization, or clock samples maintained in the driver's buffer, depending on the requested sample type.
2560 ///
2561 /// Power, utilization, and clock samples are returned as unsigned integer values.
2562 ///
2563 /// This wrapper performs the size query internally and returns the samples as an owned collection.
2564 ///
2565 /// `last_seen_timestamp` represents a CPU timestamp in microseconds.
2566 /// Use `0` to fetch all samples maintained by the buffer.
2567 /// Use a timestamp returned by a previous query to get more recent samples.
2568 ///
2569 /// This wrapper performs the NVML size query internally and returns the samples that were actually retrieved.
2570 /// Compared with polling the current-value methods, samples provide higher-frequency data at lower polling cost.
2571 ///
2572 /// On MIG-enabled GPUs, querying the following sample types, [`SamplingType::GpuUtilization`], [`SamplingType::MemoryUtilization`], [`SamplingType::EncoderUtilization`], and [`SamplingType::DecoderUtilization`], is not currently supported.
2573 ///
2574 /// # Errors
2575 ///
2576 /// Returns an error if the device is inaccessible, if NVML rejects the
2577 /// query, if the device does not support the requested sample type, if NVML
2578 /// has not been initialized, or if NVML reports an unexpected failure.
2579 /// Missing sample entries are returned as an empty sample list.
2580 pub fn samples(self, kind: SamplingType, last_seen_timestamp: u64) -> Result<Samples> {
2581 let mut value_type = sys::nvmlValueType_t::NVML_VALUE_TYPE_UNSIGNED_INT;
2582 let mut count = 0;
2583 let status = unsafe {
2584 sys::nvmlDeviceGetSamples(
2585 self.0,
2586 kind.into(),
2587 last_seen_timestamp,
2588 &raw mut value_type,
2589 &raw mut count,
2590 ptr::null_mut(),
2591 )
2592 };
2593 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
2594 return Ok(Samples {
2595 value_type: try_from_nvml_enum("field value type", u32::from(value_type))?,
2596 samples: Vec::new(),
2597 });
2598 }
2599 if status == sys::nvmlReturn_t::NVML_ERROR_NOT_FOUND {
2600 return Ok(Samples {
2601 value_type: try_from_nvml_enum("field value type", u32::from(value_type))?,
2602 samples: Vec::new(),
2603 });
2604 }
2605 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
2606 return Err(status.into());
2607 }
2608
2609 let mut samples = vec![sys::nvmlSample_t::default(); count as usize];
2610 unsafe {
2611 try_ffi!(sys::nvmlDeviceGetSamples(
2612 self.0,
2613 kind.into(),
2614 last_seen_timestamp,
2615 &raw mut value_type,
2616 &raw mut count,
2617 samples.as_mut_ptr(),
2618 ))?;
2619 }
2620 samples.truncate(count as usize);
2621 Ok(Samples {
2622 value_type: try_from_nvml_enum("field value type", u32::from(value_type))?,
2623 samples: samples
2624 .into_iter()
2625 .map(|sample| Sample::from_raw(value_type, sample))
2626 .collect::<Result<Vec<_>>>()?,
2627 })
2628 }
2629
2630 /// Returns GSP firmware version.
2631 ///
2632 /// This wrapper allocates the firmware-version buffer internally and returns it as a [`String`].
2633 ///
2634 /// # Errors
2635 ///
2636 /// Returns an error if NVML rejects the device or output buffer, if GSP
2637 /// firmware is not enabled for the GPU, or if NVML reports an unexpected
2638 /// failure.
2639 pub fn gsp_firmware_version(self) -> Result<String> {
2640 let mut buffer = vec![0i8; sys::NVML_GSP_FIRMWARE_VERSION_BUF_SIZE as usize];
2641 unsafe {
2642 try_ffi!(sys::nvmlDeviceGetGspFirmwareVersion(
2643 self.0,
2644 buffer.as_mut_ptr()
2645 ))?;
2646 }
2647 Ok(string_from_c_chars(&buffer))
2648 }
2649
2650 /// Returns GSP firmware mode.
2651 ///
2652 /// Returns GSP firmware enablement and default mode information.
2653 ///
2654 /// # Errors
2655 ///
2656 /// Returns an error if NVML rejects the query, if GSP firmware is not
2657 /// enabled for the GPU, or if NVML reports an unexpected failure.
2658 pub fn gsp_firmware_mode(self) -> Result<GspFirmwareMode> {
2659 let mut enabled = 0;
2660 let mut default_mode = 0;
2661 unsafe {
2662 try_ffi!(sys::nvmlDeviceGetGspFirmwareMode(
2663 self.0,
2664 &raw mut enabled,
2665 &raw mut default_mode,
2666 ))?;
2667 }
2668 Ok(GspFirmwareMode {
2669 enabled: enabled != 0,
2670 default_mode: default_mode != 0,
2671 })
2672 }
2673
2674 /// Returns the number of fans on the device.
2675 ///
2676 /// For all discrete products with dedicated fans.
2677 ///
2678 /// # Errors
2679 ///
2680 /// Returns an error if the device is inaccessible, if NVML rejects the
2681 /// handle or output, if the device does not have a fan, if NVML has not been
2682 /// initialized, or if NVML reports an unexpected failure.
2683 pub fn num_fans(self) -> Result<u32> {
2684 let mut fans = 0;
2685 unsafe {
2686 try_ffi!(sys::nvmlDeviceGetNumFans(self.0, &raw mut fans))?;
2687 }
2688 Ok(fans)
2689 }
2690
2691 /// Returns the intended operating speed of the device's fan.
2692 ///
2693 /// The reported speed is the intended fan speed.
2694 /// If the fan is physically blocked and unable to spin, the output does not
2695 /// match the actual fan speed.
2696 ///
2697 /// For all discrete products with dedicated fans.
2698 ///
2699 /// The fan speed is expressed as a percentage of the product's maximum noise tolerance fan speed.
2700 /// This value may exceed 100% in certain cases.
2701 ///
2702 /// # Errors
2703 ///
2704 /// Returns an error if the device is inaccessible, if NVML rejects the
2705 /// handle or output, if the device does not have a fan, if NVML has not been
2706 /// initialized, or if NVML reports an unexpected failure.
2707 pub fn default_fan_speed(self) -> Result<u32> {
2708 let mut speed = 0;
2709 unsafe {
2710 try_ffi!(sys::nvmlDeviceGetFanSpeed(self.0, &raw mut speed))?;
2711 }
2712 Ok(speed)
2713 }
2714
2715 /// Returns the intended operating speed of the device's specified fan.
2716 ///
2717 /// The reported speed is the intended fan speed.
2718 /// If the fan is physically blocked and unable to spin, the output does not
2719 /// match the actual fan speed.
2720 ///
2721 /// For all discrete products with dedicated fans.
2722 ///
2723 /// The fan speed is expressed as a percentage of the product's maximum noise tolerance fan speed.
2724 /// This value may exceed 100% in certain cases.
2725 ///
2726 /// # Errors
2727 ///
2728 /// Returns an error if the device is inaccessible, if NVML rejects the
2729 /// handle, fan index, or output, if the device does not have a supported fan
2730 /// interface, if NVML has not been initialized, or if NVML reports an
2731 /// unexpected failure.
2732 pub fn fan_speed(self, fan: u32) -> Result<u32> {
2733 let mut speed = 0;
2734 unsafe {
2735 try_ffi!(sys::nvmlDeviceGetFanSpeed_v2(self.0, fan, &raw mut speed))?;
2736 }
2737 Ok(speed)
2738 }
2739
2740 /// Returns the intended operating speed in rotations per minute (RPM) of the device's specified fan.
2741 ///
2742 /// For Maxwell or newer fully supported devices.
2743 ///
2744 /// For all discrete products with dedicated fans.
2745 ///
2746 /// The reported speed is the intended fan speed.
2747 /// If the fan is physically blocked and unable to spin, the output does not
2748 /// match the actual fan speed.
2749 ///
2750 /// # Errors
2751 ///
2752 /// Returns an error if the installed NVML version does not support the
2753 /// request layout, if NVML rejects the handle, fan index, or output, if the
2754 /// device does not support fan RPM reporting, or if NVML has not been
2755 /// initialized.
2756 pub fn fan_speed_rpm(self, fan: u32) -> Result<FanSpeedInfo> {
2757 let mut info = sys::nvmlFanSpeedInfo_t {
2758 version: struct_version::<sys::nvmlFanSpeedInfo_t>(1),
2759 fan,
2760 ..Default::default()
2761 };
2762 unsafe {
2763 try_ffi!(sys::nvmlDeviceGetFanSpeedRPM(self.0, &raw mut info))?;
2764 }
2765 Ok(FanSpeedInfo {
2766 fan: info.fan,
2767 speed: info.speed,
2768 })
2769 }
2770
2771 /// Returns current fan control policy.
2772 ///
2773 /// For Maxwell or newer fully supported devices.
2774 ///
2775 /// For all cuda-capable discrete products with fans.
2776 ///
2777 /// # Errors
2778 ///
2779 /// Returns an error if NVML rejects the handle, fan index, or output, if
2780 /// the device does not support fan policies, if NVML has not been
2781 /// initialized, or if NVML reports an unexpected failure.
2782 pub fn fan_control_policy(self, fan: u32) -> Result<FanPolicy> {
2783 let mut policy = 0;
2784 unsafe {
2785 try_ffi!(sys::nvmlDeviceGetFanControlPolicy_v2(
2786 self.0,
2787 fan,
2788 &raw mut policy
2789 ))?;
2790 }
2791 try_from_nvml_enum("fan policy", policy)
2792 }
2793
2794 /// Sets the speed of the fan control policy to default.
2795 ///
2796 /// For all cuda-capable discrete products with fans.
2797 ///
2798 /// # Errors
2799 ///
2800 /// Returns an error if NVML rejects the handle or fan index, if the device
2801 /// does not support fan-speed control, if NVML has not been initialized, or
2802 /// if NVML reports an unexpected failure.
2803 pub fn set_default_fan_speed(self, fan: u32) -> Result<()> {
2804 unsafe {
2805 try_ffi!(sys::nvmlDeviceSetDefaultFanSpeed_v2(self.0, fan))?;
2806 }
2807 Ok(())
2808 }
2809
2810 /// Sets current fan control policy.
2811 ///
2812 /// For Maxwell or newer fully supported devices.
2813 ///
2814 /// Requires privileged access.
2815 ///
2816 /// For all cuda-capable discrete products with fans.
2817 ///
2818 /// # Errors
2819 ///
2820 /// Returns an error if NVML rejects the handle, fan index, or policy, if the
2821 /// device does not support fan policies, if NVML has not been initialized,
2822 /// or if NVML reports an unexpected failure.
2823 pub fn set_fan_control_policy(self, fan: u32, policy: FanPolicy) -> Result<()> {
2824 unsafe {
2825 try_ffi!(sys::nvmlDeviceSetFanControlPolicy(
2826 self.0,
2827 fan,
2828 policy.into()
2829 ))?;
2830 }
2831 Ok(())
2832 }
2833
2834 /// Sets the speed of a specified fan.
2835 ///
2836 /// Warning: this changes the fan control policy to manual.
2837 /// You must monitor the temperature and adjust the fan speed accordingly.
2838 /// Setting the fan speed too low can damage the GPU.
2839 /// Use [`Device::set_default_fan_speed`] to restore default control policy.
2840 ///
2841 /// For all cuda-capable discrete products with fans that are Maxwell or Newer.
2842 ///
2843 /// # Errors
2844 ///
2845 /// Returns an error if NVML has not been initialized, if NVML rejects the
2846 /// handle, fan index, or speed, if the device does not support manual fan
2847 /// speed control, or if NVML reports an unexpected failure.
2848 pub fn set_fan_speed(self, fan: u32, speed: u32) -> Result<()> {
2849 unsafe {
2850 try_ffi!(sys::nvmlDeviceSetFanSpeed_v2(self.0, fan, speed))?;
2851 }
2852 Ok(())
2853 }
2854
2855 /// Returns the intended target speed of the device's specified fan.
2856 ///
2857 /// Normally, the driver dynamically adjusts the fan based on the needs of the GPU.
2858 /// When the caller sets fan speed with [`Device::set_fan_speed`], the driver attempts to make the fan achieve that setting.
2859 /// The actual current speed of the fan is reported in [`Device::fan_speed`].
2860 ///
2861 /// For all discrete products with dedicated fans.
2862 ///
2863 /// The fan speed is expressed as a percentage of the product's maximum noise tolerance fan speed.
2864 /// This value may exceed 100% in certain cases.
2865 ///
2866 /// # Errors
2867 ///
2868 /// Returns an error if the device is inaccessible, if NVML rejects the
2869 /// handle, fan index, or output, if the device does not have a supported fan
2870 /// interface, if NVML has not been initialized, or if NVML reports an
2871 /// unexpected failure.
2872 pub fn target_fan_speed(self, fan: u32) -> Result<u32> {
2873 let mut speed = 0;
2874 unsafe {
2875 try_ffi!(sys::nvmlDeviceGetTargetFanSpeed(
2876 self.0,
2877 fan,
2878 &raw mut speed
2879 ))?;
2880 }
2881 Ok(speed)
2882 }
2883
2884 /// Returns the min and max fan speed that can be set for the GPU fan.
2885 ///
2886 /// For all cuda-capable discrete products with fans.
2887 ///
2888 /// # Errors
2889 ///
2890 /// Returns an error if NVML rejects the handle or output, if the device does
2891 /// not support fan-speed range reporting, if NVML has not been initialized,
2892 /// or if NVML reports an unexpected failure.
2893 pub fn min_max_fan_speed(self) -> Result<MinMaxFanSpeed> {
2894 let mut min = 0;
2895 let mut max = 0;
2896 unsafe {
2897 try_ffi!(sys::nvmlDeviceGetMinMaxFanSpeed(
2898 self.0,
2899 &raw mut min,
2900 &raw mut max,
2901 ))?;
2902 }
2903 Ok(MinMaxFanSpeed { min, max })
2904 }
2905
2906 /// Returns the cooler's information.
2907 /// Returns a cooler's control signal characteristics.
2908 /// The possible types are restricted, Variable and Toggle.
2909 /// See [`CoolerControl`](crate::types::CoolerControl) for details on available signal types.
2910 /// Returns objects that cooler cools.
2911 /// Targets may be GPU, Memory, Power Supply or All of these.
2912 /// See [`CoolerTarget`](crate::types::CoolerTarget) for details on available targets.
2913 ///
2914 /// For Maxwell or newer fully supported devices.
2915 ///
2916 /// For all discrete products with dedicated fans.
2917 ///
2918 /// # Errors
2919 ///
2920 /// Returns an error if the installed NVML version does not support the
2921 /// request layout, if NVML rejects the handle or cooler index, if the device
2922 /// does not support cooler reporting, or if NVML has not been initialized.
2923 pub fn cooler_info(self, index: u32) -> Result<CoolerInfo> {
2924 let mut info = sys::nvmlCoolerInfo_t {
2925 version: struct_version::<sys::nvmlCoolerInfo_t>(1),
2926 index,
2927 ..Default::default()
2928 };
2929 unsafe {
2930 try_ffi!(sys::nvmlDeviceGetCoolerInfo(self.0, &raw mut info))?;
2931 }
2932 Ok(CoolerInfo {
2933 index: info.index,
2934 signal_type: info.signalType.into(),
2935 target: info.target.into(),
2936 })
2937 }
2938
2939 /// Returns the current temperature readings (in degrees C) for the given device.
2940 ///
2941 /// For all products.
2942 ///
2943 /// # Errors
2944 ///
2945 /// Returns an error if the device is inaccessible, if NVML rejects the
2946 /// handle, sensor type, or output, if the device does not have the requested
2947 /// sensor, if NVML has not been initialized, or if NVML reports an
2948 /// unexpected failure.
2949 pub fn temperature(self, sensor: TemperatureSensor) -> Result<TemperatureInfo> {
2950 let mut temperature = sys::nvmlTemperature_t {
2951 version: struct_version::<sys::nvmlTemperature_t>(1),
2952 sensorType: sensor.into(),
2953 ..Default::default()
2954 };
2955 unsafe {
2956 try_ffi!(sys::nvmlDeviceGetTemperatureV(self.0, &raw mut temperature))?;
2957 }
2958 Ok(TemperatureInfo {
2959 sensor: temperature.sensorType.into(),
2960 temperature: temperature.temperature,
2961 })
2962 }
2963
2964 /// Returns minor number for the device.
2965 /// The minor number is the suffix in the Linux device node path
2966 /// `/dev/nvidia[minor_number]`.
2967 ///
2968 /// For all products.
2969 /// Supported only for Linux.
2970 ///
2971 /// # Errors
2972 ///
2973 /// Returns an error if the device is inaccessible, if NVML rejects the
2974 /// handle or output, if the device does not support Linux minor-number
2975 /// reporting, if NVML has not been initialized, or if NVML reports an
2976 /// unexpected failure.
2977 pub fn minor_number(self) -> Result<u32> {
2978 let mut minor_number = 0;
2979 unsafe {
2980 try_ffi!(sys::nvmlDeviceGetMinorNumber(self.0, &raw mut minor_number))?;
2981 }
2982 Ok(minor_number)
2983 }
2984
2985 /// Returns the current compute mode for the device.
2986 ///
2987 /// For all products.
2988 ///
2989 /// See [`ComputeMode`] for details on allowed compute modes.
2990 ///
2991 /// # Errors
2992 ///
2993 /// Returns an error if the device is inaccessible, if NVML rejects the
2994 /// handle or output, if the device does not support compute-mode reporting,
2995 /// if NVML has not been initialized, or if NVML reports an unexpected
2996 /// failure.
2997 pub fn compute_mode(self) -> Result<ComputeMode> {
2998 let mut compute_mode = sys::nvmlComputeMode_t::NVML_COMPUTEMODE_DEFAULT;
2999 unsafe {
3000 try_ffi!(sys::nvmlDeviceGetComputeMode(self.0, &raw mut compute_mode))?;
3001 }
3002 Ok(compute_mode.into())
3003 }
3004
3005 /// Returns the persistence mode associated with this device.
3006 ///
3007 /// For all products.
3008 /// For Linux only.
3009 ///
3010 /// When driver persistence mode is enabled the driver software state is not torn down when the last client disconnects.
3011 /// By default this feature is disabled.
3012 ///
3013 /// See [`EnableState`] for details on allowed modes.
3014 ///
3015 /// # Errors
3016 ///
3017 /// Returns an error if the device is inaccessible, if NVML rejects the
3018 /// handle or output, if the device does not support persistence mode, if
3019 /// NVML has not been initialized, or if NVML reports an unexpected failure.
3020 pub fn persistence_mode(self) -> Result<EnableState> {
3021 let mut mode = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
3022 unsafe {
3023 try_ffi!(sys::nvmlDeviceGetPersistenceMode(self.0, &raw mut mode))?;
3024 }
3025 Ok(mode.into())
3026 }
3027
3028 /// Sets the compute mode for the device.
3029 ///
3030 /// For all products.
3031 /// Requires root/admin permissions.
3032 ///
3033 /// The compute mode determines whether a GPU can be used for compute operations and whether it can be shared across contexts.
3034 ///
3035 /// This operation takes effect immediately.
3036 /// Under Linux it is not persistent across reboots and always resets to "Default".
3037 /// Under Windows it is persistent.
3038 ///
3039 /// Under Windows, compute mode may only be set to default when running in
3040 /// WDDM.
3041 ///
3042 /// On MIG-enabled GPUs, compute mode is set to default and changing it is
3043 /// not supported.
3044 ///
3045 /// See [`ComputeMode`] for details on available compute modes.
3046 ///
3047 /// # Errors
3048 ///
3049 /// Returns an error if the device is inaccessible, if NVML rejects the
3050 /// handle or mode, if the device does not support compute-mode changes, if
3051 /// the current process lacks permission, if NVML has not been initialized,
3052 /// or if NVML reports an unexpected failure.
3053 pub fn set_compute_mode(self, mode: ComputeMode) -> Result<()> {
3054 unsafe {
3055 try_ffi!(sys::nvmlDeviceSetComputeMode(self.0, mode.into()))?;
3056 }
3057 Ok(())
3058 }
3059
3060 /// Sets the persistence mode for the device.
3061 ///
3062 /// For all products.
3063 /// For Linux only.
3064 /// Requires root/admin permissions.
3065 ///
3066 /// The persistence mode determines whether the GPU driver software is torn down after the last client exits.
3067 ///
3068 /// This operation takes effect immediately.
3069 /// It is not persistent across reboots.
3070 /// After each reboot the persistence mode is reset to "Disabled".
3071 ///
3072 /// See [`EnableState`] for available modes.
3073 ///
3074 /// After disabling persistence mode on a device that has its own NUMA memory,
3075 /// the current device handle is no longer valid. Acquire a fresh handle from
3076 /// the library before continuing to interact with the device.
3077 /// This limitation is currently only applicable to devices that have a coherent NVLink connection to system memory.
3078 ///
3079 /// # Errors
3080 ///
3081 /// Returns an error if the device is inaccessible, if NVML rejects the
3082 /// handle or mode, if the device does not support persistence mode, if the
3083 /// current process lacks permission, if NVML has not been initialized, or if
3084 /// NVML reports an unexpected failure.
3085 pub fn set_persistence_mode(self, mode: EnableState) -> Result<()> {
3086 unsafe {
3087 try_ffi!(sys::nvmlDeviceSetPersistenceMode(self.0, mode.into()))?;
3088 }
3089 Ok(())
3090 }
3091
3092 /// Sets the ECC mode for the device.
3093 ///
3094 /// For Kepler or newer fully supported devices.
3095 /// Only applicable to devices with ECC.
3096 /// Requires [`InforomObject::Ecc`] version 1.0 or higher.
3097 /// Requires root/admin permissions.
3098 ///
3099 /// The ECC mode determines whether the GPU enables its ECC support.
3100 ///
3101 /// This operation takes effect after the next reboot.
3102 ///
3103 /// See [`EnableState`] for details on available modes.
3104 ///
3105 /// # Errors
3106 ///
3107 /// Returns an error if the device is inaccessible, if NVML rejects the
3108 /// handle or mode, if the device does not support ECC mode changes, if the
3109 /// current process lacks permission, if NVML has not been initialized, or if
3110 /// NVML reports an unexpected failure.
3111 pub fn set_ecc_mode(self, mode: EnableState) -> Result<()> {
3112 unsafe {
3113 try_ffi!(sys::nvmlDeviceSetEccMode(self.0, mode.into()))?;
3114 }
3115 Ok(())
3116 }
3117
3118 /// Clear the ECC error and other memory error counts for the device.
3119 ///
3120 /// For Kepler or newer fully supported devices.
3121 /// Only applicable to devices with ECC.
3122 /// Requires [`InforomObject::Ecc`] version 2.0 or higher to clear aggregate location-based ECC counts.
3123 /// Requires [`InforomObject::Ecc`] version 1.0 or higher to clear all other ECC counts.
3124 /// Requires root/admin permissions.
3125 /// Requires ECC Mode to be enabled.
3126 ///
3127 /// Sets all of the specified ECC counters to 0, including both detailed and total counts.
3128 ///
3129 /// This operation takes effect immediately.
3130 ///
3131 /// See [`MemoryErrorType`] for details on available counter types.
3132 ///
3133 /// # Errors
3134 ///
3135 /// Returns an error if the device is inaccessible, if NVML rejects the
3136 /// handle or counter type, if the device does not support clearing ECC
3137 /// counters, if the current process lacks permission, if NVML has not been
3138 /// initialized, or if NVML reports an unexpected failure.
3139 pub fn clear_ecc_error_counts(self, counter_type: EccCounterType) -> Result<()> {
3140 unsafe {
3141 try_ffi!(sys::nvmlDeviceClearEccErrorCounts(
3142 self.0,
3143 counter_type.into()
3144 ))?;
3145 }
3146 Ok(())
3147 }
3148
3149 /// Returns MIG mode for the device.
3150 ///
3151 /// For Ampere or newer fully supported devices.
3152 ///
3153 /// Changing MIG modes may require device unbind or reset.
3154 /// The "pending" MIG mode refers to the target mode following the next activation trigger.
3155 ///
3156 /// # Errors
3157 ///
3158 /// Returns an error if NVML rejects the handle or outputs, if the device
3159 /// does not support MIG mode, if NVML has not been initialized, or if NVML
3160 /// reports an unexpected failure.
3161 pub fn mig_mode(self) -> Result<CurrentPending<MigMode>> {
3162 let mut current = 0;
3163 let mut pending = 0;
3164 unsafe {
3165 try_ffi!(sys::nvmlDeviceGetMigMode(
3166 self.0,
3167 &raw mut current,
3168 &raw mut pending,
3169 ))?;
3170 }
3171 Ok(CurrentPending {
3172 current: try_from_nvml_enum("mig mode", current)?,
3173 pending: try_from_nvml_enum("mig mode", pending)?,
3174 })
3175 }
3176
3177 /// Sets MIG mode for the device.
3178 ///
3179 /// For Ampere or newer fully supported devices.
3180 /// Requires root privileges.
3181 ///
3182 /// This mode determines whether a GPU instance can be created.
3183 ///
3184 /// This may unbind or reset the device to activate the requested mode.
3185 /// Thus, the attributes associated with the device, such as minor number, might change.
3186 /// Query such attributes again after changing the mode.
3187 ///
3188 /// On certain platforms, such as pass-through virtualization, reset may not be exposed directly and a VM reboot is required.
3189 /// In those cases, the returned activation status is [`Status::ResetRequired`].
3190 ///
3191 /// The returned activation status contains the appropriate error code when activation is unsuccessful.
3192 /// For example, if device unbind fails because the device is not idle, the status is [`Status::InUse`].
3193 /// Idle the device and retry setting the mode in that case.
3194 ///
3195 /// On Windows, only disabling MIG mode is supported. `activation_status` returns [`Status::NotSupported`] because GPU reset is not supported on Windows through this operation.
3196 ///
3197 /// # Errors
3198 ///
3199 /// Returns an error if NVML rejects the handle, requested mode, or
3200 /// activation-status output, if the device does not support MIG mode, if the
3201 /// current process lacks permission, or if NVML has not been initialized.
3202 pub fn set_mig_mode(self, mode: MigMode) -> Result<MigModeActivation> {
3203 let mut activation_status = sys::nvmlReturn_t::NVML_SUCCESS;
3204 unsafe {
3205 try_ffi!(sys::nvmlDeviceSetMigMode(
3206 self.0,
3207 mode.into(),
3208 &raw mut activation_status,
3209 ))?;
3210 }
3211 Ok(MigModeActivation::from_raw(activation_status))
3212 }
3213
3214 /// Returns the maximum number of MIG devices that can exist under a parent NVML device.
3215 ///
3216 /// Returns zero if MIG is not supported or enabled.
3217 ///
3218 /// For Ampere or newer fully supported devices.
3219 /// Supported on Linux only.
3220 ///
3221 /// # Errors
3222 ///
3223 /// Returns an error if NVML rejects the handle or output, if NVML has not
3224 /// been initialized, or if NVML reports an unexpected failure.
3225 pub fn max_mig_device_count(self) -> Result<u32> {
3226 let mut count = 0;
3227 unsafe {
3228 try_ffi!(sys::nvmlDeviceGetMaxMigDeviceCount(self.0, &raw mut count))?;
3229 }
3230 Ok(count)
3231 }
3232
3233 /// Tests if this handle refers to a MIG device.
3234 ///
3235 /// A MIG device handle is an NVML abstraction which maps to a MIG compute instance.
3236 /// These overloaded references can be used (with some restrictions) interchangeably with a GPU device handle to execute queries at a per-compute instance granularity.
3237 ///
3238 /// For Ampere or newer fully supported devices.
3239 /// Supported on Linux only.
3240 ///
3241 /// # Errors
3242 ///
3243 /// Returns an error if NVML rejects the handle or output, if the device does
3244 /// not support this check, if NVML has not been initialized, or if NVML
3245 /// reports an unexpected failure.
3246 pub fn is_mig_device_handle(self) -> Result<bool> {
3247 let mut is_mig_device = 0;
3248 unsafe {
3249 try_ffi!(sys::nvmlDeviceIsMigDeviceHandle(
3250 self.0,
3251 &raw mut is_mig_device,
3252 ))?;
3253 }
3254 Ok(is_mig_device != 0)
3255 }
3256
3257 /// Returns parent device handle from a MIG device handle.
3258 ///
3259 /// For Ampere or newer fully supported devices.
3260 /// Supported on Linux only.
3261 ///
3262 /// # Errors
3263 ///
3264 /// Returns an error if NVML rejects the MIG device handle or parent output,
3265 /// if the device does not support this query, if NVML has not been
3266 /// initialized, or if NVML reports an unexpected failure.
3267 pub fn parent_device_from_mig_handle(self) -> Result<Self> {
3268 let mut device = ptr::null_mut();
3269 unsafe {
3270 try_ffi!(sys::nvmlDeviceGetDeviceHandleFromMigDeviceHandle(
3271 self.0,
3272 &raw mut device,
3273 ))?;
3274 Ok(Self::from_raw(device))
3275 }
3276 }
3277
3278 /// Returns MIG device handle for the given index under its parent NVML device.
3279 ///
3280 /// If the compute instance is destroyed, either explicitly or by destroying, resetting, or unbinding the parent GPU instance or GPU device, the MIG device handle remains invalid and must be requested again.
3281 /// Handles may be reused and their properties can change in the process.
3282 ///
3283 /// For Ampere or newer fully supported devices.
3284 /// Supported on Linux only.
3285 ///
3286 /// # Errors
3287 ///
3288 /// Returns an error if NVML rejects the handle, index, or output, if no MIG
3289 /// device exists at `index`, if the device does not support this query, if
3290 /// NVML has not been initialized, or if NVML reports an unexpected failure.
3291 pub fn mig_device(self, index: u32) -> Result<Self> {
3292 let mut device = ptr::null_mut();
3293 unsafe {
3294 try_ffi!(sys::nvmlDeviceGetMigDeviceHandleByIndex(
3295 self.0,
3296 index,
3297 &raw mut device,
3298 ))?;
3299 Ok(Self::from_raw(device))
3300 }
3301 }
3302
3303 pub fn mig_devices(self) -> Result<Vec<Self>> {
3304 let max_count = self.max_mig_device_count()?;
3305 let mut devices = Vec::with_capacity(max_count as usize);
3306
3307 for index in 0..max_count {
3308 match self.mig_device(index) {
3309 Ok(device) => devices.push(device),
3310 Err(Error::Nvml {
3311 code: Status::NotFound,
3312 ..
3313 }) => {}
3314 Err(error) => return Err(error),
3315 }
3316 }
3317
3318 Ok(devices)
3319 }
3320
3321 /// Versioned wrapper that requests GPU-instance profile information using the latest supported NVML output layout.
3322 ///
3323 /// This wrapper sets the version field on the output structure before calling NVML.
3324 ///
3325 /// For Ampere or newer fully supported devices.
3326 /// Supported on Linux only.
3327 ///
3328 /// # Errors
3329 ///
3330 /// Returns an error if NVML rejects the handle, profile, or request layout,
3331 /// if MIG mode is disabled or the profile is unsupported, if the current
3332 /// process lacks permission, or if NVML has not been initialized.
3333 pub fn gpu_instance_profile_info(self, profile: u32) -> Result<GpuInstanceProfileInfo> {
3334 let mut info = sys::nvmlGpuInstanceProfileInfo_v3_t {
3335 version: struct_version::<sys::nvmlGpuInstanceProfileInfo_v3_t>(3),
3336 ..Default::default()
3337 };
3338 unsafe {
3339 try_ffi!(sys::nvmlDeviceGetGpuInstanceProfileInfoV(
3340 self.0,
3341 profile,
3342 (&raw mut info).cast(),
3343 ))?;
3344 }
3345 Ok(info.into())
3346 }
3347
3348 /// GPU instance profile query function that accepts profile ID, instead of profile name.
3349 /// It requests the result using the latest supported NVML output layout.
3350 ///
3351 /// This wrapper sets the version field on the output structure before calling NVML.
3352 ///
3353 /// For Ampere or newer fully supported devices.
3354 /// Supported on Linux only.
3355 ///
3356 /// # Errors
3357 ///
3358 /// Returns an error if NVML rejects the handle, profile ID, or request
3359 /// layout, if MIG mode is disabled or the profile is unsupported, if the
3360 /// current process lacks permission, or if NVML has not been initialized.
3361 pub fn gpu_instance_profile_info_by_id(
3362 self,
3363 profile_id: u32,
3364 ) -> Result<GpuInstanceProfileInfo> {
3365 let mut info = sys::nvmlGpuInstanceProfileInfo_v3_t {
3366 version: struct_version::<sys::nvmlGpuInstanceProfileInfo_v3_t>(3),
3367 ..Default::default()
3368 };
3369 unsafe {
3370 try_ffi!(sys::nvmlDeviceGetGpuInstanceProfileInfoByIdV(
3371 self.0,
3372 profile_id,
3373 (&raw mut info).cast(),
3374 ))?;
3375 }
3376 Ok(info.into())
3377 }
3378
3379 /// Returns GPU instance profile capacity.
3380 ///
3381 /// For Ampere or newer fully supported devices.
3382 /// Supported on Linux only.
3383 /// Requires privileged access.
3384 ///
3385 /// # Errors
3386 ///
3387 /// Returns an error if NVML rejects the query, if MIG mode is disabled or
3388 /// `profile_id` is unsupported, if the current process lacks permission, or
3389 /// if NVML has not been initialized.
3390 pub fn gpu_instance_remaining_capacity(self, profile_id: u32) -> Result<u32> {
3391 let mut count = 0;
3392 unsafe {
3393 try_ffi!(sys::nvmlDeviceGetGpuInstanceRemainingCapacity(
3394 self.0,
3395 profile_id,
3396 &raw mut count,
3397 ))?;
3398 }
3399 Ok(count)
3400 }
3401
3402 /// Returns GPU instance placements.
3403 ///
3404 /// A placement represents the location of a GPU instance within a device.
3405 /// Returns all possible placements for the given profile, regardless of whether MIG is enabled.
3406 /// A created GPU instance occupies memory slices described by its placement.
3407 /// Creating a GPU instance fails if its placement overlaps already occupied
3408 /// memory slices.
3409 ///
3410 /// For Ampere or newer fully supported devices.
3411 /// Supported on Linux only.
3412 /// Requires privileged access.
3413 ///
3414 /// # Errors
3415 ///
3416 /// Returns an error if NVML rejects the query, if the device does not
3417 /// support MIG or `profile_id` is unsupported, if the current process lacks
3418 /// permission, or if NVML has not been initialized.
3419 pub fn gpu_instance_possible_placements(
3420 self,
3421 profile_id: u32,
3422 ) -> Result<Vec<GpuInstancePlacement>> {
3423 let mut count = 0;
3424 let status = unsafe {
3425 sys::nvmlDeviceGetGpuInstancePossiblePlacements_v2(
3426 self.0,
3427 profile_id,
3428 ptr::null_mut(),
3429 &raw mut count,
3430 )
3431 };
3432 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
3433 return Ok(Vec::new());
3434 }
3435 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
3436 return Err(status.into());
3437 }
3438
3439 let mut placements = vec![sys::nvmlGpuInstancePlacement_t::default(); count as usize];
3440 unsafe {
3441 try_ffi!(sys::nvmlDeviceGetGpuInstancePossiblePlacements_v2(
3442 self.0,
3443 profile_id,
3444 placements.as_mut_ptr(),
3445 &raw mut count,
3446 ))?;
3447 }
3448 placements.truncate(count as usize);
3449 Ok(placements.into_iter().map(Into::into).collect())
3450 }
3451
3452 /// Returns GPU instances for the given instance ID.
3453 ///
3454 /// For Ampere or newer fully supported devices.
3455 /// Supported on Linux only.
3456 /// Requires privileged access.
3457 ///
3458 /// # Errors
3459 ///
3460 /// Returns an error if NVML rejects the query, if no GPU instance has the
3461 /// requested ID, if MIG mode is disabled, if the current process lacks
3462 /// permission, or if NVML has not been initialized.
3463 pub fn gpu_instance_by_id(self, id: u32) -> Result<GpuInstance> {
3464 let mut instance = ptr::null_mut();
3465 unsafe {
3466 try_ffi!(sys::nvmlDeviceGetGpuInstanceById(
3467 self.0,
3468 id,
3469 &raw mut instance,
3470 ))?;
3471 Ok(GpuInstance::from_raw(instance))
3472 }
3473 }
3474
3475 /// Creates a GPU instance.
3476 ///
3477 /// For Ampere or newer fully supported devices.
3478 /// Supported on Linux only.
3479 /// Requires privileged access.
3480 ///
3481 /// If the parent device is unbound or reset, or if the GPU instance is destroyed, the GPU instance handle becomes invalid.
3482 /// The GPU instance must be recreated to acquire a valid handle.
3483 ///
3484 /// # Errors
3485 ///
3486 /// Returns an error if NVML cannot allocate the requested GPU instance, if
3487 /// NVML rejects the query, if MIG mode is disabled or the device is a vGPU
3488 /// guest, if the current process lacks permission, or if NVML has not been
3489 /// initialized.
3490 pub fn create_gpu_instance(self, profile_id: u32) -> Result<OwnedGpuInstance> {
3491 let mut instance = ptr::null_mut();
3492 unsafe {
3493 try_ffi!(sys::nvmlDeviceCreateGpuInstance(
3494 self.0,
3495 profile_id,
3496 &raw mut instance,
3497 ))?;
3498 Ok(OwnedGpuInstance::from_raw(instance))
3499 }
3500 }
3501
3502 /// Creates a GPU instance with the specified placement.
3503 ///
3504 /// For Ampere or newer fully supported devices.
3505 /// Supported on Linux only.
3506 /// Requires privileged access.
3507 ///
3508 /// If the parent device is unbound or reset, or if the GPU instance is destroyed, the GPU instance handle becomes invalid.
3509 /// The GPU instance must be recreated to acquire a valid handle.
3510 ///
3511 /// # Errors
3512 ///
3513 /// Returns an error if NVML cannot allocate the requested GPU instance, if
3514 /// NVML rejects the query or placement, if MIG mode is disabled or the
3515 /// device is a vGPU guest, if the current process lacks permission, or if
3516 /// NVML has not been initialized.
3517 pub fn create_gpu_instance_with_placement(
3518 self,
3519 profile_id: u32,
3520 placement: GpuInstancePlacement,
3521 ) -> Result<OwnedGpuInstance> {
3522 let placement = sys::nvmlGpuInstancePlacement_t::from(placement);
3523 let mut instance = ptr::null_mut();
3524 unsafe {
3525 try_ffi!(sys::nvmlDeviceCreateGpuInstanceWithPlacement(
3526 self.0,
3527 profile_id,
3528 &raw const placement,
3529 &raw mut instance,
3530 ))?;
3531 Ok(OwnedGpuInstance::from_raw(instance))
3532 }
3533 }
3534
3535 /// Returns GPU instances for the given profile ID.
3536 ///
3537 /// For Ampere or newer fully supported devices.
3538 /// Supported on Linux only.
3539 /// Requires privileged access.
3540 ///
3541 /// # Errors
3542 ///
3543 /// Returns an error if NVML rejects the query, if MIG mode is disabled, if
3544 /// the current process lacks permission, or if NVML has not been
3545 /// initialized.
3546 pub fn gpu_instances(self, profile_id: u32) -> Result<Vec<GpuInstance>> {
3547 let mut count = 0;
3548 let status = unsafe {
3549 sys::nvmlDeviceGetGpuInstances(self.0, profile_id, ptr::null_mut(), &raw mut count)
3550 };
3551 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
3552 return Ok(Vec::new());
3553 }
3554 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
3555 return Err(status.into());
3556 }
3557
3558 let mut instances = vec![ptr::null_mut(); count as usize];
3559 unsafe {
3560 try_ffi!(sys::nvmlDeviceGetGpuInstances(
3561 self.0,
3562 profile_id,
3563 instances.as_mut_ptr(),
3564 &raw mut count,
3565 ))?;
3566 }
3567 instances.truncate(count as usize);
3568 Ok(instances
3569 .into_iter()
3570 .map(|instance| unsafe { GpuInstance::from_raw(instance) })
3571 .collect())
3572 }
3573
3574 /// Returns the GPU instance ID for this MIG device handle.
3575 ///
3576 /// GPU instance IDs are unique per device and remain valid until the GPU instance is destroyed.
3577 ///
3578 /// For Ampere or newer fully supported devices.
3579 /// Supported on Linux only.
3580 ///
3581 /// # Errors
3582 ///
3583 /// Returns an error if NVML rejects the handle or output, if the device does
3584 /// not support this query, if NVML has not been initialized, or if NVML
3585 /// reports an unexpected failure.
3586 pub fn gpu_instance_id(self) -> Result<u32> {
3587 let mut id = 0;
3588 unsafe {
3589 try_ffi!(sys::nvmlDeviceGetGpuInstanceId(self.0, &raw mut id))?;
3590 }
3591 Ok(id)
3592 }
3593
3594 /// Returns the compute instance ID for this MIG device handle.
3595 ///
3596 /// Compute instance IDs are unique per GPU instance and remain valid until the compute instance is destroyed.
3597 ///
3598 /// For Ampere or newer fully supported devices.
3599 /// Supported on Linux only.
3600 ///
3601 /// # Errors
3602 ///
3603 /// Returns an error if NVML rejects the handle or output, if the device does
3604 /// not support this query, if NVML has not been initialized, or if NVML
3605 /// reports an unexpected failure.
3606 pub fn compute_instance_id(self) -> Result<u32> {
3607 let mut id = 0;
3608 unsafe {
3609 try_ffi!(sys::nvmlDeviceGetComputeInstanceId(self.0, &raw mut id))?;
3610 }
3611 Ok(id)
3612 }
3613
3614 /// Returns the virtualization mode corresponding to the GPU.
3615 ///
3616 /// For Kepler or newer fully supported devices.
3617 ///
3618 /// # Errors
3619 ///
3620 /// Returns an error if the device is inaccessible, if NVML rejects the
3621 /// query, if NVML has not been initialized, or if NVML reports an
3622 /// unexpected failure.
3623 pub fn virtualization_mode(self) -> Result<VirtualizationMode> {
3624 let mut mode = sys::nvmlGpuVirtualizationMode_t::NVML_GPU_VIRTUALIZATION_MODE_NONE as u32;
3625 unsafe {
3626 try_ffi!(sys::nvmlDeviceGetVirtualizationMode(
3627 self.0,
3628 (&raw mut mode).cast::<sys::nvmlGpuVirtualizationMode_t>(),
3629 ))?;
3630 }
3631 Ok(VirtualizationMode::from_raw(mode))
3632 }
3633
3634 /// Queries if SR-IOV host operation is supported on a vGPU supported device.
3635 ///
3636 /// Checks whether SR-IOV host capability is supported by the device and the driver, and indicates device is in SR-IOV mode if both of these conditions are true.
3637 ///
3638 /// # Errors
3639 ///
3640 /// Returns an error if NVML rejects the query, if the device does not
3641 /// support host vGPU mode reporting, or if NVML reports an unexpected
3642 /// failure.
3643 pub fn host_vgpu_mode(self) -> Result<HostVgpuMode> {
3644 let mut mode = sys::nvmlHostVgpuMode_t::NVML_HOST_VGPU_MODE_NON_SRIOV;
3645 unsafe {
3646 try_ffi!(sys::nvmlDeviceGetHostVgpuMode(self.0, &raw mut mode))?;
3647 }
3648 try_from_nvml_enum("host vgpu mode", mode as u32)
3649 }
3650
3651 /// Returns the supported vGPU types on a physical GPU (device).
3652 ///
3653 /// This wrapper performs the NVML size query internally and returns the supported vGPU types as a [`Vec`].
3654 ///
3655 /// # Errors
3656 ///
3657 /// Returns an error if the vGPU type list changes while the wrapper is
3658 /// fetching it, if NVML rejects the query, if vGPU is unsupported by the
3659 /// device, or if NVML reports an unexpected failure.
3660 pub fn supported_vgpus(self) -> Result<Vec<VgpuTypeId>> {
3661 Ok(query_u32_list(|count, values| unsafe {
3662 sys::nvmlDeviceGetSupportedVgpus(self.0, count, values)
3663 })?
3664 .into_iter()
3665 .map(VgpuTypeId)
3666 .collect())
3667 }
3668
3669 /// Returns the currently creatable vGPU types on a physical GPU.
3670 ///
3671 /// The creatable vGPU types for a device may differ over time because
3672 /// restrictions can limit which vGPU types may run concurrently.
3673 /// For example, if only one vGPU type is allowed at a time on a device, the
3674 /// creatable list is restricted to the currently running vGPU type.
3675 /// This wrapper performs the NVML size query internally and returns the creatable vGPU types as a [`Vec`].
3676 ///
3677 /// # Errors
3678 ///
3679 /// Returns an error if the vGPU type list changes while the wrapper is
3680 /// fetching it, if NVML rejects the query, if vGPU is unsupported by the
3681 /// device, or if NVML reports an unexpected failure.
3682 pub fn creatable_vgpus(self) -> Result<Vec<VgpuTypeId>> {
3683 Ok(query_u32_list(|count, values| unsafe {
3684 sys::nvmlDeviceGetCreatableVgpus(self.0, count, values)
3685 })?
3686 .into_iter()
3687 .map(VgpuTypeId)
3688 .collect())
3689 }
3690
3691 pub fn vgpu_type_supported_placements(
3692 self,
3693 vgpu_type_id: VgpuTypeId,
3694 mode: VgpuPlacementMode,
3695 ) -> Result<Vec<VgpuPlacementId>> {
3696 self.query_vgpu_placements(
3697 vgpu_type_id,
3698 mode,
3699 sys::nvmlDeviceGetVgpuTypeSupportedPlacements,
3700 )
3701 }
3702
3703 pub fn vgpu_type_creatable_placements(
3704 self,
3705 vgpu_type_id: VgpuTypeId,
3706 mode: VgpuPlacementMode,
3707 ) -> Result<Vec<VgpuPlacementId>> {
3708 self.query_vgpu_placements(
3709 vgpu_type_id,
3710 mode,
3711 sys::nvmlDeviceGetVgpuTypeCreatablePlacements,
3712 )
3713 }
3714
3715 /// Returns the active vGPU instances on a device.
3716 ///
3717 /// This wrapper performs the NVML size query internally and returns the active vGPU instances as a [`Vec`].
3718 ///
3719 /// For Kepler or newer fully supported devices.
3720 ///
3721 /// # Errors
3722 ///
3723 /// Returns an error if the active vGPU list changes while the wrapper is
3724 /// fetching it, if NVML rejects the query, if vGPU is unsupported by the
3725 /// device, if NVML has not been initialized, or if NVML reports an
3726 /// unexpected failure.
3727 pub fn active_vgpus(self) -> Result<Vec<VgpuInstance>> {
3728 Ok(query_u32_list(|count, values| unsafe {
3729 sys::nvmlDeviceGetActiveVgpus(self.0, count, values)
3730 })?
3731 .into_iter()
3732 .map(|instance| VgpuInstance::from_id(VgpuInstanceId(instance)))
3733 .collect())
3734 }
3735
3736 /// Returns the vGPU heterogeneous mode for the device.
3737 ///
3738 /// When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes.
3739 ///
3740 /// On success, returns the current vGPU heterogeneous mode as [`EnableState::Enabled`] or [`EnableState::Disabled`].
3741 ///
3742 /// # Errors
3743 ///
3744 /// Returns an error if the installed NVML version does not support the
3745 /// request layout, if NVML rejects the query, if MIG is enabled or the
3746 /// device does not support vGPU heterogeneous mode, if NVML has not been
3747 /// initialized, or if NVML reports an unexpected failure.
3748 pub fn vgpu_heterogeneous_mode(self) -> Result<EnableState> {
3749 let mut mode = sys::nvmlVgpuHeterogeneousMode_t {
3750 version: struct_version::<sys::nvmlVgpuHeterogeneousMode_t>(1),
3751 ..Default::default()
3752 };
3753 unsafe {
3754 try_ffi!(sys::nvmlDeviceGetVgpuHeterogeneousMode(
3755 self.0,
3756 &raw mut mode
3757 ))?;
3758 }
3759 try_from_nvml_enum("enable state", mode.mode)
3760 }
3761
3762 /// Returns a vGPU metadata structure for this physical GPU.
3763 /// The structure contains information about the GPU and the currently installed NVIDIA host driver version that's controlling it, together with an opaque data section containing internal state.
3764 ///
3765 /// This wrapper allocates the metadata buffer internally and retries if NVML reports it is too small.
3766 ///
3767 /// # Errors
3768 ///
3769 /// Returns an error if the metadata buffer changes while the wrapper is
3770 /// fetching it, if NVML rejects the metadata request, if vGPU is unsupported
3771 /// by the system, or if NVML reports an unexpected failure.
3772 pub fn vgpu_metadata(self) -> Result<PgpuMetadata> {
3773 let raw = query_sized_raw(|size, buffer| unsafe {
3774 sys::nvmlDeviceGetVgpuMetadata(self.0, buffer.cast(), size)
3775 })?;
3776
3777 let metadata = unsafe { &*raw.as_ptr().cast::<sys::nvmlVgpuPgpuMetadata_t>() };
3778 let opaque_offset = mem::offset_of!(sys::nvmlVgpuPgpuMetadata_t, opaqueData);
3779 let opaque_end = opaque_offset.saturating_add(metadata.opaqueDataSize as usize);
3780 let opaque_data = raw[opaque_offset..opaque_end].to_vec();
3781
3782 Ok(PgpuMetadata::from_raw(metadata, opaque_data))
3783 }
3784
3785 /// Returns this physical GPU's properties as an ASCII-encoded string.
3786 ///
3787 /// This wrapper allocates the metadata buffer internally and retries if NVML reports it is too small.
3788 ///
3789 /// # Errors
3790 ///
3791 /// Returns an error if the metadata buffer changes while the wrapper is
3792 /// fetching it, if NVML rejects the metadata request, if vGPU is unsupported
3793 /// by the system, or if NVML reports an unexpected failure.
3794 pub fn pgpu_metadata_string(self) -> Result<String> {
3795 let raw = query_sized_raw(|size, buffer| unsafe {
3796 sys::nvmlDeviceGetPgpuMetadataString(self.0, buffer.cast(), size)
3797 })?;
3798 let buffer = raw.into_iter().map(|byte| byte as i8).collect::<Vec<_>>();
3799 Ok(string_from_c_chars(&buffer))
3800 }
3801
3802 /// Returns the requested vGPU capability for GPU.
3803 ///
3804 /// See [`DeviceVgpuCapability`] for the supported capabilities.
3805 /// Returns whether the capability is supported and any associated capability data.
3806 ///
3807 /// For Maxwell or newer fully supported devices.
3808 ///
3809 /// # Errors
3810 ///
3811 /// Returns an error if NVML rejects the device, capability, or output, if
3812 /// the current state does not support this vGPU capability query, if NVML
3813 /// has not been initialized, or if NVML reports an unexpected failure.
3814 pub fn vgpu_capability(self, capability: DeviceVgpuCapability) -> Result<u32> {
3815 let mut result = 0;
3816 unsafe {
3817 try_ffi!(sys::nvmlDeviceGetVgpuCapabilities(
3818 self.0,
3819 capability.into(),
3820 &raw mut result,
3821 ))?;
3822 }
3823 Ok(result)
3824 }
3825
3826 fn query_vgpu_placements(
3827 self,
3828 vgpu_type_id: VgpuTypeId,
3829 mode: VgpuPlacementMode,
3830 query: unsafe extern "C" fn(
3831 sys::nvmlDevice_t,
3832 sys::nvmlVgpuTypeId_t,
3833 *mut sys::nvmlVgpuPlacementList_t,
3834 ) -> sys::nvmlReturn_t,
3835 ) -> Result<Vec<VgpuPlacementId>> {
3836 let mut placement_list = sys::nvmlVgpuPlacementList_t {
3837 version: struct_version::<sys::nvmlVgpuPlacementList_t>(2),
3838 placementSize: size_of::<u32>() as u32,
3839 mode: mode as u32,
3840 ..Default::default()
3841 };
3842
3843 let status = unsafe { query(self.0, vgpu_type_id.0, &raw mut placement_list) };
3844 if status == sys::nvmlReturn_t::NVML_SUCCESS && placement_list.count == 0 {
3845 return Ok(Vec::new());
3846 }
3847 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
3848 return Err(status.into());
3849 }
3850
3851 let mut placements = vec![0u32; placement_list.count as usize];
3852 placement_list.placementIds = placements.as_mut_ptr();
3853 unsafe {
3854 try_ffi!(query(self.0, vgpu_type_id.0, &raw mut placement_list))?;
3855 }
3856 placements.truncate(placement_list.count as usize);
3857 Ok(placements.into_iter().map(VgpuPlacementId).collect())
3858 }
3859
3860 /// Returns bitmasks with the ideal CPU affinity for the device.
3861 /// For example, if processors 0, 1, 32, and 33 are ideal for the device and two mask words are requested, then `result[0] = 0x3` and `result[1] = 0x3`.
3862 /// This is equivalent to calling [`Device::cpu_affinity_within_scope`] with `NVML_AFFINITY_SCOPE_NODE`.
3863 ///
3864 /// For Kepler or newer fully supported devices.
3865 /// Supported on Linux only.
3866 ///
3867 /// # Errors
3868 ///
3869 /// Returns an error if the device is inaccessible, if NVML rejects the
3870 /// handle or output buffer, if the device does not support CPU affinity
3871 /// reporting, if NVML has not been initialized, or if NVML reports an
3872 /// unexpected failure.
3873 pub fn cpu_affinity(self) -> Result<Vec<u64>> {
3874 device_ulong_bitmask_list(|set_size, set| unsafe {
3875 sys::nvmlDeviceGetCpuAffinity(self.0, set_size, set)
3876 })
3877 }
3878
3879 /// Returns bitmasks with the ideal CPU affinity within a node or socket for the device.
3880 /// For example, if processors 0, 1, 32, and 33 are ideal for the device and two mask words are requested, then `result[0] = 0x3` and `result[1] = 0x3`.
3881 ///
3882 /// If the requested scope is not applicable to the target topology, NVML
3883 /// falls back to reporting CPU affinity for the device's immediate non-I/O
3884 /// ancestor.
3885 ///
3886 /// For Kepler or newer fully supported devices.
3887 /// Supported on Linux only.
3888 ///
3889 /// # Errors
3890 ///
3891 /// Returns an error if the device is inaccessible, if NVML rejects the
3892 /// handle, scope, or output buffer, if the device does not support scoped CPU
3893 /// affinity reporting, if NVML has not been initialized, or if NVML reports
3894 /// an unexpected failure.
3895 pub fn cpu_affinity_within_scope(self, scope: AffinityScope) -> Result<Vec<u64>> {
3896 device_ulong_bitmask_list(|set_size, set| unsafe {
3897 sys::nvmlDeviceGetCpuAffinityWithinScope(self.0, set_size, set, scope.into())
3898 })
3899 }
3900
3901 /// Sets the ideal affinity for the calling thread and device using the guidelines given in [`Device::cpu_affinity`].
3902 /// Note, this is a change as of version 8.0.
3903 /// Older versions set the affinity for a calling process and all children.
3904 /// Currently supports up to 1024 processors.
3905 ///
3906 /// For Kepler or newer fully supported devices.
3907 /// Supported on Linux only.
3908 ///
3909 /// # Errors
3910 ///
3911 /// Returns an error if the device is inaccessible, if NVML rejects the
3912 /// handle, if the device does not support CPU affinity binding, if NVML has
3913 /// not been initialized, or if NVML reports an unexpected failure.
3914 pub fn set_cpu_affinity(self) -> Result<()> {
3915 unsafe { try_ffi!(sys::nvmlDeviceSetCpuAffinity(self.0)) }
3916 }
3917
3918 /// Clear all affinity bindings for the calling thread.
3919 /// Note, this is a change as of version 8.0 as older versions cleared the affinity for a calling process and all children.
3920 ///
3921 /// For Kepler or newer fully supported devices.
3922 /// Supported on Linux only.
3923 ///
3924 /// # Errors
3925 ///
3926 /// Returns an error if NVML rejects the handle, if NVML has not been
3927 /// initialized, or if NVML reports an unexpected failure.
3928 pub fn clear_cpu_affinity(self) -> Result<()> {
3929 unsafe { try_ffi!(sys::nvmlDeviceClearCpuAffinity(self.0)) }
3930 }
3931
3932 /// Returns bitmasks with the ideal memory affinity within a node or socket for this device.
3933 /// For example, if NUMA nodes 0 and 1 are ideal within the socket and one mask word is requested, `result[0] = 0x3`.
3934 ///
3935 /// If the requested scope is not applicable to the target topology, NVML
3936 /// falls back to reporting memory affinity for the device's immediate
3937 /// non-I/O ancestor.
3938 ///
3939 /// For Kepler or newer fully supported devices.
3940 /// Supported on Linux only.
3941 ///
3942 /// # Errors
3943 ///
3944 /// Returns an error if the device is inaccessible, if NVML rejects the
3945 /// handle, scope, or output buffer, if the device does not support memory
3946 /// affinity reporting, if NVML has not been initialized, or if NVML reports
3947 /// an unexpected failure.
3948 pub fn memory_affinity(self, scope: AffinityScope) -> Result<Vec<u64>> {
3949 device_ulong_bitmask_list(|set_size, set| unsafe {
3950 sys::nvmlDeviceGetMemoryAffinity(self.0, set_size, set, scope.into())
3951 })
3952 }
3953
3954 /// Returns the addressing mode for the given GPU.
3955 ///
3956 /// [`DeviceAddressingMode::Hmm`] makes system-allocated memory (`malloc`,
3957 /// `mmap`) addressable from the GPU through software-based mirroring of the
3958 /// CPU page tables. [`DeviceAddressingMode::Ats`] makes system-allocated
3959 /// memory addressable from the GPU through Address Translation Services,
3960 /// which means there is effectively a single set of page tables used by
3961 /// both CPU and GPU. [`DeviceAddressingMode::None`] means neither HMM nor
3962 /// ATS is active.
3963 ///
3964 /// For Turing or newer fully supported devices.
3965 /// Supported on Linux only.
3966 ///
3967 /// # Errors
3968 ///
3969 /// Returns an error if the installed NVML version does not support the
3970 /// request layout, if NVML rejects the handle, or if the current platform
3971 /// does not support addressing-mode reporting.
3972 pub fn addressing_mode(self) -> Result<DeviceAddressingMode> {
3973 let mut mode = sys::nvmlDeviceAddressingMode_t {
3974 version: struct_version::<sys::nvmlDeviceAddressingMode_t>(1),
3975 ..Default::default()
3976 };
3977 unsafe {
3978 try_ffi!(sys::nvmlDeviceGetAddressingMode(self.0, &raw mut mode))?;
3979 }
3980 try_from_nvml_enum("device addressing mode", mode.value)
3981 }
3982
3983 /// Returns the repair status for TPC/channel repair.
3984 ///
3985 /// For Ampere or newer fully supported devices.
3986 ///
3987 /// # Errors
3988 ///
3989 /// Returns an error if the installed NVML version does not support the
3990 /// request layout, if NVML rejects the handle or output, if the device does
3991 /// not support repair-status reporting, if NVML has not been initialized, or
3992 /// if NVML reports an unexpected failure.
3993 pub fn repair_status(self) -> Result<RepairStatus> {
3994 let mut status = sys::nvmlRepairStatus_t {
3995 version: struct_version::<sys::nvmlRepairStatus_t>(1),
3996 ..Default::default()
3997 };
3998 unsafe {
3999 try_ffi!(sys::nvmlDeviceGetRepairStatus(self.0, &raw mut status))?;
4000 }
4001 Ok(status.into())
4002 }
4003
4004 /// Returns the NUMA node of the given GPU device.
4005 /// This only applies to platforms where the GPUs are NUMA nodes.
4006 ///
4007 /// # Errors
4008 ///
4009 /// Returns an error if NVML rejects the handle or output, or if the current
4010 /// platform does not support GPU NUMA-node reporting.
4011 pub fn numa_node_id(self) -> Result<u32> {
4012 let mut node = 0;
4013 unsafe {
4014 try_ffi!(sys::nvmlDeviceGetNumaNodeId(self.0, &raw mut node))?;
4015 }
4016 Ok(node)
4017 }
4018
4019 pub fn capabilities(self) -> Result<DeviceCapabilities> {
4020 let mut caps = sys::nvmlDeviceCapabilities_t {
4021 version: struct_version::<sys::nvmlDeviceCapabilities_t>(1),
4022 ..Default::default()
4023 };
4024 unsafe {
4025 try_ffi!(sys::nvmlDeviceGetCapabilities(self.0, &raw mut caps))?;
4026 }
4027 Ok(DeviceCapabilities::from_bits_retain(caps.capMask))
4028 }
4029
4030 /// Returns performance monitor samples from the associated subdevice.
4031 ///
4032 /// # Errors
4033 ///
4034 /// Returns an error if the device is inaccessible, if NVML rejects the
4035 /// query, if the device does not support dynamic P-state samples, if NVML
4036 /// has not been initialized, or if NVML reports an unexpected failure.
4037 pub fn dynamic_pstates_info(self) -> Result<DynamicPstatesInfo> {
4038 unsafe {
4039 let mut info = MaybeUninit::<sys::nvmlGpuDynamicPstatesInfo_t>::uninit();
4040 try_ffi!(sys::nvmlDeviceGetDynamicPstatesInfo(
4041 self.0,
4042 info.as_mut_ptr()
4043 ))?;
4044 Ok(info.assume_init().into())
4045 }
4046 }
4047
4048 /// Versioned wrapper that requests GPU fabric information using the latest supported NVML output layout.
4049 ///
4050 /// This wrapper sets the version field on the output structure before calling NVML.
4051 ///
4052 /// For Hopper or newer fully supported devices.
4053 ///
4054 /// # Errors
4055 ///
4056 /// Returns an error if the device does not support GPU fabric reporting.
4057 pub fn gpu_fabric_info(self) -> Result<GpuFabricInfo> {
4058 let mut info = sys::nvmlGpuFabricInfoV_t {
4059 version: struct_version::<sys::nvmlGpuFabricInfoV_t>(3),
4060 ..Default::default()
4061 };
4062 unsafe {
4063 try_ffi!(sys::nvmlDeviceGetGpuFabricInfoV(self.0, &raw mut info))?;
4064 }
4065 GpuFabricInfo::from_raw(info)
4066 }
4067
4068 /// Returns performance profiles information.
4069 ///
4070 /// For Blackwell or newer fully supported devices.
4071 /// Returns the workload power profile information reported by NVML.
4072 /// The `perf_profiles_mask` field is a bitmask of all supported mode indices where the mode is supported if the index is 1.
4073 /// Each supported mode has a corresponding entry in the profile array containing the profile ID, the priority of this mode, and a conflicting mask.
4074 /// Lower priority values indicate higher priority, and each bit set in the conflicting mask corresponds to a different profile that cannot be used with the given profile.
4075 ///
4076 /// # Errors
4077 ///
4078 /// Returns an error if the installed NVML version does not support the
4079 /// request layout, if the device is inaccessible, if NVML reports that the
4080 /// output layout is too small, if NVML rejects the query, if the device does
4081 /// not support workload power profiles, if NVML has not been initialized, or
4082 /// if NVML reports an unexpected failure.
4083 pub fn workload_power_profiles_info(self) -> Result<WorkloadPowerProfilesInfo> {
4084 let mut info = sys::nvmlWorkloadPowerProfileProfilesInfo_t {
4085 version: struct_version::<sys::nvmlWorkloadPowerProfileProfilesInfo_t>(1),
4086 ..Default::default()
4087 };
4088 unsafe {
4089 try_ffi!(sys::nvmlDeviceWorkloadPowerProfileGetProfilesInfo(
4090 self.0,
4091 &raw mut info,
4092 ))?;
4093 }
4094 Ok(info.into())
4095 }
4096
4097 /// Returns current performance profiles.
4098 ///
4099 /// For Blackwell or newer fully supported devices.
4100 /// Returns the current workload power profile state reported by NVML.
4101 /// Returns the current, requested, and enforced performance profile masks.
4102 /// Each bit set in each bitmask indicates whether the profile is supported, currently requested, or currently engaged, respectively.
4103 ///
4104 /// # Errors
4105 ///
4106 /// Returns an error if the installed NVML version does not support the
4107 /// request layout, if the device is inaccessible, if NVML rejects the query,
4108 /// if the device does not support workload power profiles, if NVML has not
4109 /// been initialized, or if NVML reports an unexpected failure.
4110 pub fn workload_power_current_profiles(self) -> Result<WorkloadPowerCurrentProfiles> {
4111 let mut profiles = sys::nvmlWorkloadPowerProfileCurrentProfiles_t {
4112 version: struct_version::<sys::nvmlWorkloadPowerProfileCurrentProfiles_t>(1),
4113 ..Default::default()
4114 };
4115 unsafe {
4116 try_ffi!(sys::nvmlDeviceWorkloadPowerProfileGetCurrentProfiles(
4117 self.0,
4118 &raw mut profiles,
4119 ))?;
4120 }
4121 Ok(profiles.into())
4122 }
4123
4124 /// Returns the current and pending DRAM Encryption modes for the device.
4125 ///
4126 /// For Blackwell or newer fully supported devices.
4127 /// Only applicable to devices that support DRAM Encryption.
4128 /// Requires [`InforomObject::Den`] version 1.0 or higher.
4129 ///
4130 /// Changing DRAM Encryption modes requires a reboot.
4131 /// The "pending" DRAM Encryption mode refers to the target mode following the next reboot.
4132 ///
4133 /// See [`EnableState`] for details on allowed modes.
4134 ///
4135 /// # Errors
4136 ///
4137 /// Returns an error if the installed NVML version does not support the
4138 /// request layout, if the device is inaccessible, if NVML rejects the handle
4139 /// or outputs, if the device does not support DRAM encryption, if NVML has
4140 /// not been initialized, or if NVML reports an unexpected failure.
4141 pub fn dram_encryption_mode(self) -> Result<CurrentPending<DramEncryptionInfo>> {
4142 let mut current = sys::nvmlDramEncryptionInfo_t {
4143 version: struct_version::<sys::nvmlDramEncryptionInfo_t>(1),
4144 ..Default::default()
4145 };
4146 let mut pending = sys::nvmlDramEncryptionInfo_t {
4147 version: struct_version::<sys::nvmlDramEncryptionInfo_t>(1),
4148 ..Default::default()
4149 };
4150 unsafe {
4151 try_ffi!(sys::nvmlDeviceGetDramEncryptionMode(
4152 self.0,
4153 &raw mut current,
4154 &raw mut pending,
4155 ))?;
4156 }
4157 Ok(CurrentPending {
4158 current: current.into(),
4159 pending: pending.into(),
4160 })
4161 }
4162
4163 /// Sets the DRAM encryption mode for the device.
4164 ///
4165 /// For Kepler or newer fully supported devices.
4166 /// Only applicable to devices that support DRAM Encryption.
4167 /// Requires [`InforomObject::Den`] version 1.0 or higher.
4168 /// Requires root/admin permissions.
4169 ///
4170 /// The DRAM Encryption mode determines whether the GPU enables its DRAM Encryption support.
4171 ///
4172 /// This operation takes effect after the next reboot.
4173 ///
4174 /// See [`EnableState`] for details on available modes.
4175 ///
4176 /// # Errors
4177 ///
4178 /// Returns an error if the installed NVML version does not support the
4179 /// request layout, if the device is inaccessible, if NVML rejects the handle
4180 /// or requested mode, if the device does not support DRAM encryption, if the
4181 /// current process lacks permission, if NVML has not been initialized, or if
4182 /// NVML reports an unexpected failure.
4183 pub fn set_dram_encryption_mode(self, dram_encryption: DramEncryptionInfo) -> Result<()> {
4184 let dram_encryption = sys::nvmlDramEncryptionInfo_t {
4185 version: struct_version::<sys::nvmlDramEncryptionInfo_t>(1),
4186 encryptionState: dram_encryption.encryption_state.into(),
4187 };
4188 unsafe {
4189 try_ffi!(sys::nvmlDeviceSetDramEncryptionMode(
4190 self.0,
4191 &raw const dram_encryption,
4192 ))
4193 }
4194 }
4195
4196 /// Returns Confidential Computing protected and unprotected memory sizes.
4197 ///
4198 /// For Ampere or newer fully supported devices.
4199 /// Supported on Linux, Windows TCC.
4200 ///
4201 /// # Errors
4202 ///
4203 /// Returns an error if NVML rejects the query, if the device does not
4204 /// support Confidential Computing memory-size reporting, or if NVML has not
4205 /// been initialized.
4206 pub fn conf_compute_mem_size_info(self) -> Result<ConfComputeMemSizeInfo> {
4207 let mut info = sys::nvmlConfComputeMemSizeInfo_t::default();
4208 unsafe {
4209 try_ffi!(sys::nvmlDeviceGetConfComputeMemSizeInfo(
4210 self.0,
4211 &raw mut info
4212 ))?;
4213 }
4214 Ok(info.into())
4215 }
4216
4217 /// Returns Confidential Computing protected memory usage.
4218 ///
4219 /// For Ampere or newer fully supported devices.
4220 /// Supported on Linux, Windows TCC.
4221 ///
4222 /// # Errors
4223 ///
4224 /// Returns an error if NVML rejects the query, if the device does not
4225 /// support Confidential Computing protected-memory reporting, if NVML has
4226 /// not been initialized, or if NVML reports an unexpected failure.
4227 pub fn conf_compute_protected_memory_usage(self) -> Result<MemoryInfo> {
4228 let mut memory = sys::nvmlMemory_v2_t {
4229 version: struct_version::<sys::nvmlMemory_v2_t>(2),
4230 ..Default::default()
4231 };
4232 unsafe {
4233 try_ffi!(sys::nvmlDeviceGetConfComputeProtectedMemoryUsage(
4234 self.0,
4235 (&raw mut memory).cast(),
4236 ))?;
4237 }
4238 Ok(memory.into())
4239 }
4240
4241 /// Returns Confidential Computing GPU certificate details.
4242 ///
4243 /// For Ampere or newer fully supported devices.
4244 /// Supported on Linux, Windows TCC.
4245 ///
4246 /// # Errors
4247 ///
4248 /// Returns an error if NVML rejects the query, if the device does not
4249 /// support Confidential Computing certificate reporting, if NVML has not
4250 /// been initialized, or if NVML reports an unexpected failure.
4251 pub fn conf_compute_gpu_certificate(self) -> Result<ConfComputeGpuCertificate> {
4252 unsafe {
4253 let mut certificate = MaybeUninit::<sys::nvmlConfComputeGpuCertificate_t>::uninit();
4254 try_ffi!(sys::nvmlDeviceGetConfComputeGpuCertificate(
4255 self.0,
4256 certificate.as_mut_ptr(),
4257 ))?;
4258 Ok(certificate.assume_init().into())
4259 }
4260 }
4261
4262 /// Returns Confidential Computing GPU attestation report.
4263 ///
4264 /// For Ampere or newer fully supported devices.
4265 /// Supported on Linux, Windows TCC.
4266 ///
4267 /// # Errors
4268 ///
4269 /// Returns an error if NVML rejects the query, if the device does not
4270 /// support Confidential Computing attestation reports, if NVML has not been
4271 /// initialized, or if NVML reports an unexpected failure.
4272 pub fn conf_compute_gpu_attestation_report(self) -> Result<ConfComputeGpuAttestationReport> {
4273 unsafe {
4274 let mut report = MaybeUninit::<sys::nvmlConfComputeGpuAttestationReport_t>::uninit();
4275 try_ffi!(sys::nvmlDeviceGetConfComputeGpuAttestationReport(
4276 self.0,
4277 report.as_mut_ptr(),
4278 ))?;
4279 Ok(report.assume_init().into())
4280 }
4281 }
4282
4283 /// Returns the state of the device's NVLink for the specified link.
4284 ///
4285 /// For Pascal or newer fully supported devices.
4286 ///
4287 /// # Errors
4288 ///
4289 /// Returns an error if NVML rejects the handle, link index, or output, if
4290 /// the device does not support NVLink state reporting, if NVML has not been
4291 /// initialized, or if NVML reports an unexpected failure.
4292 pub fn nvlink_state(self, link: u32) -> Result<EnableState> {
4293 let mut state = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
4294 unsafe {
4295 try_ffi!(sys::nvmlDeviceGetNvLinkState(self.0, link, &raw mut state))?;
4296 }
4297 Ok(state.into())
4298 }
4299
4300 /// Returns the version of the device's NVLink for the specified link.
4301 ///
4302 /// For Pascal or newer fully supported devices.
4303 ///
4304 /// # Errors
4305 ///
4306 /// Returns an error if NVML rejects the handle, link index, or output, if
4307 /// the device does not support NVLink version reporting, if NVML has not
4308 /// been initialized, or if NVML reports an unexpected failure.
4309 pub fn nvlink_version(self, link: u32) -> Result<NvLinkVersion> {
4310 let mut version = 0;
4311 unsafe {
4312 try_ffi!(sys::nvmlDeviceGetNvLinkVersion(
4313 self.0,
4314 link,
4315 &raw mut version
4316 ))?;
4317 }
4318 try_from_nvml_enum("nvlink version", version)
4319 }
4320
4321 /// Returns the requested capability from the device's NVLink for the specified link.
4322 ///
4323 /// See [`NvLinkCapability`] for the supported capabilities.
4324 ///
4325 /// For Pascal or newer fully supported devices.
4326 ///
4327 /// # Errors
4328 ///
4329 /// Returns an error if NVML rejects the handle, link index, capability, or
4330 /// output, if the device does not support NVLink capability reporting, if
4331 /// NVML has not been initialized, or if NVML reports an unexpected failure.
4332 pub fn nvlink_capability(self, link: u32, capability: NvLinkCapability) -> Result<bool> {
4333 let mut result = 0;
4334 unsafe {
4335 try_ffi!(sys::nvmlDeviceGetNvLinkCapability(
4336 self.0,
4337 link,
4338 capability.into(),
4339 &raw mut result,
4340 ))?;
4341 }
4342 Ok(result != 0)
4343 }
4344
4345 /// Returns the PCI information for the remote node on an NVLink link.
4346 ///
4347 /// `pci_subsystem_id` is not filled by this query and is indeterminate.
4348 ///
4349 /// For Pascal or newer fully supported devices.
4350 ///
4351 /// # Errors
4352 ///
4353 /// Returns an error if NVML rejects the handle, link index, or output, if
4354 /// the device does not support remote PCI reporting for NVLink, if NVML has
4355 /// not been initialized, or if NVML reports an unexpected failure.
4356 pub fn nvlink_remote_pci_info(self, link: u32) -> Result<PciInfo> {
4357 unsafe {
4358 let mut pci = MaybeUninit::<sys::nvmlPciInfo_t>::uninit();
4359 try_ffi!(sys::nvmlDeviceGetNvLinkRemotePciInfo_v2(
4360 self.0,
4361 link,
4362 pci.as_mut_ptr(),
4363 ))?;
4364 Ok(pci.assume_init().into())
4365 }
4366 }
4367
4368 /// Returns the NVLink device type of the remote device connected over the given link.
4369 ///
4370 /// # Errors
4371 ///
4372 /// Returns an error if the device is inaccessible, if NVML rejects the
4373 /// handle, link index, or output, if NVLink is unsupported, if NVML has not
4374 /// been initialized, or if NVML reports an unexpected failure.
4375 pub fn nvlink_remote_device_type(self, link: u32) -> Result<NvLinkRemoteDeviceType> {
4376 let mut device_type = sys::nvmlIntNvLinkDeviceType_t::NVML_NVLINK_DEVICE_TYPE_UNKNOWN;
4377 unsafe {
4378 try_ffi!(sys::nvmlDeviceGetNvLinkRemoteDeviceType(
4379 self.0,
4380 link,
4381 &raw mut device_type,
4382 ))?;
4383 }
4384 Ok(device_type.into())
4385 }
4386
4387 /// Returns the specified error counter value.
4388 ///
4389 /// See [`NvLinkErrorCounter`] for available counters.
4390 ///
4391 /// For Pascal or newer fully supported devices.
4392 ///
4393 /// # Errors
4394 ///
4395 /// Returns an error if NVML rejects the handle, link index, counter, or
4396 /// output, if the device does not support NVLink error counters, if NVML has
4397 /// not been initialized, or if NVML reports an unexpected failure.
4398 pub fn nvlink_error_counter(self, link: u32, counter: NvLinkErrorCounter) -> Result<u64> {
4399 let mut value = 0;
4400 unsafe {
4401 try_ffi!(sys::nvmlDeviceGetNvLinkErrorCounter(
4402 self.0,
4403 link,
4404 counter.into(),
4405 &raw mut value,
4406 ))?;
4407 }
4408 Ok(value)
4409 }
4410
4411 /// Returns the supported NVLink reduced bandwidth modes of the device.
4412 ///
4413 /// For Blackwell or newer fully supported devices.
4414 ///
4415 /// # Errors
4416 ///
4417 /// Returns an error if the installed NVML version does not support the
4418 /// request layout, if NVML rejects the handle or output, or if the device
4419 /// does not support NVLink bandwidth modes.
4420 pub fn nvlink_supported_bw_modes(self) -> Result<NvLinkSupportedBwModes> {
4421 let mut modes = sys::nvmlNvlinkSupportedBwModes_t {
4422 version: struct_version::<sys::nvmlNvlinkSupportedBwModes_t>(1),
4423 ..Default::default()
4424 };
4425 unsafe {
4426 try_ffi!(sys::nvmlDeviceGetNvlinkSupportedBwModes(
4427 self.0,
4428 &raw mut modes
4429 ))?;
4430 }
4431 Ok(modes.into())
4432 }
4433
4434 /// Returns the NVLink reduced bandwidth mode for the device.
4435 ///
4436 /// For Blackwell or newer fully supported devices.
4437 ///
4438 /// # Errors
4439 ///
4440 /// Returns an error if the installed NVML version does not support the
4441 /// request layout, if NVML rejects the handle or output, or if the device
4442 /// does not support NVLink bandwidth mode reporting.
4443 pub fn nvlink_bw_mode(self) -> Result<NvLinkBwMode> {
4444 let mut mode = sys::nvmlNvlinkGetBwMode_t {
4445 version: struct_version::<sys::nvmlNvlinkGetBwMode_t>(1),
4446 ..Default::default()
4447 };
4448 unsafe {
4449 try_ffi!(sys::nvmlDeviceGetNvlinkBwMode(self.0, &raw mut mode))?;
4450 }
4451 Ok(mode.into())
4452 }
4453
4454 /// Query NVLINK information associated with this device.
4455 ///
4456 /// # Errors
4457 ///
4458 /// Returns an error if the installed NVML version does not support the
4459 /// request layout, if the device is inaccessible, if NVML rejects the handle
4460 /// or output, if the device does not support NVLink info reporting, if NVML
4461 /// has not been initialized, or if NVML reports an unexpected failure.
4462 pub fn nvlink_info(self) -> Result<NvLinkInfo> {
4463 let mut info = sys::nvmlNvLinkInfo_t {
4464 version: struct_version::<sys::nvmlNvLinkInfo_t>(2),
4465 ..Default::default()
4466 };
4467 unsafe {
4468 try_ffi!(sys::nvmlDeviceGetNvLinkInfo(self.0, &raw mut info))?;
4469 }
4470 Ok(info.into())
4471 }
4472
4473 /// Returns the active frame buffer capture sessions statistics for the given device.
4474 ///
4475 /// For Maxwell or newer fully supported devices.
4476 ///
4477 /// # Errors
4478 ///
4479 /// Returns an error if the device is inaccessible, if NVML rejects the stats
4480 /// output, if NVML has not been initialized, or if NVML reports an
4481 /// unexpected failure.
4482 pub fn fbc_stats(self) -> Result<FbcStats> {
4483 unsafe {
4484 let mut stats = MaybeUninit::<sys::nvmlFBCStats_t>::uninit();
4485 try_ffi!(sys::nvmlDeviceGetFBCStats(self.0, stats.as_mut_ptr()))?;
4486 Ok(stats.assume_init().into())
4487 }
4488 }
4489
4490 /// Returns information about active frame buffer capture sessions on a target device.
4491 ///
4492 /// This wrapper queries the required session count first, then returns the active FBC sessions as a [`Vec`].
4493 ///
4494 /// For Maxwell or newer fully supported devices.
4495 ///
4496 /// `h_resolution`, `v_resolution`, `average_fps`, and `average_latency` may be zero if no new frames were captured since the session started.
4497 ///
4498 /// # Errors
4499 ///
4500 /// Returns an error if the device is inaccessible, if the FBC session list
4501 /// changes while the wrapper is fetching it, if NVML reports an invalid
4502 /// session count, if NVML has not been initialized, or if NVML reports an
4503 /// unexpected failure.
4504 pub fn fbc_sessions(self) -> Result<Vec<FbcSessionInfo>> {
4505 let mut count = 0;
4506 let status = unsafe {
4507 sys::nvmlDeviceGetFBCSessions(self.as_raw(), &raw mut count, ptr::null_mut() as _)
4508 };
4509 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
4510 return Ok(Vec::new());
4511 }
4512 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4513 return Err(status.into());
4514 }
4515
4516 let mut sessions = vec![sys::nvmlFBCSessionInfo_t::default(); count as usize];
4517 unsafe {
4518 try_ffi!(sys::nvmlDeviceGetFBCSessions(
4519 self.as_raw(),
4520 &raw mut count,
4521 sessions.as_mut_ptr(),
4522 ))?;
4523 }
4524 sessions.truncate(count as usize);
4525 Ok(sessions.into_iter().map(Into::into).collect())
4526 }
4527
4528 /// Returns the common ancestor for two devices.
4529 ///
4530 /// For all products.
4531 /// Supported on Linux only.
4532 ///
4533 /// # Errors
4534 ///
4535 /// Returns an error if NVML rejects either handle or the output, if topology
4536 /// discovery is not supported on this device or OS, or if NVML fails during
4537 /// topology discovery.
4538 pub fn topology_common_ancestor(self, other: Self) -> Result<TopologyLevel> {
4539 let mut level = sys::nvmlGpuTopologyLevel_t::NVML_TOPOLOGY_SYSTEM;
4540 unsafe {
4541 try_ffi!(sys::nvmlDeviceGetTopologyCommonAncestor(
4542 self.0,
4543 other.0,
4544 &raw mut level,
4545 ))?;
4546 }
4547 Ok(level.into())
4548 }
4549
4550 /// Returns the set of GPUs nearest to this device at a specific interconnectivity level.
4551 /// Supported on Linux only.
4552 ///
4553 /// # Errors
4554 ///
4555 /// Returns an error if NVML rejects the handle, topology level, count, or
4556 /// output buffer, if topology discovery is not supported on this device or
4557 /// OS, or if NVML fails during topology discovery.
4558 pub fn topology_nearest_gpus(self, level: TopologyLevel) -> Result<Vec<Device>> {
4559 let mut count = 0;
4560 let status = unsafe {
4561 sys::nvmlDeviceGetTopologyNearestGpus(
4562 self.0,
4563 level.into(),
4564 &raw mut count,
4565 ptr::null_mut(),
4566 )
4567 };
4568 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
4569 return Ok(Vec::new());
4570 }
4571 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4572 return Err(status.into());
4573 }
4574
4575 let mut devices = vec![ptr::null_mut(); count as usize];
4576 unsafe {
4577 try_ffi!(sys::nvmlDeviceGetTopologyNearestGpus(
4578 self.0,
4579 level.into(),
4580 &raw mut count,
4581 devices.as_mut_ptr(),
4582 ))?;
4583 }
4584 devices.truncate(count as usize);
4585 Ok(devices.into_iter().map(Self).collect())
4586 }
4587
4588 /// Returns the status for a P2P capability between this device and `other`.
4589 ///
4590 /// # Errors
4591 ///
4592 /// Returns an error if NVML rejects either device handle, the capability, or
4593 /// output, or if NVML reports an unexpected failure.
4594 pub fn p2p_status(self, other: Self, capability: P2pCapabilityIndex) -> Result<P2pStatus> {
4595 let mut status = sys::nvmlGpuP2PStatus_t::NVML_P2P_STATUS_UNKNOWN;
4596 unsafe {
4597 try_ffi!(sys::nvmlDeviceGetP2PStatus(
4598 self.0,
4599 other.0,
4600 capability.into(),
4601 &raw mut status,
4602 ))?;
4603 }
4604 Ok(status.into())
4605 }
4606
4607 /// Check if the GPU devices are on the same physical board.
4608 ///
4609 /// For all fully supported products.
4610 ///
4611 /// # Errors
4612 ///
4613 /// Returns an error if either device is inaccessible, if NVML rejects either
4614 /// handle or output, if the check is unsupported, if NVML has not been
4615 /// initialized, or if NVML reports an unexpected failure.
4616 pub fn on_same_board(self, other: Self) -> Result<bool> {
4617 let mut on_same_board = 0;
4618 unsafe {
4619 try_ffi!(sys::nvmlDeviceOnSameBoard(
4620 self.0,
4621 other.0,
4622 &raw mut on_same_board,
4623 ))?;
4624 }
4625 Ok(on_same_board != 0)
4626 }
4627
4628 pub fn compute_running_processes(self) -> Result<Vec<ProcessInfo>> {
4629 query_process_info_list(self, sys::nvmlDeviceGetComputeRunningProcesses)
4630 }
4631
4632 /// Returns information about running processes on a device for the input context.
4633 ///
4634 /// For Hopper or newer fully supported devices.
4635 ///
4636 /// Returns information only about running processes, such as CUDA applications with active contexts.
4637 ///
4638 /// This wrapper performs the NVML size query internally and returns the matching process details as a [`Vec`].
4639 ///
4640 /// The `used_gpu_memory` field is all of the memory used by the application.
4641 /// The `used_gpu_cc_protected_memory` field is all of the protected memory used by the application.
4642 ///
4643 /// Keep in mind that information returned by this call is dynamic and the number of elements might change in time.
/// The wrapper sizes its buffer with one query and fetches once; if more processes are spawned in between, the fetch fails with NVML's error rather than being retried.
4645 ///
4646 /// In MIG mode, a physical device handle reports aggregate information only if the caller has appropriate privileges.
4647 /// Per-instance information can be queried by using specific MIG device handles.
4648 /// Querying per-instance information using MIG device handles is not supported if the device is in vGPU Host virtualization mode.
4649 /// Protected memory usage is currently not available in MIG mode or on Windows.
4650 ///
4651 /// # Errors
4652 ///
4653 /// Returns an error if the device is inaccessible, if the process list
4654 /// changes while the wrapper is fetching it, if NVML rejects the device,
4655 /// process mode, or output, if the query is unsupported, if the current
4656 /// process lacks permission, if NVML has not been initialized, or if NVML
4657 /// reports an unexpected failure.
4658 pub fn running_process_details(self, mode: ProcessMode) -> Result<Vec<ProcessDetail>> {
4659 let mut list = sys::nvmlProcessDetailList_t {
4660 version: struct_version::<sys::nvmlProcessDetailList_t>(1),
4661 mode: mode.into(),
4662 ..Default::default()
4663 };
4664 let status = unsafe { sys::nvmlDeviceGetRunningProcessDetailList(self.0, &raw mut list) };
4665 if status == sys::nvmlReturn_t::NVML_SUCCESS && list.numProcArrayEntries == 0 {
4666 return Ok(Vec::new());
4667 }
4668 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4669 return Err(status.into());
4670 }
4671
4672 let mut entries =
4673 vec![sys::nvmlProcessDetail_v1_t::default(); list.numProcArrayEntries as usize];
4674 list.procArray = entries.as_mut_ptr();
4675 unsafe {
4676 try_ffi!(sys::nvmlDeviceGetRunningProcessDetailList(
4677 self.0,
4678 &raw mut list
4679 ))?;
4680 }
4681 entries.truncate(list.numProcArrayEntries as usize);
4682 Ok(entries.into_iter().map(Into::into).collect())
4683 }
4684
4685 pub fn graphics_running_processes(self) -> Result<Vec<ProcessInfo>> {
4686 query_process_info_list(self, sys::nvmlDeviceGetGraphicsRunningProcesses)
4687 }
4688
4689 pub fn mps_compute_running_processes(self) -> Result<Vec<ProcessInfo>> {
4690 query_process_info_list(self, sys::nvmlDeviceGetMPSComputeRunningProcesses)
4691 }
4692
4693 /// Returns the current utilization and process ID.
4694 ///
4695 /// For Maxwell or newer fully supported devices.
4696 ///
4697 /// Reads recent utilization of GPU SM (3D/Compute), framebuffer, video encoder, and video decoder for processes running.
4698 /// This wrapper returns utilization values as a [`Vec`] of process samples.
4699 /// One utilization sample structure is returned per running process that had
4700 /// non-zero utilization during the last sample period.
4701 /// It includes the CPU timestamp at which the samples were recorded.
4702 /// Individual utilization values are returned as unsigned integer values.
/// If NVML reports no valid sample entries since `last_seen_timestamp` during the size query, an empty [`Vec`] is returned instead of [`Status::NotFound`].
4704 ///
4705 /// This wrapper performs the NVML size query internally and retries with the required capacity.
4706 ///
4707 /// On success, NVML reports the number of process utilization sample structures that were actually written.
4708 /// This may differ from a previously read value as instances are created or destroyed.
4709 ///
4710 /// `last_seen_timestamp` represents the CPU timestamp in microseconds at which utilization samples were last read.
4711 /// Use `0` to read utilization based on all samples maintained by the driver's internal sample buffer.
4712 /// Use a timestamp returned by a previous query to read utilization since that query.
4713 ///
4714 /// On MIG-enabled GPUs, querying process utilization is not currently supported.
4715 ///
4716 /// # Errors
4717 ///
4718 /// Returns an error if the device is inaccessible, if NVML rejects the
4719 /// handle, utilization buffer, or sampling timestamp, if the device does not
4720 /// support process utilization sampling, if NVML has not been initialized,
4721 /// or if NVML reports an unexpected failure. Missing sample entries are
4722 /// returned as an empty list.
4723 pub fn process_utilization(
4724 self,
4725 last_seen_timestamp: u64,
4726 ) -> Result<Vec<ProcessUtilizationSample>> {
4727 let mut count = 0;
4728 let status = unsafe {
4729 sys::nvmlDeviceGetProcessUtilization(
4730 self.0,
4731 ptr::null_mut(),
4732 &raw mut count,
4733 last_seen_timestamp,
4734 )
4735 };
4736 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
4737 return Ok(Vec::new());
4738 }
4739 if status == sys::nvmlReturn_t::NVML_ERROR_NOT_FOUND {
4740 return Ok(Vec::new());
4741 }
4742 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4743 return Err(status.into());
4744 }
4745
4746 let mut samples = vec![sys::nvmlProcessUtilizationSample_t::default(); count as usize];
4747 unsafe {
4748 try_ffi!(sys::nvmlDeviceGetProcessUtilization(
4749 self.0,
4750 samples.as_mut_ptr(),
4751 &raw mut count,
4752 last_seen_timestamp,
4753 ))?;
4754 }
4755 samples.truncate(count as usize);
4756 Ok(samples.into_iter().map(Into::into).collect())
4757 }
4758
4759 /// Returns the recent utilization and process ID for all running processes.
4760 ///
4761 /// For Maxwell or newer fully supported devices.
4762 ///
4763 /// Reads recent utilization of GPU SM (3D/Compute), framebuffer, video encoder, and video decoder, jpeg decoder, OFA (Optical Flow Accelerator) for all running processes.
4764 /// This wrapper returns utilization values as a [`Vec`] of process utilization records.
4765 /// One utilization sample structure is returned per running process that had
4766 /// non-zero utilization during the last sample period.
4767 /// It includes the CPU timestamp at which the samples were recorded.
4768 /// Individual utilization values are returned as unsigned integer values.
4769 ///
4770 /// This wrapper performs the NVML size query internally and retries with the required capacity.
4771 ///
4772 /// On success, NVML reports the number of process utilization info structures that were actually written.
4773 /// This may differ from a previously read value as instances are created or destroyed.
4774 ///
4775 /// `last_seen_timestamp` represents the CPU timestamp in microseconds at which utilization samples were last read.
4776 /// Use `0` to read utilization based on all samples maintained by the driver's internal sample buffer.
4777 /// Use a timestamp returned by a previous query to read utilization since that query.
4778 ///
4779 /// On MIG-enabled GPUs, querying process utilization is not currently supported.
4780 ///
4781 /// # Errors
4782 ///
4783 /// Returns an error if the installed NVML version does not support the
4784 /// request layout, if the device is inaccessible, if the process list
4785 /// changes while the wrapper is fetching it, if NVML rejects the query, if
4786 /// the device does not support process utilization sampling, if NVML has not
4787 /// been initialized, or if NVML reports an unexpected failure. Missing
4788 /// sample entries are returned as an empty list.
4789 pub fn processes_utilization(
4790 self,
4791 last_seen_timestamp: u64,
4792 ) -> Result<Vec<ProcessUtilizationInfo>> {
4793 let mut info = sys::nvmlProcessesUtilizationInfo_t {
4794 version: struct_version::<sys::nvmlProcessesUtilizationInfo_t>(1),
4795 lastSeenTimeStamp: last_seen_timestamp,
4796 processSamplesCount: 0,
4797 ..Default::default()
4798 };
4799 let status = unsafe { sys::nvmlDeviceGetProcessesUtilizationInfo(self.0, &raw mut info) };
4800 if status == sys::nvmlReturn_t::NVML_SUCCESS && info.processSamplesCount == 0 {
4801 return Ok(Vec::new());
4802 }
4803 if status == sys::nvmlReturn_t::NVML_ERROR_NOT_FOUND {
4804 return Ok(Vec::new());
4805 }
4806 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4807 return Err(status.into());
4808 }
4809
4810 let mut entries = vec![
4811 sys::nvmlProcessUtilizationInfo_v1_t::default();
4812 info.processSamplesCount as usize
4813 ];
4814 info.procUtilArray = entries.as_mut_ptr();
4815 unsafe {
4816 try_ffi!(sys::nvmlDeviceGetProcessesUtilizationInfo(
4817 self.0,
4818 &raw mut info
4819 ))?;
4820 }
4821 entries.truncate(info.processSamplesCount as usize);
4822 Ok(entries.into_iter().map(Into::into).collect())
4823 }
4824
4825 /// Queries the state of per process accounting mode.
4826 ///
4827 /// For Kepler or newer fully supported devices.
4828 ///
4829 /// See [`Device::accounting_stats`] for the reported accounting metrics.
4830 /// See [`sys::nvmlDeviceSetAccountingMode`].
4831 ///
4832 /// # Errors
4833 ///
4834 /// Returns an error if NVML rejects the handle or output, if the device does
4835 /// not support accounting mode, if NVML has not been initialized, or if NVML
4836 /// reports an unexpected failure.
4837 pub fn accounting_mode(self) -> Result<EnableState> {
4838 let mut mode = sys::nvmlEnableState_t::NVML_FEATURE_DISABLED;
4839 unsafe {
4840 try_ffi!(sys::nvmlDeviceGetAccountingMode(self.0, &raw mut mode))?;
4841 }
4842 Ok(mode.into())
4843 }
4844
4845 /// Queries process's accounting stats.
4846 ///
4847 /// For Kepler or newer fully supported devices.
4848 ///
4849 /// Accounting stats capture GPU utilization and other statistics across the lifetime of a process.
4850 /// Accounting stats can be queried during the lifetime of the process and after its termination.
4851 /// The reported running time remains 0 while the process is still running and is updated to the actual duration after termination.
4852 /// Accounting stats are kept in a circular buffer, newly created processes overwrite information about old processes.
4853 ///
4854 /// The returned value includes the per-process accounting metrics exposed by NVML.
4855 /// Use [`Device::accounting_pids`] to list processes with available stats.
4856 ///
4857 /// * Accounting mode must be enabled.
4858 /// See [`Device::accounting_mode`].
4859 /// * Only compute and graphics application stats can be queried.
4860 /// Monitoring application stats cannot be queried because they do not contribute to GPU utilization.
4861 /// * With a PID collision, only stats for the latest process to terminate are reported.
4862 ///
4863 /// # Errors
4864 ///
4865 /// Returns an error if NVML rejects the handle, process ID, or output, if no
4866 /// accounting stats exist for the process, if accounting is unsupported or
4867 /// disabled, if NVML has not been initialized, or if NVML reports an
4868 /// unexpected failure.
4869 pub fn accounting_stats(self, pid: crate::types::Pid) -> Result<AccountingStats> {
4870 unsafe {
4871 let mut stats = MaybeUninit::<sys::nvmlAccountingStats_t>::uninit();
4872 try_ffi!(sys::nvmlDeviceGetAccountingStats(
4873 self.0,
4874 pid.0,
4875 stats.as_mut_ptr()
4876 ))?;
4877 Ok(stats.assume_init().into())
4878 }
4879 }
4880
4881 /// Returns processes that have accounting stats.
4882 /// Returned processes can be running or terminated.
4883 ///
4884 /// For Kepler or newer fully supported devices.
4885 ///
4886 /// This wrapper queries the process count internally.
4887 /// It returns an empty list when no accounting processes are available.
4888 ///
4889 /// See [`Device::accounting_stats`] for the per-process metrics.
4890 ///
4891 /// With a PID collision, some processes may not be accessible before the circular buffer is full.
4892 ///
4893 /// # Errors
4894 ///
4895 /// Returns an error if the accounting PID list changes while the wrapper is
4896 /// fetching it, if NVML rejects the handle or count output, if accounting is
4897 /// unsupported or disabled, if NVML has not been initialized, or if NVML
4898 /// reports an unexpected failure.
4899 pub fn accounting_pids(self) -> Result<Vec<crate::types::Pid>> {
4900 let mut count = 0;
4901 let status =
4902 unsafe { sys::nvmlDeviceGetAccountingPids(self.0, &raw mut count, ptr::null_mut()) };
4903 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
4904 return Ok(Vec::new());
4905 }
4906 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
4907 return Err(status.into());
4908 }
4909
4910 let mut pids = vec![0u32; count as usize];
4911 unsafe {
4912 try_ffi!(sys::nvmlDeviceGetAccountingPids(
4913 self.0,
4914 &raw mut count,
4915 pids.as_mut_ptr(),
4916 ))?;
4917 }
4918 pids.truncate(count as usize);
4919 Ok(pids.into_iter().map(crate::types::Pid).collect())
4920 }
4921
4922 /// Returns the number of processes that the circular buffer with accounting pids can hold.
4923 ///
4924 /// For Kepler or newer fully supported devices.
4925 ///
4926 /// This is the maximum number of processes with stored accounting information before
4927 /// information about the oldest processes is overwritten by newer process information.
4928 ///
4929 /// # Errors
4930 ///
4931 /// Returns an error if NVML rejects the handle or output, if accounting is
4932 /// unsupported or disabled, if NVML has not been initialized, or if NVML
4933 /// reports an unexpected failure.
4934 pub fn accounting_buffer_size(self) -> Result<u32> {
4935 let mut size = 0;
4936 unsafe {
4937 try_ffi!(sys::nvmlDeviceGetAccountingBufferSize(
4938 self.0,
4939 &raw mut size
4940 ))?;
4941 }
4942 Ok(size)
4943 }
4944}
4945
4946fn field_id_for_perf_policy(perf_policy: PerfPolicyType) -> FieldId {
4947 match perf_policy {
4948 PerfPolicyType::Power => FieldId::PERF_POLICY_POWER,
4949 PerfPolicyType::Thermal => FieldId::PERF_POLICY_THERMAL,
4950 PerfPolicyType::SyncBoost => FieldId::PERF_POLICY_SYNC_BOOST,
4951 PerfPolicyType::BoardLimit => FieldId::PERF_POLICY_BOARD_LIMIT,
4952 PerfPolicyType::LowUtilization => FieldId::PERF_POLICY_LOW_UTILIZATION,
4953 PerfPolicyType::Reliability => FieldId::PERF_POLICY_RELIABILITY,
4954 PerfPolicyType::TotalAppClocks => FieldId::PERF_POLICY_TOTAL_APP_CLOCKS,
4955 PerfPolicyType::TotalBaseClocks => FieldId::PERF_POLICY_TOTAL_BASE_CLOCKS,
4956 }
4957}
4958
4959fn field_sample_as_u64(sample: FieldSample) -> Result<u64> {
4960 Ok(match sample.result? {
4961 FieldValue::UnsignedInt(value) => u64::from(value),
4962 FieldValue::UnsignedLong(value) | FieldValue::UnsignedLongLong(value) => value,
4963 FieldValue::SignedInt(value) => u64::try_from(value).map_err(|_| Error::NegativeValue {
4964 name: "field sample".into(),
4965 value: i64::from(value),
4966 })?,
4967 FieldValue::SignedLongLong(value) => {
4968 u64::try_from(value).map_err(|_| Error::NegativeValue {
4969 name: "field sample".into(),
4970 value,
4971 })?
4972 }
4973 FieldValue::UnsignedShort(value) => u64::from(value),
4974 FieldValue::Double(_) => {
4975 return Err(Error::UnexpectedFieldValueType {
4976 name: "numeric field sample".into(),
4977 value: "double".into(),
4978 });
4979 }
4980 })
4981}