nvml_wrapper/nv_link.rs
1use crate::Device;
2
3use crate::enum_wrappers::{
4 bool_from_state,
5 nv_link::{Capability, ErrorCounter, IntDeviceType},
6 state_from_bool,
7};
8
9use crate::enums::nv_link::Counter;
10use crate::error::{nvml_sym, nvml_try, NvmlError};
11use crate::ffi::bindings::*;
12use crate::struct_wrappers::{device::PciInfo, nv_link::UtilizationControl};
13use crate::structs::nv_link::UtilizationCounter;
14
15use std::{
16 convert::TryFrom,
17 mem,
18 os::raw::{c_uint, c_ulonglong},
19};
20
21use static_assertions::assert_impl_all;
22
23/**
24Struct that represents a `Device`'s NvLink.
25
26Obtain this via `Device.link_wrapper_for()`.
27
28Lifetimes are used to enforce that each `NvLink` instance cannot be used after
29the `Device` instance it was obtained from is dropped:
30
31```compile_fail
32use nvml_wrapper::Nvml;
33# use nvml_wrapper::error::*;
34
35# fn main() -> Result<(), NvmlError> {
36let nvml = Nvml::init()?;
37let device = nvml.device_by_index(0)?;
38let link = device.link_wrapper_for(0);
39
40drop(device);
41
42// This won't compile
43link.is_active()?;
44# Ok(())
45# }
46```
47
48Note that I cannot test any `NvLink` methods myself as I do not have access to
49such a link setup. **Test the functionality in this module before you use it**.
50*/
51#[derive(Debug)]
52pub struct NvLink<'device, 'nvml: 'device> {
53 pub(crate) device: &'device Device<'nvml>,
54 pub(crate) link: c_uint,
55}
56
57assert_impl_all!(NvLink: Send, Sync);
58
59impl<'device, 'nvml: 'device> NvLink<'device, 'nvml> {
60 /// Obtain the `Device` reference stored within this struct.
61 pub fn device(&self) -> &Device<'_> {
62 self.device
63 }
64
65 /// Obtain the value of this struct's `link` field.
66 pub fn link(&self) -> u32 {
67 self.link
68 }
69
70 /**
71 Gets whether or not this `Device`'s NvLink is active.
72
73 # Errors
74
75 * `Uninitialized`, if the library has not been successfully initialized
76 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
77 is invalid
78 * `NotSupported`, if this `Device` doesn't support this feature
79 * `UnexpectedVariant`, for which you can read the docs for
80 * `Unknown`, on any unexpected error
81
82 # Device Support
83
84 Supports Pascal or newer fully supported devices.
85 */
86 // Test written
87 #[doc(alias = "nvmlDeviceGetNvLinkState")]
88 pub fn is_active(&self) -> Result<bool, NvmlError> {
89 let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkState.as_ref())?;
90
91 unsafe {
92 let mut state: nvmlEnableState_t = mem::zeroed();
93
94 nvml_try(sym(self.device.handle(), self.link, &mut state))?;
95
96 bool_from_state(state)
97 }
98 }
99
100 /**
101 Gets the NvLink version of this `Device` / `NvLink`.
102
103 # Errors
104
105 * `Uninitialized`, if the library has not been successfully initialized
106 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
107 is invalid
108 * `NotSupported`, if this `Device` doesn't support this feature
109 * `Unknown`, on any unexpected error
110
111 # Device Support
112
113 Supports Pascal or newer fully supported devices.
114 */
115 // Test written
116 #[doc(alias = "nvmlDeviceGetNvLinkVersion")]
117 pub fn version(&self) -> Result<u32, NvmlError> {
118 let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkVersion.as_ref())?;
119
120 unsafe {
121 let mut version: c_uint = mem::zeroed();
122
123 nvml_try(sym(self.device.handle(), self.link, &mut version))?;
124
125 Ok(version)
126 }
127 }
128
129 /**
130 Gets whether or not this `Device` / `NvLink` has a `Capability`.
131
132 # Errors
133
134 * `Uninitialized`, if the library has not been successfully initialized
135 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
136 is invalid
137 * `NotSupported`, if this `Device` doesn't support this feature
138 * `Unknown`, on any unexpected error
139
140 # Device Support
141
142 Supports Pascal or newer fully supported devices.
143 */
144 // Test written
145 #[doc(alias = "nvmlDeviceGetNvLinkCapability")]
146 pub fn has_capability(&self, cap_type: Capability) -> Result<bool, NvmlError> {
147 let sym = nvml_sym(
148 self.device
149 .nvml()
150 .lib
151 .nvmlDeviceGetNvLinkCapability
152 .as_ref(),
153 )?;
154
155 unsafe {
156 // NVIDIA says that this should be interpreted as a boolean
157 let mut capability: c_uint = mem::zeroed();
158
159 nvml_try(sym(
160 self.device.handle(),
161 self.link,
162 cap_type.as_c(),
163 &mut capability,
164 ))?;
165
166 #[allow(clippy::match_like_matches_macro)]
167 Ok(match capability {
168 0 => false,
169 // Not worth an error or a panic if the value is > 1
170 _ => true,
171 })
172 }
173 }
174
175 /**
176 Gets the PCI information for this `NvLink`'s remote node.
177
178 # Errors
179
180 * `Uninitialized`, if the library has not been successfully initialized
181 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
182 is invalid
183 * `NotSupported`, if this `Device` doesn't support this feature
184 * `Unknown`, on any unexpected error
185
186 # Device Support
187
188 Supports Pascal or newer fully supported devices.
189 */
190 // Test written
191 #[doc(alias = "nvmlDeviceGetNvLinkRemotePciInfo_v2")]
192 pub fn remote_pci_info(&self) -> Result<PciInfo, NvmlError> {
193 let sym = nvml_sym(
194 self.device
195 .nvml()
196 .lib
197 .nvmlDeviceGetNvLinkRemotePciInfo_v2
198 .as_ref(),
199 )?;
200
201 unsafe {
202 let mut pci_info: nvmlPciInfo_t = mem::zeroed();
203
204 nvml_try(sym(self.device.handle(), self.link, &mut pci_info))?;
205
206 PciInfo::try_from(pci_info, false)
207 }
208 }
209
210 /**
211 Gets the specified `ErrorCounter` value.
212
213 # Errors
214
215 * `Uninitialized`, if the library has not been successfully initialized
216 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
217 is invalid
218 * `NotSupported`, if this `Device` doesn't support this feature
219 * `Unknown`, on any unexpected error
220
221 # Device Support
222
223 Supports Pascal or newer fully supported devices.
224 */
225 // Test written
226 #[doc(alias = "nvmlDeviceGetNvLinkErrorCounter")]
227 pub fn error_counter(&self, counter: ErrorCounter) -> Result<u64, NvmlError> {
228 let sym = nvml_sym(
229 self.device
230 .nvml()
231 .lib
232 .nvmlDeviceGetNvLinkErrorCounter
233 .as_ref(),
234 )?;
235
236 unsafe {
237 let mut value: c_ulonglong = mem::zeroed();
238
239 nvml_try(sym(
240 self.device.handle(),
241 self.link,
242 counter.as_c(),
243 &mut value,
244 ))?;
245
246 Ok(value)
247 }
248 }
249
250 /**
251 Resets all error counters to zero.
252
253 # Errors
254
255 * `Uninitialized`, if the library has not been successfully initialized
256 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
257 is invalid
258 * `NotSupported`, if this `Device` doesn't support this feature
259 * `Unknown`, on any unexpected error
260
261 # Device Support
262
263 Supports Pascal or newer fully supported devices.
264 */
265 // No-run test written
266 #[doc(alias = "nvmlDeviceResetNvLinkErrorCounters")]
267 pub fn reset_error_counters(&mut self) -> Result<(), NvmlError> {
268 let sym = nvml_sym(
269 self.device
270 .nvml()
271 .lib
272 .nvmlDeviceResetNvLinkErrorCounters
273 .as_ref(),
274 )?;
275
276 unsafe { nvml_try(sym(self.device.handle(), self.link)) }
277 }
278
279 /**
280 Sets the NvLink utilization counter control information for the specified
281 `Counter`.
282
283 The counters will be reset if `reset_counters` is true.
284
285 # Errors
286
287 * `Uninitialized`, if the library has not been successfully initialized
288 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
289 is invalid
290 * `NotSupported`, if this `Device` doesn't support this feature
291 * `Unknown`, on any unexpected error
292
293 # Device Support
294
295 Supports Pascal or newer fully supported devices.
296 */
297 // No-run test written
298 #[doc(alias = "nvmlDeviceSetNvLinkUtilizationControl")]
299 pub fn set_utilization_control(
300 &mut self,
301 counter: Counter,
302 settings: UtilizationControl,
303 reset_counters: bool,
304 ) -> Result<(), NvmlError> {
305 let reset: c_uint = u32::from(reset_counters);
306
307 let sym = nvml_sym(
308 self.device
309 .nvml()
310 .lib
311 .nvmlDeviceSetNvLinkUtilizationControl
312 .as_ref(),
313 )?;
314
315 unsafe {
316 nvml_try(sym(
317 self.device.handle(),
318 self.link,
319 counter as c_uint,
320 &mut settings.as_c(),
321 reset,
322 ))
323 }
324 }
325
326 /**
327 Gets the NvLink utilization counter control information for the specified
328 `Counter`.
329
330 # Errors
331
332 * `Uninitialized`, if the library has not been successfully initialized
333 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
334 is invalid
335 * `NotSupported`, if this `Device` doesn't support this feature
336 * `Unknown`, on any unexpected error
337
338 # Device Support
339
340 Supports Pascal or newer fully supported devices.
341 */
342 // Test written
343 #[doc(alias = "nvmlDeviceGetNvLinkUtilizationControl")]
344 pub fn utilization_control(&self, counter: Counter) -> Result<UtilizationControl, NvmlError> {
345 let sym = nvml_sym(
346 self.device
347 .nvml()
348 .lib
349 .nvmlDeviceGetNvLinkUtilizationControl
350 .as_ref(),
351 )?;
352
353 unsafe {
354 let mut controls: nvmlNvLinkUtilizationControl_t = mem::zeroed();
355
356 nvml_try(sym(
357 self.device.handle(),
358 self.link,
359 counter as c_uint,
360 &mut controls,
361 ))?;
362
363 UtilizationControl::try_from(controls)
364 }
365 }
366
367 /**
368 Gets the NvLink utilization counter for the given `counter`.
369
370 The retrieved values are based on the current controls set for the specified
371 `Counter`. **You should use `.set_utilization_control()` before calling this**
372 as the utilization counters have no default state.
373
374 I do not attempt to verify, statically or at runtime, that you have controls
375 set for `counter` prior to calling this method on `counter`. NVIDIA says that
376 it is "In general\[,\] good practice", which does not sound to me as if it
377 is in any way unsafe to make this call without having set controls. I don't
378 believe it's worth the overhead of using a `Mutex`'d bool to track whether
379 or not you have set controls, and it's certainly not worth the effort to
380 statically verify it via the type system.
381
382 That being said, I don't know what exactly would happen, either, and I have
383 no means of finding out. If you do and discover that garbage values are
384 returned, for instance, I would love to hear about it; that would likely
385 cause this decision to be reconsidered.
386
387 # Errors
388
389 * `Uninitialized`, if the library has not been successfully initialized
390 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
391 is invalid
392 * `NotSupported`, if this `Device` doesn't support this feature
393 * `Unknown`, on any unexpected error
394
395 # Device Support
396
397 Supports Pascal or newer fully supported devices.
398 */
399 // No-run test written
400 #[doc(alias = "nvmlDeviceGetNvLinkUtilizationCounter")]
401 pub fn utilization_counter(&self, counter: Counter) -> Result<UtilizationCounter, NvmlError> {
402 let sym = nvml_sym(
403 self.device
404 .nvml()
405 .lib
406 .nvmlDeviceGetNvLinkUtilizationCounter
407 .as_ref(),
408 )?;
409
410 unsafe {
411 let mut receive: c_ulonglong = mem::zeroed();
412 let mut send: c_ulonglong = mem::zeroed();
413
414 nvml_try(sym(
415 self.device.handle(),
416 self.link,
417 counter as c_uint,
418 &mut receive,
419 &mut send,
420 ))?;
421
422 Ok(UtilizationCounter { receive, send })
423 }
424 }
425
426 /**
427 Freezes the specified NvLink utilization `Counter`.
428
429 Both the receive and send counters will be frozen (if I'm reading NVIDIA's
430 meaning correctly).
431
432 # Errors
433
434 * `Uninitialized`, if the library has not been successfully initialized
435 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
436 is invalid
437 * `NotSupported`, if this `Device` doesn't support this feature
438 * `Unknown`, on any unexpected error
439
440 # Device Support
441
442 Supports Pascal or newer fully supported devices.
443 */
444 // No-run test written
445 #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
446 pub fn freeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
447 self.set_utilization_counter_frozen(counter, true)
448 }
449
450 /**
451 Unfreezes the specified NvLink utilization `Counter`.
452
453 Both the receive and send counters will be unfrozen (if I'm reading NVIDIA's
454 meaning correctly).
455
456 # Errors
457
458 * `Uninitialized`, if the library has not been successfully initialized
459 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
460 is invalid
461 * `NotSupported`, if this `Device` doesn't support this feature
462 * `Unknown`, on any unexpected error
463
464 # Device Support
465
466 Supports Pascal or newer fully supported devices.
467 */
468 // No-run test written
469 #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
470 pub fn unfreeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
471 self.set_utilization_counter_frozen(counter, false)
472 }
473
474 fn set_utilization_counter_frozen(
475 &mut self,
476 counter: Counter,
477 frozen: bool,
478 ) -> Result<(), NvmlError> {
479 let sym = nvml_sym(
480 self.device
481 .nvml()
482 .lib
483 .nvmlDeviceFreezeNvLinkUtilizationCounter
484 .as_ref(),
485 )?;
486
487 unsafe {
488 nvml_try(sym(
489 self.device.handle(),
490 self.link,
491 counter as c_uint,
492 state_from_bool(frozen),
493 ))
494 }
495 }
496
497 /**
498 Resets the specified NvLink utilization `Counter`.
499
500 Both the receive and send counters will be rest (if I'm reading NVIDIA's
501 meaning correctly).
502
503 # Errors
504
505 * `Uninitialized`, if the library has not been successfully initialized
506 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
507 is invalid
508 * `NotSupported`, if this `Device` doesn't support this feature
509 * `Unknown`, on any unexpected error
510
511 # Device Support
512
513 Supports Pascal or newer fully supported devices.
514 */
515 // No-run test written
516 #[doc(alias = "nvmlDeviceResetNvLinkUtilizationCounter")]
517 pub fn reset_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
518 let sym = nvml_sym(
519 self.device
520 .nvml()
521 .lib
522 .nvmlDeviceResetNvLinkUtilizationCounter
523 .as_ref(),
524 )?;
525
526 unsafe { nvml_try(sym(self.device.handle(), self.link, counter as c_uint)) }
527 }
528
529 /**
530 Get the bandwidth mode of a NvLink connection
531
532 Note that, at the moment, this the global system value, the one based on a device
533 is not yet available.
534
535 # Errors
536
537 * `Uninitialized`, if the library has not been successfully initialized
538 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
539 is invalid
540 * `NotSupported`, if this `Device` doesn't support this feature
541 * `Unknown`, on any unexpected error
542 */
543 #[doc(alias = "nvmlSystemGetNvLinkBwMode")]
544 pub fn bw_mode(&self) -> Result<u32, NvmlError> {
545 let sym = nvml_sym(self.device.nvml().lib.nvmlSystemGetNvlinkBwMode.as_ref())?;
546
547 unsafe {
548 let mut mode: c_uint = 0;
549 nvml_try(sym(&mut mode))?;
550 Ok(mode)
551 }
552 }
553
554 /**
555 Set the bandwidth mode of a NvLink connection
556
557 Note that, at the moment, this the global system value, the one based on a device
558 is not yet available.
559
560 # Errors
561
562 * `Uninitialized`, if the library has not been successfully initialized
563 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
564 is invalid
565 * `NotSupported`, if this `Device` doesn't support this feature
566 * `Unknown`, on any unexpected error
567 */
568 #[doc(alias = "nvmlSystemSetNvLinkBwMode")]
569 pub fn set_bw_mode(&self, mode: u32) -> Result<(), NvmlError> {
570 let sym = nvml_sym(self.device.nvml().lib.nvmlSystemSetNvlinkBwMode.as_ref())?;
571
572 unsafe { nvml_try(sym(mode)) }
573 }
574
575 /**
576 Get the NvLink device type for a given link index
577
578 # Errors
579 * `Uninitialized`, if the library has not been successfully initialized
580 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
581 is invalid
582 * `NotSupported`, if this `Device` doesn't support this feature
583 * `Unknown`, on any unexpected error
584 */
585 #[doc(alias = "nvmlDeviceGetNvLinkRemoteDeviceType")]
586 pub fn remote_device_type(&self, link: u32) -> Result<IntDeviceType, NvmlError> {
587 let sym = nvml_sym(
588 self.device
589 .nvml()
590 .lib
591 .nvmlDeviceGetNvLinkRemoteDeviceType
592 .as_ref(),
593 )?;
594
595 unsafe {
596 let device_type: IntDeviceType = IntDeviceType::Unknown;
597 nvml_try(sym(self.device.handle(), link, &mut device_type.as_c()))?;
598 Ok(device_type)
599 }
600 }
601}
602
#[cfg(test)]
#[deny(unused_mut)]
mod test {
    use crate::bitmasks::nv_link::*;
    use crate::enum_wrappers::nv_link::*;
    use crate::enums::nv_link::*;
    use crate::struct_wrappers::nv_link::*;
    use crate::test_utils::*;

    #[test]
    #[ignore = "my machine does not support this call"]
    fn is_active() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.is_active())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn version() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.version())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn has_capability() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.has_capability(Capability::P2p))
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn remote_pci_info() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            let pci = nv_link.remote_pci_info()?;
            // The sub-system ID is expected to be absent for a remote node.
            assert_eq!(pci.pci_sub_system_id, None);
            Ok(pci)
        })
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn error_counter() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.error_counter(ErrorCounter::DlRecovery)
        })
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn reset_error_counters() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_error_counters().unwrap();
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn set_utilization_control() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        let packet_filter = PacketTypes::NO_OP
            | PacketTypes::READ
            | PacketTypes::WRITE
            | PacketTypes::RATOM
            | PacketTypes::WITH_DATA;
        let control = UtilizationControl {
            units: UtilizationCountUnit::Cycles,
            packet_filter,
        };

        nv_link
            .set_utilization_control(Counter::One, control, false)
            .unwrap()
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn utilization_control() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.utilization_control(Counter::One)
        })
    }

    // Should not be called without first modifying link state, so this only
    // has to compile; it is never run.
    #[allow(dead_code)]
    fn utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        nv_link.utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn freeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.freeze_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn unfreeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.unfreeze_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn reset_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this only has to compile; it is never run.
    #[allow(dead_code)]
    fn bw_mode() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        // Read the current mode and write the same value straight back.
        let current = nv_link.bw_mode().unwrap();
        nv_link.set_bw_mode(current).unwrap();
    }
}