Skip to main content

nvml_wrapper/
nv_link.rs

1use crate::Device;
2
3use crate::enum_wrappers::{
4    bool_from_state,
5    nv_link::{Capability, ErrorCounter, IntDeviceType},
6    state_from_bool,
7};
8
9use crate::enums::nv_link::Counter;
10use crate::error::{nvml_sym, nvml_try, NvmlError};
11use crate::ffi::bindings::*;
12use crate::struct_wrappers::{device::PciInfo, nv_link::UtilizationControl};
13use crate::structs::nv_link::UtilizationCounter;
14
15use std::{
16    convert::TryFrom,
17    mem,
18    os::raw::{c_uint, c_ulonglong},
19};
20
21use static_assertions::assert_impl_all;
22
23/**
24Struct that represents a `Device`'s NvLink.
25
26Obtain this via `Device.link_wrapper_for()`.
27
28Lifetimes are used to enforce that each `NvLink` instance cannot be used after
29the `Device` instance it was obtained from is dropped:
30
31```compile_fail
32use nvml_wrapper::Nvml;
33# use nvml_wrapper::error::*;
34
35# fn main() -> Result<(), NvmlError> {
36let nvml = Nvml::init()?;
37let device = nvml.device_by_index(0)?;
38let link = device.link_wrapper_for(0);
39
40drop(device);
41
42// This won't compile
43link.is_active()?;
44# Ok(())
45# }
46```
47
48Note that I cannot test any `NvLink` methods myself as I do not have access to
49such a link setup. **Test the functionality in this module before you use it**.
50*/
51#[derive(Debug)]
52pub struct NvLink<'device, 'nvml: 'device> {
53    pub(crate) device: &'device Device<'nvml>,
54    pub(crate) link: c_uint,
55}
56
57assert_impl_all!(NvLink: Send, Sync);
58
59impl<'device, 'nvml: 'device> NvLink<'device, 'nvml> {
60    /// Obtain the `Device` reference stored within this struct.
61    pub fn device(&self) -> &Device<'_> {
62        self.device
63    }
64
65    /// Obtain the value of this struct's `link` field.
66    pub fn link(&self) -> u32 {
67        self.link
68    }
69
70    /**
71    Gets whether or not this `Device`'s NvLink is active.
72
73    # Errors
74
75    * `Uninitialized`, if the library has not been successfully initialized
76    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
77      is invalid
78    * `NotSupported`, if this `Device` doesn't support this feature
79    * `UnexpectedVariant`, for which you can read the docs for
80    * `Unknown`, on any unexpected error
81
82    # Device Support
83
84    Supports Pascal or newer fully supported devices.
85    */
86    // Test written
87    #[doc(alias = "nvmlDeviceGetNvLinkState")]
88    pub fn is_active(&self) -> Result<bool, NvmlError> {
89        let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkState.as_ref())?;
90
91        unsafe {
92            let mut state: nvmlEnableState_t = mem::zeroed();
93
94            nvml_try(sym(self.device.handle(), self.link, &mut state))?;
95
96            bool_from_state(state)
97        }
98    }
99
100    /**
101    Gets the NvLink version of this `Device` / `NvLink`.
102
103    # Errors
104
105    * `Uninitialized`, if the library has not been successfully initialized
106    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
107      is invalid
108    * `NotSupported`, if this `Device` doesn't support this feature
109    * `Unknown`, on any unexpected error
110
111    # Device Support
112
113    Supports Pascal or newer fully supported devices.
114    */
115    // Test written
116    #[doc(alias = "nvmlDeviceGetNvLinkVersion")]
117    pub fn version(&self) -> Result<u32, NvmlError> {
118        let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkVersion.as_ref())?;
119
120        unsafe {
121            let mut version: c_uint = mem::zeroed();
122
123            nvml_try(sym(self.device.handle(), self.link, &mut version))?;
124
125            Ok(version)
126        }
127    }
128
129    /**
130    Gets whether or not this `Device` / `NvLink` has a `Capability`.
131
132    # Errors
133
134    * `Uninitialized`, if the library has not been successfully initialized
135    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
136      is invalid
137    * `NotSupported`, if this `Device` doesn't support this feature
138    * `Unknown`, on any unexpected error
139
140    # Device Support
141
142    Supports Pascal or newer fully supported devices.
143    */
144    // Test written
145    #[doc(alias = "nvmlDeviceGetNvLinkCapability")]
146    pub fn has_capability(&self, cap_type: Capability) -> Result<bool, NvmlError> {
147        let sym = nvml_sym(
148            self.device
149                .nvml()
150                .lib
151                .nvmlDeviceGetNvLinkCapability
152                .as_ref(),
153        )?;
154
155        unsafe {
156            // NVIDIA says that this should be interpreted as a boolean
157            let mut capability: c_uint = mem::zeroed();
158
159            nvml_try(sym(
160                self.device.handle(),
161                self.link,
162                cap_type.as_c(),
163                &mut capability,
164            ))?;
165
166            #[allow(clippy::match_like_matches_macro)]
167            Ok(match capability {
168                0 => false,
169                // Not worth an error or a panic if the value is > 1
170                _ => true,
171            })
172        }
173    }
174
175    /**
176    Gets the PCI information for this `NvLink`'s remote node.
177
178    # Errors
179
180    * `Uninitialized`, if the library has not been successfully initialized
181    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
182      is invalid
183    * `NotSupported`, if this `Device` doesn't support this feature
184    * `Unknown`, on any unexpected error
185
186    # Device Support
187
188    Supports Pascal or newer fully supported devices.
189    */
190    // Test written
191    #[doc(alias = "nvmlDeviceGetNvLinkRemotePciInfo_v2")]
192    pub fn remote_pci_info(&self) -> Result<PciInfo, NvmlError> {
193        let sym = nvml_sym(
194            self.device
195                .nvml()
196                .lib
197                .nvmlDeviceGetNvLinkRemotePciInfo_v2
198                .as_ref(),
199        )?;
200
201        unsafe {
202            let mut pci_info: nvmlPciInfo_t = mem::zeroed();
203
204            nvml_try(sym(self.device.handle(), self.link, &mut pci_info))?;
205
206            PciInfo::try_from(pci_info, false)
207        }
208    }
209
210    /**
211    Gets the specified `ErrorCounter` value.
212
213    # Errors
214
215    * `Uninitialized`, if the library has not been successfully initialized
216    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
217      is invalid
218    * `NotSupported`, if this `Device` doesn't support this feature
219    * `Unknown`, on any unexpected error
220
221    # Device Support
222
223    Supports Pascal or newer fully supported devices.
224    */
225    // Test written
226    #[doc(alias = "nvmlDeviceGetNvLinkErrorCounter")]
227    pub fn error_counter(&self, counter: ErrorCounter) -> Result<u64, NvmlError> {
228        let sym = nvml_sym(
229            self.device
230                .nvml()
231                .lib
232                .nvmlDeviceGetNvLinkErrorCounter
233                .as_ref(),
234        )?;
235
236        unsafe {
237            let mut value: c_ulonglong = mem::zeroed();
238
239            nvml_try(sym(
240                self.device.handle(),
241                self.link,
242                counter.as_c(),
243                &mut value,
244            ))?;
245
246            Ok(value)
247        }
248    }
249
250    /**
251    Resets all error counters to zero.
252
253    # Errors
254
255    * `Uninitialized`, if the library has not been successfully initialized
256    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
257      is invalid
258    * `NotSupported`, if this `Device` doesn't support this feature
259    * `Unknown`, on any unexpected error
260
261    # Device Support
262
263    Supports Pascal or newer fully supported devices.
264    */
265    // No-run test written
266    #[doc(alias = "nvmlDeviceResetNvLinkErrorCounters")]
267    pub fn reset_error_counters(&mut self) -> Result<(), NvmlError> {
268        let sym = nvml_sym(
269            self.device
270                .nvml()
271                .lib
272                .nvmlDeviceResetNvLinkErrorCounters
273                .as_ref(),
274        )?;
275
276        unsafe { nvml_try(sym(self.device.handle(), self.link)) }
277    }
278
279    /**
280    Sets the NvLink utilization counter control information for the specified
281    `Counter`.
282
283    The counters will be reset if `reset_counters` is true.
284
285    # Errors
286
287    * `Uninitialized`, if the library has not been successfully initialized
288    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
289      is invalid
290    * `NotSupported`, if this `Device` doesn't support this feature
291    * `Unknown`, on any unexpected error
292
293    # Device Support
294
295    Supports Pascal or newer fully supported devices.
296    */
297    // No-run test written
298    #[doc(alias = "nvmlDeviceSetNvLinkUtilizationControl")]
299    pub fn set_utilization_control(
300        &mut self,
301        counter: Counter,
302        settings: UtilizationControl,
303        reset_counters: bool,
304    ) -> Result<(), NvmlError> {
305        let reset: c_uint = u32::from(reset_counters);
306
307        let sym = nvml_sym(
308            self.device
309                .nvml()
310                .lib
311                .nvmlDeviceSetNvLinkUtilizationControl
312                .as_ref(),
313        )?;
314
315        unsafe {
316            nvml_try(sym(
317                self.device.handle(),
318                self.link,
319                counter as c_uint,
320                &mut settings.as_c(),
321                reset,
322            ))
323        }
324    }
325
326    /**
327    Gets the NvLink utilization counter control information for the specified
328    `Counter`.
329
330    # Errors
331
332    * `Uninitialized`, if the library has not been successfully initialized
333    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
334      is invalid
335    * `NotSupported`, if this `Device` doesn't support this feature
336    * `Unknown`, on any unexpected error
337
338    # Device Support
339
340    Supports Pascal or newer fully supported devices.
341    */
342    // Test written
343    #[doc(alias = "nvmlDeviceGetNvLinkUtilizationControl")]
344    pub fn utilization_control(&self, counter: Counter) -> Result<UtilizationControl, NvmlError> {
345        let sym = nvml_sym(
346            self.device
347                .nvml()
348                .lib
349                .nvmlDeviceGetNvLinkUtilizationControl
350                .as_ref(),
351        )?;
352
353        unsafe {
354            let mut controls: nvmlNvLinkUtilizationControl_t = mem::zeroed();
355
356            nvml_try(sym(
357                self.device.handle(),
358                self.link,
359                counter as c_uint,
360                &mut controls,
361            ))?;
362
363            UtilizationControl::try_from(controls)
364        }
365    }
366
367    /**
368    Gets the NvLink utilization counter for the given `counter`.
369
370    The retrieved values are based on the current controls set for the specified
371    `Counter`. **You should use `.set_utilization_control()` before calling this**
372    as the utilization counters have no default state.
373
374    I do not attempt to verify, statically or at runtime, that you have controls
375    set for `counter` prior to calling this method on `counter`. NVIDIA says that
376    it is "In general\[,\] good practice", which does not sound to me as if it
377    is in any way unsafe to make this call without having set controls. I don't
378    believe it's worth the overhead of using a `Mutex`'d bool to track whether
379    or not you have set controls, and it's certainly not worth the effort to
380    statically verify it via the type system.
381
382    That being said, I don't know what exactly would happen, either, and I have
383    no means of finding out. If you do and discover that garbage values are
384    returned, for instance, I would love to hear about it; that would likely
385    cause this decision to be reconsidered.
386
387    # Errors
388
389    * `Uninitialized`, if the library has not been successfully initialized
390    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
391      is invalid
392    * `NotSupported`, if this `Device` doesn't support this feature
393    * `Unknown`, on any unexpected error
394
395    # Device Support
396
397    Supports Pascal or newer fully supported devices.
398    */
399    // No-run test written
400    #[doc(alias = "nvmlDeviceGetNvLinkUtilizationCounter")]
401    pub fn utilization_counter(&self, counter: Counter) -> Result<UtilizationCounter, NvmlError> {
402        let sym = nvml_sym(
403            self.device
404                .nvml()
405                .lib
406                .nvmlDeviceGetNvLinkUtilizationCounter
407                .as_ref(),
408        )?;
409
410        unsafe {
411            let mut receive: c_ulonglong = mem::zeroed();
412            let mut send: c_ulonglong = mem::zeroed();
413
414            nvml_try(sym(
415                self.device.handle(),
416                self.link,
417                counter as c_uint,
418                &mut receive,
419                &mut send,
420            ))?;
421
422            Ok(UtilizationCounter { receive, send })
423        }
424    }
425
426    /**
427    Freezes the specified NvLink utilization `Counter`.
428
429    Both the receive and send counters will be frozen (if I'm reading NVIDIA's
430    meaning correctly).
431
432    # Errors
433
434    * `Uninitialized`, if the library has not been successfully initialized
435    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
436      is invalid
437    * `NotSupported`, if this `Device` doesn't support this feature
438    * `Unknown`, on any unexpected error
439
440    # Device Support
441
442    Supports Pascal or newer fully supported devices.
443    */
444    // No-run test written
445    #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
446    pub fn freeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
447        self.set_utilization_counter_frozen(counter, true)
448    }
449
450    /**
451    Unfreezes the specified NvLink utilization `Counter`.
452
453    Both the receive and send counters will be unfrozen (if I'm reading NVIDIA's
454    meaning correctly).
455
456    # Errors
457
458    * `Uninitialized`, if the library has not been successfully initialized
459    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
460      is invalid
461    * `NotSupported`, if this `Device` doesn't support this feature
462    * `Unknown`, on any unexpected error
463
464    # Device Support
465
466    Supports Pascal or newer fully supported devices.
467    */
468    // No-run test written
469    #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
470    pub fn unfreeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
471        self.set_utilization_counter_frozen(counter, false)
472    }
473
474    fn set_utilization_counter_frozen(
475        &mut self,
476        counter: Counter,
477        frozen: bool,
478    ) -> Result<(), NvmlError> {
479        let sym = nvml_sym(
480            self.device
481                .nvml()
482                .lib
483                .nvmlDeviceFreezeNvLinkUtilizationCounter
484                .as_ref(),
485        )?;
486
487        unsafe {
488            nvml_try(sym(
489                self.device.handle(),
490                self.link,
491                counter as c_uint,
492                state_from_bool(frozen),
493            ))
494        }
495    }
496
497    /**
498    Resets the specified NvLink utilization `Counter`.
499
500    Both the receive and send counters will be rest (if I'm reading NVIDIA's
501    meaning correctly).
502
503    # Errors
504
505    * `Uninitialized`, if the library has not been successfully initialized
506    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
507      is invalid
508    * `NotSupported`, if this `Device` doesn't support this feature
509    * `Unknown`, on any unexpected error
510
511    # Device Support
512
513    Supports Pascal or newer fully supported devices.
514    */
515    // No-run test written
516    #[doc(alias = "nvmlDeviceResetNvLinkUtilizationCounter")]
517    pub fn reset_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
518        let sym = nvml_sym(
519            self.device
520                .nvml()
521                .lib
522                .nvmlDeviceResetNvLinkUtilizationCounter
523                .as_ref(),
524        )?;
525
526        unsafe { nvml_try(sym(self.device.handle(), self.link, counter as c_uint)) }
527    }
528
529    /**
530     Get the bandwidth mode of a NvLink connection
531
532     Note that, at the moment, this the global system value, the one based on a device
533     is not yet available.
534
535    # Errors
536
537    * `Uninitialized`, if the library has not been successfully initialized
538    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
539      is invalid
540    * `NotSupported`, if this `Device` doesn't support this feature
541    * `Unknown`, on any unexpected error
542    */
543    #[doc(alias = "nvmlSystemGetNvLinkBwMode")]
544    pub fn bw_mode(&self) -> Result<u32, NvmlError> {
545        let sym = nvml_sym(self.device.nvml().lib.nvmlSystemGetNvlinkBwMode.as_ref())?;
546
547        unsafe {
548            let mut mode: c_uint = 0;
549            nvml_try(sym(&mut mode))?;
550            Ok(mode)
551        }
552    }
553
554    /**
555     Set the bandwidth mode of a NvLink connection
556
557     Note that, at the moment, this the global system value, the one based on a device
558     is not yet available.
559
560    # Errors
561
562    * `Uninitialized`, if the library has not been successfully initialized
563    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
564      is invalid
565    * `NotSupported`, if this `Device` doesn't support this feature
566    * `Unknown`, on any unexpected error
567    */
568    #[doc(alias = "nvmlSystemSetNvLinkBwMode")]
569    pub fn set_bw_mode(&self, mode: u32) -> Result<(), NvmlError> {
570        let sym = nvml_sym(self.device.nvml().lib.nvmlSystemSetNvlinkBwMode.as_ref())?;
571
572        unsafe { nvml_try(sym(mode)) }
573    }
574
575    /**
576     Get the NvLink device type for a given link index
577
578    # Errors
579    * `Uninitialized`, if the library has not been successfully initialized
580    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
581      is invalid
582    * `NotSupported`, if this `Device` doesn't support this feature
583    * `Unknown`, on any unexpected error
584    */
585    #[doc(alias = "nvmlDeviceGetNvLinkRemoteDeviceType")]
586    pub fn remote_device_type(&self, link: u32) -> Result<IntDeviceType, NvmlError> {
587        let sym = nvml_sym(
588            self.device
589                .nvml()
590                .lib
591                .nvmlDeviceGetNvLinkRemoteDeviceType
592                .as_ref(),
593        )?;
594
595        unsafe {
596            let device_type: IntDeviceType = IntDeviceType::Unknown;
597            nvml_try(sym(self.device.handle(), link, &mut device_type.as_c()))?;
598            Ok(device_type)
599        }
600    }
601}
602
#[cfg(test)]
#[deny(unused_mut)]
mod test {
    use crate::bitmasks::nv_link::*;
    use crate::enum_wrappers::nv_link::*;
    use crate::enums::nv_link::*;
    use crate::struct_wrappers::nv_link::*;
    use crate::test_utils::*;

    // Read-only queries are real (but ignored-by-default) tests. Functions
    // without `#[test]` exist only so that the calls are type-checked.

    #[test]
    #[ignore = "my machine does not support this call"]
    fn is_active() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.is_active())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn version() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.version())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn has_capability() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.has_capability(Capability::P2p))
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn remote_pci_info() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.remote_pci_info().map(|pci| {
                // The remote PCI info is expected to carry no sub-system id.
                assert_eq!(pci.pci_sub_system_id, None);
                pci
            })
        })
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn error_counter() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.error_counter(ErrorCounter::DlRecovery)
        })
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn reset_error_counters() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_error_counters().unwrap();
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn set_utilization_control() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        let packet_filter = PacketTypes::NO_OP
            | PacketTypes::READ
            | PacketTypes::WRITE
            | PacketTypes::RATOM
            | PacketTypes::WITH_DATA;
        let settings = UtilizationControl {
            units: UtilizationCountUnit::Cycles,
            packet_filter,
        };

        nv_link
            .set_utilization_control(Counter::One, settings, false)
            .unwrap()
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn utilization_control() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.utilization_control(Counter::One))
    }

    // Should only be called after link state has been modified, so this is
    // compiled but deliberately never run
    #[allow(dead_code)]
    fn utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        nv_link.utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn freeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.freeze_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn unfreeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.unfreeze_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn reset_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_utilization_counter(Counter::One).unwrap();
    }

    // Changes link state, so this is compiled but deliberately never run
    #[allow(dead_code)]
    fn bw_mode() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        // Read the current mode and write the same value straight back.
        let current = nv_link.bw_mode().unwrap();
        nv_link.set_bw_mode(current).unwrap();
    }
}