nvml_wrapper/
nv_link.rs

1use crate::Device;
2
3use crate::enum_wrappers::{
4    bool_from_state,
5    nv_link::{Capability, ErrorCounter},
6    state_from_bool,
7};
8
9use crate::enums::nv_link::Counter;
10use crate::error::{nvml_sym, nvml_try, NvmlError};
11use crate::ffi::bindings::*;
12use crate::struct_wrappers::{device::PciInfo, nv_link::UtilizationControl};
13use crate::structs::nv_link::UtilizationCounter;
14
15use std::{
16    convert::TryFrom,
17    mem,
18    os::raw::{c_uint, c_ulonglong},
19};
20
21use static_assertions::assert_impl_all;
22
23/**
24Struct that represents a `Device`'s NvLink.
25
26Obtain this via `Device.link_wrapper_for()`.
27
28Lifetimes are used to enforce that each `NvLink` instance cannot be used after
29the `Device` instance it was obtained from is dropped:
30
31```compile_fail
32use nvml_wrapper::Nvml;
33# use nvml_wrapper::error::*;
34
35# fn main() -> Result<(), NvmlError> {
36let nvml = Nvml::init()?;
37let device = nvml.device_by_index(0)?;
38let link = device.link_wrapper_for(0);
39
40drop(device);
41
42// This won't compile
43link.is_active()?;
44# Ok(())
45# }
46```
47
48Note that I cannot test any `NvLink` methods myself as I do not have access to
49such a link setup. **Test the functionality in this module before you use it**.
50*/
51#[derive(Debug)]
52pub struct NvLink<'device, 'nvml: 'device> {
53    pub(crate) device: &'device Device<'nvml>,
54    pub(crate) link: c_uint,
55}
56
57assert_impl_all!(NvLink: Send, Sync);
58
59impl<'device, 'nvml: 'device> NvLink<'device, 'nvml> {
60    /// Obtain the `Device` reference stored within this struct.
61    pub fn device(&self) -> &Device {
62        self.device
63    }
64
65    /// Obtain the value of this struct's `link` field.
66    pub fn link(&self) -> u32 {
67        self.link
68    }
69
70    /**
71    Gets whether or not this `Device`'s NvLink is active.
72
73    # Errors
74
75    * `Uninitialized`, if the library has not been successfully initialized
76    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
77    is invalid
78    * `NotSupported`, if this `Device` doesn't support this feature
79    * `UnexpectedVariant`, for which you can read the docs for
80    * `Unknown`, on any unexpected error
81
82    # Device Support
83
84    Supports Pascal or newer fully supported devices.
85    */
86    // Test written
87    #[doc(alias = "nvmlDeviceGetNvLinkState")]
88    pub fn is_active(&self) -> Result<bool, NvmlError> {
89        let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkState.as_ref())?;
90
91        unsafe {
92            let mut state: nvmlEnableState_t = mem::zeroed();
93
94            nvml_try(sym(self.device.handle(), self.link, &mut state))?;
95
96            bool_from_state(state)
97        }
98    }
99
100    /**
101    Gets the NvLink version of this `Device` / `NvLink`.
102
103    # Errors
104
105    * `Uninitialized`, if the library has not been successfully initialized
106    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
107    is invalid
108    * `NotSupported`, if this `Device` doesn't support this feature
109    * `Unknown`, on any unexpected error
110
111    # Device Support
112
113    Supports Pascal or newer fully supported devices.
114    */
115    // Test written
116    #[doc(alias = "nvmlDeviceGetNvLinkVersion")]
117    pub fn version(&self) -> Result<u32, NvmlError> {
118        let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkVersion.as_ref())?;
119
120        unsafe {
121            let mut version: c_uint = mem::zeroed();
122
123            nvml_try(sym(self.device.handle(), self.link, &mut version))?;
124
125            Ok(version)
126        }
127    }
128
129    /**
130    Gets whether or not this `Device` / `NvLink` has a `Capability`.
131
132    # Errors
133
134    * `Uninitialized`, if the library has not been successfully initialized
135    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
136    is invalid
137    * `NotSupported`, if this `Device` doesn't support this feature
138    * `Unknown`, on any unexpected error
139
140    # Device Support
141
142    Supports Pascal or newer fully supported devices.
143    */
144    // Test written
145    #[doc(alias = "nvmlDeviceGetNvLinkCapability")]
146    pub fn has_capability(&self, cap_type: Capability) -> Result<bool, NvmlError> {
147        let sym = nvml_sym(
148            self.device
149                .nvml()
150                .lib
151                .nvmlDeviceGetNvLinkCapability
152                .as_ref(),
153        )?;
154
155        unsafe {
156            // NVIDIA says that this should be interpreted as a boolean
157            let mut capability: c_uint = mem::zeroed();
158
159            nvml_try(sym(
160                self.device.handle(),
161                self.link,
162                cap_type.as_c(),
163                &mut capability,
164            ))?;
165
166            #[allow(clippy::match_like_matches_macro)]
167            Ok(match capability {
168                0 => false,
169                // Not worth an error or a panic if the value is > 1
170                _ => true,
171            })
172        }
173    }
174
175    /**
176    Gets the PCI information for this `NvLink`'s remote node.
177
178    # Errors
179
180    * `Uninitialized`, if the library has not been successfully initialized
181    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
182    is invalid
183    * `NotSupported`, if this `Device` doesn't support this feature
184    * `Unknown`, on any unexpected error
185
186    # Device Support
187
188    Supports Pascal or newer fully supported devices.
189    */
190    // Test written
191    #[doc(alias = "nvmlDeviceGetNvLinkRemotePciInfo_v2")]
192    pub fn remote_pci_info(&self) -> Result<PciInfo, NvmlError> {
193        let sym = nvml_sym(
194            self.device
195                .nvml()
196                .lib
197                .nvmlDeviceGetNvLinkRemotePciInfo_v2
198                .as_ref(),
199        )?;
200
201        unsafe {
202            let mut pci_info: nvmlPciInfo_t = mem::zeroed();
203
204            nvml_try(sym(self.device.handle(), self.link, &mut pci_info))?;
205
206            PciInfo::try_from(pci_info, false)
207        }
208    }
209
210    /**
211    Gets the specified `ErrorCounter` value.
212
213    # Errors
214
215    * `Uninitialized`, if the library has not been successfully initialized
216    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
217    is invalid
218    * `NotSupported`, if this `Device` doesn't support this feature
219    * `Unknown`, on any unexpected error
220
221    # Device Support
222
223    Supports Pascal or newer fully supported devices.
224    */
225    // Test written
226    #[doc(alias = "nvmlDeviceGetNvLinkErrorCounter")]
227    pub fn error_counter(&self, counter: ErrorCounter) -> Result<u64, NvmlError> {
228        let sym = nvml_sym(
229            self.device
230                .nvml()
231                .lib
232                .nvmlDeviceGetNvLinkErrorCounter
233                .as_ref(),
234        )?;
235
236        unsafe {
237            let mut value: c_ulonglong = mem::zeroed();
238
239            nvml_try(sym(
240                self.device.handle(),
241                self.link,
242                counter.as_c(),
243                &mut value,
244            ))?;
245
246            Ok(value)
247        }
248    }
249
250    /**
251    Resets all error counters to zero.
252
253    # Errors
254
255    * `Uninitialized`, if the library has not been successfully initialized
256    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
257    is invalid
258    * `NotSupported`, if this `Device` doesn't support this feature
259    * `Unknown`, on any unexpected error
260
261    # Device Support
262
263    Supports Pascal or newer fully supported devices.
264    */
265    // No-run test written
266    #[doc(alias = "nvmlDeviceResetNvLinkErrorCounters")]
267    pub fn reset_error_counters(&mut self) -> Result<(), NvmlError> {
268        let sym = nvml_sym(
269            self.device
270                .nvml()
271                .lib
272                .nvmlDeviceResetNvLinkErrorCounters
273                .as_ref(),
274        )?;
275
276        unsafe { nvml_try(sym(self.device.handle(), self.link)) }
277    }
278
279    /**
280    Sets the NvLink utilization counter control information for the specified
281    `Counter`.
282
283    The counters will be reset if `reset_counters` is true.
284
285    # Errors
286
287    * `Uninitialized`, if the library has not been successfully initialized
288    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
289    is invalid
290    * `NotSupported`, if this `Device` doesn't support this feature
291    * `Unknown`, on any unexpected error
292
293    # Device Support
294
295    Supports Pascal or newer fully supported devices.
296    */
297    // No-run test written
298    #[doc(alias = "nvmlDeviceSetNvLinkUtilizationControl")]
299    pub fn set_utilization_control(
300        &mut self,
301        counter: Counter,
302        settings: UtilizationControl,
303        reset_counters: bool,
304    ) -> Result<(), NvmlError> {
305        let reset: c_uint = u32::from(reset_counters);
306
307        let sym = nvml_sym(
308            self.device
309                .nvml()
310                .lib
311                .nvmlDeviceSetNvLinkUtilizationControl
312                .as_ref(),
313        )?;
314
315        unsafe {
316            nvml_try(sym(
317                self.device.handle(),
318                self.link,
319                counter as c_uint,
320                &mut settings.as_c(),
321                reset,
322            ))
323        }
324    }
325
326    /**
327    Gets the NvLink utilization counter control information for the specified
328    `Counter`.
329
330    # Errors
331
332    * `Uninitialized`, if the library has not been successfully initialized
333    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
334    is invalid
335    * `NotSupported`, if this `Device` doesn't support this feature
336    * `Unknown`, on any unexpected error
337
338    # Device Support
339
340    Supports Pascal or newer fully supported devices.
341    */
342    // Test written
343    #[doc(alias = "nvmlDeviceGetNvLinkUtilizationControl")]
344    pub fn utilization_control(&self, counter: Counter) -> Result<UtilizationControl, NvmlError> {
345        let sym = nvml_sym(
346            self.device
347                .nvml()
348                .lib
349                .nvmlDeviceGetNvLinkUtilizationControl
350                .as_ref(),
351        )?;
352
353        unsafe {
354            let mut controls: nvmlNvLinkUtilizationControl_t = mem::zeroed();
355
356            nvml_try(sym(
357                self.device.handle(),
358                self.link,
359                counter as c_uint,
360                &mut controls,
361            ))?;
362
363            UtilizationControl::try_from(controls)
364        }
365    }
366
367    /**
368    Gets the NvLink utilization counter for the given `counter`.
369
370    The retrieved values are based on the current controls set for the specified
371    `Counter`. **You should use `.set_utilization_control()` before calling this**
372    as the utilization counters have no default state.
373
374    I do not attempt to verify, statically or at runtime, that you have controls
375    set for `counter` prior to calling this method on `counter`. NVIDIA says that
376    it is "In general\[,\] good practice", which does not sound to me as if it
377    is in any way unsafe to make this call without having set controls. I don't
378    believe it's worth the overhead of using a `Mutex`'d bool to track whether
379    or not you have set controls, and it's certainly not worth the effort to
380    statically verify it via the type system.
381
382    That being said, I don't know what exactly would happen, either, and I have
383    no means of finding out. If you do and discover that garbage values are
384    returned, for instance, I would love to hear about it; that would likely
385    cause this decision to be reconsidered.
386
387    # Errors
388
389    * `Uninitialized`, if the library has not been successfully initialized
390    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
391    is invalid
392    * `NotSupported`, if this `Device` doesn't support this feature
393    * `Unknown`, on any unexpected error
394
395    # Device Support
396
397    Supports Pascal or newer fully supported devices.
398    */
399    // No-run test written
400    #[doc(alias = "nvmlDeviceGetNvLinkUtilizationCounter")]
401    pub fn utilization_counter(&self, counter: Counter) -> Result<UtilizationCounter, NvmlError> {
402        let sym = nvml_sym(
403            self.device
404                .nvml()
405                .lib
406                .nvmlDeviceGetNvLinkUtilizationCounter
407                .as_ref(),
408        )?;
409
410        unsafe {
411            let mut receive: c_ulonglong = mem::zeroed();
412            let mut send: c_ulonglong = mem::zeroed();
413
414            nvml_try(sym(
415                self.device.handle(),
416                self.link,
417                counter as c_uint,
418                &mut receive,
419                &mut send,
420            ))?;
421
422            Ok(UtilizationCounter { receive, send })
423        }
424    }
425
426    /**
427    Freezes the specified NvLink utilization `Counter`.
428
429    Both the receive and send counters will be frozen (if I'm reading NVIDIA's
430    meaning correctly).
431
432    # Errors
433
434    * `Uninitialized`, if the library has not been successfully initialized
435    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
436    is invalid
437    * `NotSupported`, if this `Device` doesn't support this feature
438    * `Unknown`, on any unexpected error
439
440    # Device Support
441
442    Supports Pascal or newer fully supported devices.
443    */
444    // No-run test written
445    #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
446    pub fn freeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
447        self.set_utilization_counter_frozen(counter, true)
448    }
449
450    /**
451    Unfreezes the specified NvLink utilization `Counter`.
452
453    Both the receive and send counters will be unfrozen (if I'm reading NVIDIA's
454    meaning correctly).
455
456    # Errors
457
458    * `Uninitialized`, if the library has not been successfully initialized
459    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
460    is invalid
461    * `NotSupported`, if this `Device` doesn't support this feature
462    * `Unknown`, on any unexpected error
463
464    # Device Support
465
466    Supports Pascal or newer fully supported devices.
467    */
468    // No-run test written
469    #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
470    pub fn unfreeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
471        self.set_utilization_counter_frozen(counter, false)
472    }
473
474    fn set_utilization_counter_frozen(
475        &mut self,
476        counter: Counter,
477        frozen: bool,
478    ) -> Result<(), NvmlError> {
479        let sym = nvml_sym(
480            self.device
481                .nvml()
482                .lib
483                .nvmlDeviceFreezeNvLinkUtilizationCounter
484                .as_ref(),
485        )?;
486
487        unsafe {
488            nvml_try(sym(
489                self.device.handle(),
490                self.link,
491                counter as c_uint,
492                state_from_bool(frozen),
493            ))
494        }
495    }
496
497    /**
498    Resets the specified NvLink utilization `Counter`.
499
500    Both the receive and send counters will be rest (if I'm reading NVIDIA's
501    meaning correctly).
502
503    # Errors
504
505    * `Uninitialized`, if the library has not been successfully initialized
506    * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
507    is invalid
508    * `NotSupported`, if this `Device` doesn't support this feature
509    * `Unknown`, on any unexpected error
510
511    # Device Support
512
513    Supports Pascal or newer fully supported devices.
514    */
515    // No-run test written
516    #[doc(alias = "nvmlDeviceResetNvLinkUtilizationCounter")]
517    pub fn reset_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
518        let sym = nvml_sym(
519            self.device
520                .nvml()
521                .lib
522                .nvmlDeviceResetNvLinkUtilizationCounter
523                .as_ref(),
524        )?;
525
526        unsafe { nvml_try(sym(self.device.handle(), self.link, counter as c_uint)) }
527    }
528}
529
#[cfg(test)]
#[deny(unused_mut)]
mod test {
    use crate::bitmasks::nv_link::*;
    use crate::enum_wrappers::nv_link::*;
    use crate::enums::nv_link::*;
    use crate::struct_wrappers::nv_link::*;
    use crate::test_utils::*;

    /// Runs `is_active` through the `test_with_link` helper.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn is_active() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.is_active())
    }

    /// Runs `version` through the `test_with_link` helper.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn version() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.version())
    }

    /// Runs `has_capability` for `Capability::P2p` through the
    /// `test_with_link` helper.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn has_capability() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.has_capability(Capability::P2p)
        })
    }

    /// Runs `remote_pci_info` and checks that no sub-system id is reported.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn remote_pci_info() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            let remote = nv_link.remote_pci_info()?;
            assert_eq!(remote.pci_sub_system_id, None);
            Ok(remote)
        })
    }

    /// Runs `error_counter` for `ErrorCounter::DlRecovery` through the
    /// `test_with_link` helper.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn error_counter() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.error_counter(ErrorCounter::DlRecovery)
        })
    }

    // Not marked `#[test]` on purpose: the call modifies link state, so this
    // only has to compile.
    #[allow(dead_code)]
    fn reset_error_counters() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_error_counters().unwrap();
    }

    // Not marked `#[test]` on purpose: the call modifies link state, so this
    // only has to compile.
    #[allow(dead_code)]
    fn set_utilization_control() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        let packet_filter = PacketTypes::NO_OP
            | PacketTypes::READ
            | PacketTypes::WRITE
            | PacketTypes::RATOM
            | PacketTypes::WITH_DATA;
        let settings = UtilizationControl {
            units: UtilizationCountUnit::Cycles,
            packet_filter,
        };

        nv_link
            .set_utilization_control(Counter::One, settings, false)
            .unwrap()
    }

    /// Runs `utilization_control` for `Counter::One` through the
    /// `test_with_link` helper.
    #[test]
    #[ignore = "my machine does not support this call"]
    fn utilization_control() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.utilization_control(Counter::One)
        })
    }

    // Not marked `#[test]` on purpose: this shouldn't be called without first
    // modifying link state, so this only has to compile.
    #[allow(dead_code)]
    fn utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        nv_link.utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]` on purpose: the call modifies link state, so this
    // only has to compile.
    #[allow(dead_code)]
    fn freeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.freeze_utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]` on purpose: the call modifies link state, so this
    // only has to compile.
    #[allow(dead_code)]
    fn unfreeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.unfreeze_utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]` on purpose: the call modifies link state, so this
    // only has to compile.
    #[allow(dead_code)]
    fn reset_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_utilization_counter(Counter::One).unwrap();
    }
}