nvml_wrapper/nv_link.rs
1use crate::Device;
2
3use crate::enum_wrappers::{
4 bool_from_state,
5 nv_link::{Capability, ErrorCounter},
6 state_from_bool,
7};
8
9use crate::enums::nv_link::Counter;
10use crate::error::{nvml_sym, nvml_try, NvmlError};
11use crate::ffi::bindings::*;
12use crate::struct_wrappers::{device::PciInfo, nv_link::UtilizationControl};
13use crate::structs::nv_link::UtilizationCounter;
14
15use std::{
16 convert::TryFrom,
17 mem,
18 os::raw::{c_uint, c_ulonglong},
19};
20
21use static_assertions::assert_impl_all;
22
23/**
24Struct that represents a `Device`'s NvLink.
25
26Obtain this via `Device.link_wrapper_for()`.
27
28Lifetimes are used to enforce that each `NvLink` instance cannot be used after
29the `Device` instance it was obtained from is dropped:
30
31```compile_fail
32use nvml_wrapper::Nvml;
33# use nvml_wrapper::error::*;
34
35# fn main() -> Result<(), NvmlError> {
36let nvml = Nvml::init()?;
37let device = nvml.device_by_index(0)?;
38let link = device.link_wrapper_for(0);
39
40drop(device);
41
42// This won't compile
43link.is_active()?;
44# Ok(())
45# }
46```
47
48Note that I cannot test any `NvLink` methods myself as I do not have access to
49such a link setup. **Test the functionality in this module before you use it**.
50*/
51#[derive(Debug)]
52pub struct NvLink<'device, 'nvml: 'device> {
53 pub(crate) device: &'device Device<'nvml>,
54 pub(crate) link: c_uint,
55}
56
57assert_impl_all!(NvLink: Send, Sync);
58
59impl<'device, 'nvml: 'device> NvLink<'device, 'nvml> {
60 /// Obtain the `Device` reference stored within this struct.
61 pub fn device(&self) -> &Device {
62 self.device
63 }
64
65 /// Obtain the value of this struct's `link` field.
66 pub fn link(&self) -> u32 {
67 self.link
68 }
69
70 /**
71 Gets whether or not this `Device`'s NvLink is active.
72
73 # Errors
74
75 * `Uninitialized`, if the library has not been successfully initialized
76 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
77 is invalid
78 * `NotSupported`, if this `Device` doesn't support this feature
79 * `UnexpectedVariant`, for which you can read the docs for
80 * `Unknown`, on any unexpected error
81
82 # Device Support
83
84 Supports Pascal or newer fully supported devices.
85 */
86 // Test written
87 #[doc(alias = "nvmlDeviceGetNvLinkState")]
88 pub fn is_active(&self) -> Result<bool, NvmlError> {
89 let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkState.as_ref())?;
90
91 unsafe {
92 let mut state: nvmlEnableState_t = mem::zeroed();
93
94 nvml_try(sym(self.device.handle(), self.link, &mut state))?;
95
96 bool_from_state(state)
97 }
98 }
99
100 /**
101 Gets the NvLink version of this `Device` / `NvLink`.
102
103 # Errors
104
105 * `Uninitialized`, if the library has not been successfully initialized
106 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
107 is invalid
108 * `NotSupported`, if this `Device` doesn't support this feature
109 * `Unknown`, on any unexpected error
110
111 # Device Support
112
113 Supports Pascal or newer fully supported devices.
114 */
115 // Test written
116 #[doc(alias = "nvmlDeviceGetNvLinkVersion")]
117 pub fn version(&self) -> Result<u32, NvmlError> {
118 let sym = nvml_sym(self.device.nvml().lib.nvmlDeviceGetNvLinkVersion.as_ref())?;
119
120 unsafe {
121 let mut version: c_uint = mem::zeroed();
122
123 nvml_try(sym(self.device.handle(), self.link, &mut version))?;
124
125 Ok(version)
126 }
127 }
128
129 /**
130 Gets whether or not this `Device` / `NvLink` has a `Capability`.
131
132 # Errors
133
134 * `Uninitialized`, if the library has not been successfully initialized
135 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
136 is invalid
137 * `NotSupported`, if this `Device` doesn't support this feature
138 * `Unknown`, on any unexpected error
139
140 # Device Support
141
142 Supports Pascal or newer fully supported devices.
143 */
144 // Test written
145 #[doc(alias = "nvmlDeviceGetNvLinkCapability")]
146 pub fn has_capability(&self, cap_type: Capability) -> Result<bool, NvmlError> {
147 let sym = nvml_sym(
148 self.device
149 .nvml()
150 .lib
151 .nvmlDeviceGetNvLinkCapability
152 .as_ref(),
153 )?;
154
155 unsafe {
156 // NVIDIA says that this should be interpreted as a boolean
157 let mut capability: c_uint = mem::zeroed();
158
159 nvml_try(sym(
160 self.device.handle(),
161 self.link,
162 cap_type.as_c(),
163 &mut capability,
164 ))?;
165
166 #[allow(clippy::match_like_matches_macro)]
167 Ok(match capability {
168 0 => false,
169 // Not worth an error or a panic if the value is > 1
170 _ => true,
171 })
172 }
173 }
174
175 /**
176 Gets the PCI information for this `NvLink`'s remote node.
177
178 # Errors
179
180 * `Uninitialized`, if the library has not been successfully initialized
181 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
182 is invalid
183 * `NotSupported`, if this `Device` doesn't support this feature
184 * `Unknown`, on any unexpected error
185
186 # Device Support
187
188 Supports Pascal or newer fully supported devices.
189 */
190 // Test written
191 #[doc(alias = "nvmlDeviceGetNvLinkRemotePciInfo_v2")]
192 pub fn remote_pci_info(&self) -> Result<PciInfo, NvmlError> {
193 let sym = nvml_sym(
194 self.device
195 .nvml()
196 .lib
197 .nvmlDeviceGetNvLinkRemotePciInfo_v2
198 .as_ref(),
199 )?;
200
201 unsafe {
202 let mut pci_info: nvmlPciInfo_t = mem::zeroed();
203
204 nvml_try(sym(self.device.handle(), self.link, &mut pci_info))?;
205
206 PciInfo::try_from(pci_info, false)
207 }
208 }
209
210 /**
211 Gets the specified `ErrorCounter` value.
212
213 # Errors
214
215 * `Uninitialized`, if the library has not been successfully initialized
216 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
217 is invalid
218 * `NotSupported`, if this `Device` doesn't support this feature
219 * `Unknown`, on any unexpected error
220
221 # Device Support
222
223 Supports Pascal or newer fully supported devices.
224 */
225 // Test written
226 #[doc(alias = "nvmlDeviceGetNvLinkErrorCounter")]
227 pub fn error_counter(&self, counter: ErrorCounter) -> Result<u64, NvmlError> {
228 let sym = nvml_sym(
229 self.device
230 .nvml()
231 .lib
232 .nvmlDeviceGetNvLinkErrorCounter
233 .as_ref(),
234 )?;
235
236 unsafe {
237 let mut value: c_ulonglong = mem::zeroed();
238
239 nvml_try(sym(
240 self.device.handle(),
241 self.link,
242 counter.as_c(),
243 &mut value,
244 ))?;
245
246 Ok(value)
247 }
248 }
249
250 /**
251 Resets all error counters to zero.
252
253 # Errors
254
255 * `Uninitialized`, if the library has not been successfully initialized
256 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
257 is invalid
258 * `NotSupported`, if this `Device` doesn't support this feature
259 * `Unknown`, on any unexpected error
260
261 # Device Support
262
263 Supports Pascal or newer fully supported devices.
264 */
265 // No-run test written
266 #[doc(alias = "nvmlDeviceResetNvLinkErrorCounters")]
267 pub fn reset_error_counters(&mut self) -> Result<(), NvmlError> {
268 let sym = nvml_sym(
269 self.device
270 .nvml()
271 .lib
272 .nvmlDeviceResetNvLinkErrorCounters
273 .as_ref(),
274 )?;
275
276 unsafe { nvml_try(sym(self.device.handle(), self.link)) }
277 }
278
279 /**
280 Sets the NvLink utilization counter control information for the specified
281 `Counter`.
282
283 The counters will be reset if `reset_counters` is true.
284
285 # Errors
286
287 * `Uninitialized`, if the library has not been successfully initialized
288 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
289 is invalid
290 * `NotSupported`, if this `Device` doesn't support this feature
291 * `Unknown`, on any unexpected error
292
293 # Device Support
294
295 Supports Pascal or newer fully supported devices.
296 */
297 // No-run test written
298 #[doc(alias = "nvmlDeviceSetNvLinkUtilizationControl")]
299 pub fn set_utilization_control(
300 &mut self,
301 counter: Counter,
302 settings: UtilizationControl,
303 reset_counters: bool,
304 ) -> Result<(), NvmlError> {
305 let reset: c_uint = u32::from(reset_counters);
306
307 let sym = nvml_sym(
308 self.device
309 .nvml()
310 .lib
311 .nvmlDeviceSetNvLinkUtilizationControl
312 .as_ref(),
313 )?;
314
315 unsafe {
316 nvml_try(sym(
317 self.device.handle(),
318 self.link,
319 counter as c_uint,
320 &mut settings.as_c(),
321 reset,
322 ))
323 }
324 }
325
326 /**
327 Gets the NvLink utilization counter control information for the specified
328 `Counter`.
329
330 # Errors
331
332 * `Uninitialized`, if the library has not been successfully initialized
333 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
334 is invalid
335 * `NotSupported`, if this `Device` doesn't support this feature
336 * `Unknown`, on any unexpected error
337
338 # Device Support
339
340 Supports Pascal or newer fully supported devices.
341 */
342 // Test written
343 #[doc(alias = "nvmlDeviceGetNvLinkUtilizationControl")]
344 pub fn utilization_control(&self, counter: Counter) -> Result<UtilizationControl, NvmlError> {
345 let sym = nvml_sym(
346 self.device
347 .nvml()
348 .lib
349 .nvmlDeviceGetNvLinkUtilizationControl
350 .as_ref(),
351 )?;
352
353 unsafe {
354 let mut controls: nvmlNvLinkUtilizationControl_t = mem::zeroed();
355
356 nvml_try(sym(
357 self.device.handle(),
358 self.link,
359 counter as c_uint,
360 &mut controls,
361 ))?;
362
363 UtilizationControl::try_from(controls)
364 }
365 }
366
367 /**
368 Gets the NvLink utilization counter for the given `counter`.
369
370 The retrieved values are based on the current controls set for the specified
371 `Counter`. **You should use `.set_utilization_control()` before calling this**
372 as the utilization counters have no default state.
373
374 I do not attempt to verify, statically or at runtime, that you have controls
375 set for `counter` prior to calling this method on `counter`. NVIDIA says that
376 it is "In general\[,\] good practice", which does not sound to me as if it
377 is in any way unsafe to make this call without having set controls. I don't
378 believe it's worth the overhead of using a `Mutex`'d bool to track whether
379 or not you have set controls, and it's certainly not worth the effort to
380 statically verify it via the type system.
381
382 That being said, I don't know what exactly would happen, either, and I have
383 no means of finding out. If you do and discover that garbage values are
384 returned, for instance, I would love to hear about it; that would likely
385 cause this decision to be reconsidered.
386
387 # Errors
388
389 * `Uninitialized`, if the library has not been successfully initialized
390 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
391 is invalid
392 * `NotSupported`, if this `Device` doesn't support this feature
393 * `Unknown`, on any unexpected error
394
395 # Device Support
396
397 Supports Pascal or newer fully supported devices.
398 */
399 // No-run test written
400 #[doc(alias = "nvmlDeviceGetNvLinkUtilizationCounter")]
401 pub fn utilization_counter(&self, counter: Counter) -> Result<UtilizationCounter, NvmlError> {
402 let sym = nvml_sym(
403 self.device
404 .nvml()
405 .lib
406 .nvmlDeviceGetNvLinkUtilizationCounter
407 .as_ref(),
408 )?;
409
410 unsafe {
411 let mut receive: c_ulonglong = mem::zeroed();
412 let mut send: c_ulonglong = mem::zeroed();
413
414 nvml_try(sym(
415 self.device.handle(),
416 self.link,
417 counter as c_uint,
418 &mut receive,
419 &mut send,
420 ))?;
421
422 Ok(UtilizationCounter { receive, send })
423 }
424 }
425
426 /**
427 Freezes the specified NvLink utilization `Counter`.
428
429 Both the receive and send counters will be frozen (if I'm reading NVIDIA's
430 meaning correctly).
431
432 # Errors
433
434 * `Uninitialized`, if the library has not been successfully initialized
435 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
436 is invalid
437 * `NotSupported`, if this `Device` doesn't support this feature
438 * `Unknown`, on any unexpected error
439
440 # Device Support
441
442 Supports Pascal or newer fully supported devices.
443 */
444 // No-run test written
445 #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
446 pub fn freeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
447 self.set_utilization_counter_frozen(counter, true)
448 }
449
450 /**
451 Unfreezes the specified NvLink utilization `Counter`.
452
453 Both the receive and send counters will be unfrozen (if I'm reading NVIDIA's
454 meaning correctly).
455
456 # Errors
457
458 * `Uninitialized`, if the library has not been successfully initialized
459 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
460 is invalid
461 * `NotSupported`, if this `Device` doesn't support this feature
462 * `Unknown`, on any unexpected error
463
464 # Device Support
465
466 Supports Pascal or newer fully supported devices.
467 */
468 // No-run test written
469 #[doc(alias = "nvmlDeviceFreezeNvLinkUtilizationCounter")]
470 pub fn unfreeze_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
471 self.set_utilization_counter_frozen(counter, false)
472 }
473
474 fn set_utilization_counter_frozen(
475 &mut self,
476 counter: Counter,
477 frozen: bool,
478 ) -> Result<(), NvmlError> {
479 let sym = nvml_sym(
480 self.device
481 .nvml()
482 .lib
483 .nvmlDeviceFreezeNvLinkUtilizationCounter
484 .as_ref(),
485 )?;
486
487 unsafe {
488 nvml_try(sym(
489 self.device.handle(),
490 self.link,
491 counter as c_uint,
492 state_from_bool(frozen),
493 ))
494 }
495 }
496
497 /**
498 Resets the specified NvLink utilization `Counter`.
499
500 Both the receive and send counters will be rest (if I'm reading NVIDIA's
501 meaning correctly).
502
503 # Errors
504
505 * `Uninitialized`, if the library has not been successfully initialized
506 * `InvalidArg`, if the `link` or `Device` within this `NvLink` struct instance
507 is invalid
508 * `NotSupported`, if this `Device` doesn't support this feature
509 * `Unknown`, on any unexpected error
510
511 # Device Support
512
513 Supports Pascal or newer fully supported devices.
514 */
515 // No-run test written
516 #[doc(alias = "nvmlDeviceResetNvLinkUtilizationCounter")]
517 pub fn reset_utilization_counter(&mut self, counter: Counter) -> Result<(), NvmlError> {
518 let sym = nvml_sym(
519 self.device
520 .nvml()
521 .lib
522 .nvmlDeviceResetNvLinkUtilizationCounter
523 .as_ref(),
524 )?;
525
526 unsafe { nvml_try(sym(self.device.handle(), self.link, counter as c_uint)) }
527 }
528}
529
#[cfg(test)]
// With `unused_mut` denied, a `let mut link` binding that never needs to be
// mutable is a compile error. NOTE(review): presumably this guards the
// `&mut self` signatures of the state-changing methods -- confirm.
#[deny(unused_mut)]
mod test {
    use crate::bitmasks::nv_link::*;
    use crate::enum_wrappers::nv_link::*;
    use crate::enums::nv_link::*;
    use crate::struct_wrappers::nv_link::*;
    use crate::test_utils::*;

    #[test]
    #[ignore = "my machine does not support this call"]
    fn is_active() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.is_active())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn version() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.version())
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn has_capability() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.has_capability(Capability::P2p))
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn remote_pci_info() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            let remote = nv_link.remote_pci_info()?;
            // The remote PCI info is expected to carry no sub-system id.
            assert_eq!(remote.pci_sub_system_id, None);
            Ok(remote)
        })
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn error_counter() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| {
            nv_link.error_counter(ErrorCounter::DlRecovery)
        })
    }

    // Not marked `#[test]`: this modifies link state, so it is only compiled,
    // never run.
    #[allow(dead_code)]
    fn reset_error_counters() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_error_counters().unwrap();
    }

    // Not marked `#[test]`: this modifies link state, so it is only compiled,
    // never run.
    #[allow(dead_code)]
    fn set_utilization_control() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        let packet_filter = PacketTypes::NO_OP
            | PacketTypes::READ
            | PacketTypes::WRITE
            | PacketTypes::RATOM
            | PacketTypes::WITH_DATA;
        let control = UtilizationControl {
            units: UtilizationCountUnit::Cycles,
            packet_filter,
        };

        nv_link
            .set_utilization_control(Counter::One, control, false)
            .unwrap()
    }

    #[test]
    #[ignore = "my machine does not support this call"]
    fn utilization_control() {
        let lib = nvml();
        test_with_link(3, &lib, |nv_link| nv_link.utilization_control(Counter::One))
    }

    // Not marked `#[test]`: this shouldn't be called without first modifying
    // link state, so it is only compiled, never run.
    #[allow(dead_code)]
    fn utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let nv_link = gpu.link_wrapper_for(0);

        nv_link.utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]`: this modifies link state, so it is only compiled,
    // never run.
    #[allow(dead_code)]
    fn freeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.freeze_utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]`: this modifies link state, so it is only compiled,
    // never run.
    #[allow(dead_code)]
    fn unfreeze_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.unfreeze_utilization_counter(Counter::One).unwrap();
    }

    // Not marked `#[test]`: this modifies link state, so it is only compiled,
    // never run.
    #[allow(dead_code)]
    fn reset_utilization_counter() {
        let lib = nvml();
        let gpu = device(&lib);
        let mut nv_link = gpu.link_wrapper_for(0);

        nv_link.reset_utilization_counter(Counter::One).unwrap();
    }
}