Skip to main content

nvml_wrapper/
gpm.rs

1use crate::enums::gpm::GpmMetricId;
2use crate::error::{nvml_sym, nvml_try, NvmlError};
3use crate::ffi::bindings::*;
4use crate::struct_wrappers::gpm::GpmMetricResult;
5use crate::Nvml;
6
7use std::mem;
8
/**
Handle to a GPM (GPU Performance Monitoring) sample.

GPM enables collecting fine-grained GPU performance metrics (SM occupancy,
tensor utilization, PCIe/NVLink bandwidth, etc.) on Hopper+ GPUs. Metrics
are computed by taking two time-separated samples and comparing them via
[`gpm_metrics_get`].

A `GpmSample` owns its underlying NVML handle: the handle is released either
explicitly through [`GpmSample::free`] (which reports errors) or implicitly
when the value is dropped (which ignores them).

**Operations on a sample are not thread-safe.** It does not, therefore,
implement `Sync`.

You can obtain a `GpmSample` via [`crate::Device::gpm_sample()`] or
[`crate::Device::gpm_mig_sample()`].

Lifetimes are used to enforce that each `GpmSample` instance cannot be used
after the `Nvml` instance it was obtained from is dropped.
*/
#[derive(Debug)]
pub struct GpmSample<'nvml> {
    /// Raw NVML sample handle, filled in by `nvmlGpmSampleAlloc` and passed
    /// back to `nvmlGpmSampleFree` on release.
    sample: nvmlGpmSample_t,
    /// The `Nvml` instance whose loaded library is used to allocate and free
    /// `sample`; borrowing it for `'nvml` keeps the library alive for as long
    /// as the sample exists.
    nvml: &'nvml Nvml,
}

// SAFETY: NOTE(review): this asserts that a sample handle may be moved to and
// used from another thread as long as it is only used by one thread at a time
// (no `Sync` impl is provided). The justification is not visible in this file;
// presumably NVML does not tie a sample to the thread that allocated it —
// confirm against the NVML documentation.
unsafe impl<'nvml> Send for GpmSample<'nvml> {}
33
34impl<'nvml> GpmSample<'nvml> {
35    /// Allocate a new GPM sample.
36    ///
37    /// # Errors
38    ///
39    /// * `Uninitialized`, if the library has not been successfully initialized
40    /// * `Unknown`, on any unexpected error
41    #[doc(alias = "nvmlGpmSampleAlloc")]
42    pub(crate) fn alloc(nvml: &'nvml Nvml) -> Result<Self, NvmlError> {
43        let sym = nvml_sym(nvml.lib.nvmlGpmSampleAlloc.as_ref())?;
44
45        unsafe {
46            let mut sample: nvmlGpmSample_t = mem::zeroed();
47            nvml_try(sym(&mut sample))?;
48
49            Ok(Self { sample, nvml })
50        }
51    }
52
53    /**
54    Use this to free the sample if you care about handling potential errors
55    (*the `Drop` implementation ignores errors!*).
56
57    # Errors
58
59    * `Uninitialized`, if the library has not been successfully initialized
60    * `Unknown`, on any unexpected error
61    */
62    #[doc(alias = "nvmlGpmSampleFree")]
63    pub fn free(self) -> Result<(), NvmlError> {
64        let sym = nvml_sym(self.nvml.lib.nvmlGpmSampleFree.as_ref())?;
65
66        unsafe {
67            nvml_try(sym(self.sample))?;
68        }
69
70        mem::forget(self);
71        Ok(())
72    }
73
74    /// Get the raw sample handle.
75    ///
76    /// # Safety
77    ///
78    /// This is unsafe to prevent it from being used without care. In
79    /// particular, you must avoid creating a new `GpmSample` from this handle
80    /// and allowing both this `GpmSample` and the newly created one to drop
81    /// (which would result in a double-free).
82    pub unsafe fn handle(&self) -> nvmlGpmSample_t {
83        self.sample
84    }
85
86    /// Get a reference to the `Nvml` instance this sample was created from.
87    pub fn nvml(&self) -> &'nvml Nvml {
88        self.nvml
89    }
90}
91
92/// This `Drop` implementation ignores errors! Use the `.free()` method on
93/// the `GpmSample` struct if you care about handling them.
94impl<'nvml> Drop for GpmSample<'nvml> {
95    #[doc(alias = "nvmlGpmSampleFree")]
96    fn drop(&mut self) {
97        unsafe {
98            self.nvml.lib.nvmlGpmSampleFree(self.sample);
99        }
100    }
101}
102
103/**
104Retrieve GPM metrics computed between two time-separated samples.
105
106The two samples should have been previously populated via
107[`crate::Device::gpm_sample()`] or [`crate::Device::gpm_mig_sample()`].
108
109Returns a `Vec` with one entry per requested metric. Each entry is itself
110a `Result`: the outer `Result` covers transport-level errors, while the
111inner `Result` covers per-metric failures (e.g. a metric not supported on
112the current GPU).
113
114# Errors
115
116* `Uninitialized`, if the library has not been successfully initialized
117* `InvalidArg`, if any argument is invalid
118* `NotSupported`, if GPM is not supported
119* `Unknown`, on any unexpected error
120
121# Panics
122
123Panics if more than 98 metrics are requested (the maximum supported by NVML).
124
125# Device Support
126
127Supports Hopper and newer architectures.
128*/
129#[doc(alias = "nvmlGpmMetricsGet")]
130pub fn gpm_metrics_get<'nvml>(
131    nvml: &'nvml Nvml,
132    sample1: &GpmSample<'nvml>,
133    sample2: &GpmSample<'nvml>,
134    metric_ids: &[GpmMetricId],
135) -> Result<Vec<Result<GpmMetricResult, NvmlError>>, NvmlError> {
136    assert!(
137        metric_ids.len() <= nvmlGpmMetricId_t_NVML_GPM_METRIC_MAX as usize,
138        "cannot request more than {} GPM metrics at once",
139        nvmlGpmMetricId_t_NVML_GPM_METRIC_MAX
140    );
141
142    let sym = nvml_sym(nvml.lib.nvmlGpmMetricsGet.as_ref())?;
143
144    unsafe {
145        let mut request: nvmlGpmMetricsGet_t = mem::zeroed();
146        request.version = NVML_GPM_METRICS_GET_VERSION;
147        request.numMetrics = metric_ids.len() as u32;
148        request.sample1 = sample1.sample;
149        request.sample2 = sample2.sample;
150
151        for (i, id) in metric_ids.iter().enumerate() {
152            request.metrics[i].metricId = id.as_c();
153        }
154
155        nvml_try(sym(&mut request))?;
156
157        let results = (0..metric_ids.len())
158            .map(|i| GpmMetricResult::try_from_c(&request.metrics[i]))
159            .collect();
160
161        Ok(results)
162    }
163}