Skip to main content

singe_nvml/
gpu_instance.rs

1#[allow(unused_imports)]
2use crate::error::Status;
3
4use std::{hash::Hash, mem::ManuallyDrop, ops::Deref, ptr};
5
6use singe_nvml_sys as sys;
7
8use crate::{
9    compute_instance::{ComputeInstance, OwnedComputeInstance},
10    device::Device,
11    error::Result,
12    try_ffi,
13    types::{
14        ComputeInstanceEngineProfile, ComputeInstancePlacement, ComputeInstanceProfileInfo,
15        EnableState, GpuInstanceInfo, VgpuInstanceId, VgpuPlacementId, VgpuSchedulerEngine,
16        VgpuSchedulerLog, VgpuSchedulerState, VgpuTypeId, try_from_nvml_enum,
17    },
18    utility::struct_version,
19    vgpu_instance::VgpuInstance,
20};
21
22#[derive(Debug, Clone, PartialEq, Eq, Hash)]
23#[repr(transparent)]
24pub struct GpuInstance(sys::nvmlGpuInstance_t);
25
26#[derive(Debug)]
27pub struct OwnedGpuInstance(GpuInstance);
28
29impl GpuInstance {
30    pub const unsafe fn from_raw(handle: sys::nvmlGpuInstance_t) -> Self {
31        Self(handle)
32    }
33
34    pub const fn as_raw(&self) -> sys::nvmlGpuInstance_t {
35        self.0
36    }
37
38    pub const fn is_null(&self) -> bool {
39        self.0.is_null()
40    }
41
42    /// Returns GPU instance information.
43    ///
44    /// For Ampere or newer fully supported devices.
45    /// Supported on Linux only.
46    ///
47    /// # Errors
48    ///
49    /// Returns an error if the handle or output arguments are rejected by NVML, if
50    /// the current process does not have permission to query the instance, or if
51    /// NVML has not been initialized.
52    pub fn info(&self) -> Result<GpuInstanceInfo> {
53        let mut info = sys::nvmlGpuInstanceInfo_t::default();
54        unsafe {
55            try_ffi!(sys::nvmlGpuInstanceGetInfo(self.0, &raw mut info))?;
56        }
57        Ok(info.into())
58    }
59
60    pub fn parent_device(&self) -> Result<Device> {
61        Ok(self.info()?.device)
62    }
63
64    pub fn id(&self) -> Result<u32> {
65        Ok(self.info()?.id)
66    }
67
68    pub fn profile_id(&self) -> Result<u32> {
69        Ok(self.info()?.profile_id)
70    }
71
72    /// Versioned wrapper that requests compute-instance profile information using the latest supported NVML output layout.
73    ///
74    /// This wrapper sets the version field on the output structure before calling NVML.
75    ///
76    /// For Ampere or newer fully supported devices.
77    /// Supported on Linux only.
78    ///
79    /// # Errors
80    ///
81    /// Returns an error if the GPU instance, profile, engine profile, or structure
82    /// version is invalid, if the profile is not supported, if the current process
83    /// does not have permission to perform the operation, or if NVML has not been
84    /// initialized.
85    pub fn compute_instance_profile_info(
86        &self,
87        profile: u32,
88        engine_profile: ComputeInstanceEngineProfile,
89    ) -> Result<ComputeInstanceProfileInfo> {
90        let mut info = sys::nvmlComputeInstanceProfileInfo_v3_t {
91            version: struct_version::<sys::nvmlComputeInstanceProfileInfo_v3_t>(3),
92            ..Default::default()
93        };
94        unsafe {
95            try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceProfileInfoV(
96                self.0,
97                profile,
98                engine_profile.into(),
99                (&raw mut info).cast(),
100            ))?;
101        }
102        Ok(info.into())
103    }
104
105    /// Returns compute instance profile capacity.
106    ///
107    /// For Ampere or newer fully supported devices.
108    /// Supported on Linux only.
109    /// Requires privileged access.
110    ///
111    /// # Errors
112    ///
113    /// Returns an error if the GPU instance or `profile_id` is invalid, if the
114    /// profile is not supported, if the current process does not have permission
115    /// to perform the operation, or if NVML has not been initialized.
116    pub fn compute_instance_remaining_capacity(&self, profile_id: u32) -> Result<u32> {
117        let mut count = 0;
118        unsafe {
119            try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceRemainingCapacity(
120                self.0,
121                profile_id,
122                &raw mut count,
123            ))?;
124        }
125        Ok(count)
126    }
127
128    /// Returns compute instance placements.
129    ///
130    /// For Ampere or newer fully supported devices.
131    /// Supported on Linux only.
132    /// Requires privileged access.
133    ///
134    /// A placement represents the location of a compute instance within a GPU instance.
135    /// Returns all possible placements for the given profile.
136    /// A created compute instance occupies compute slices described by its placement.
137    /// Creating a compute instance fails if its placement overlaps already
138    /// occupied compute slices.
139    ///
140    /// # Errors
141    ///
142    /// Returns an error if the GPU instance or `profile_id` is invalid, if MIG mode
143    /// is not enabled or the profile is not supported, if the current process does
144    /// not have permission to perform the operation, or if NVML has not been
145    /// initialized.
146    pub fn compute_instance_possible_placements(
147        &self,
148        profile_id: u32,
149    ) -> Result<Vec<ComputeInstancePlacement>> {
150        let mut count = 0;
151        let status = unsafe {
152            sys::nvmlGpuInstanceGetComputeInstancePossiblePlacements(
153                self.0,
154                profile_id,
155                ptr::null_mut(),
156                &raw mut count,
157            )
158        };
159        if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
160            return Ok(Vec::new());
161        }
162        if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
163            return Err(status.into());
164        }
165
166        let mut placements = vec![sys::nvmlComputeInstancePlacement_t::default(); count as usize];
167        unsafe {
168            try_ffi!(sys::nvmlGpuInstanceGetComputeInstancePossiblePlacements(
169                self.0,
170                profile_id,
171                placements.as_mut_ptr(),
172                &raw mut count,
173            ))?;
174        }
175        placements.truncate(count as usize);
176        Ok(placements.into_iter().map(Into::into).collect())
177    }
178
179    /// Returns compute instance for the given instance ID.
180    ///
181    /// For Ampere or newer fully supported devices.
182    /// Supported on Linux only.
183    /// Requires privileged access.
184    ///
185    /// # Errors
186    ///
187    /// Returns an error if the GPU instance or ID is invalid, if the compute
188    /// instance is not found, if MIG mode is not enabled, if the current process
189    /// lacks permission, or if NVML has not been initialized.
190    pub fn compute_instance_by_id(&self, id: u32) -> Result<ComputeInstance> {
191        let mut instance = ptr::null_mut();
192        unsafe {
193            try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceById(
194                self.0,
195                id,
196                &raw mut instance,
197            ))?;
198            Ok(ComputeInstance::from_raw(instance))
199        }
200    }
201
202    /// Creates a compute instance.
203    ///
204    /// For Ampere or newer fully supported devices.
205    /// Supported on Linux only.
206    /// Requires privileged access.
207    ///
208    /// If the parent device is unbound or reset, or if the parent GPU instance or compute instance is destroyed, the compute instance handle becomes invalid.
209    /// The compute instance must be recreated to acquire a valid handle.
210    ///
211    /// # Errors
212    ///
213    /// Returns an error if the requested compute instance cannot be created, if the
214    /// GPU instance or `profile_id` is invalid, if the profile is not supported, if
215    /// the current process lacks permission, or if NVML has not been initialized.
216    pub fn create_compute_instance(&self, profile_id: u32) -> Result<OwnedComputeInstance> {
217        let mut instance = ptr::null_mut();
218        unsafe {
219            try_ffi!(sys::nvmlGpuInstanceCreateComputeInstance(
220                self.0,
221                profile_id,
222                &raw mut instance,
223            ))?;
224            Ok(OwnedComputeInstance::from_raw(instance))
225        }
226    }
227
228    /// Creates a compute instance with the specified placement.
229    ///
230    /// For Ampere or newer fully supported devices.
231    /// Supported on Linux only.
232    /// Requires privileged access.
233    ///
234    /// If the parent device is unbound or reset, or if the parent GPU instance or compute instance is destroyed, the compute instance handle becomes invalid.
235    /// The compute instance must be recreated to acquire a valid handle.
236    ///
237    /// # Errors
238    ///
239    /// Returns an error if the requested compute instance cannot be created, if the
240    /// GPU instance, `profile_id`, or placement is invalid, if the profile is not
241    /// supported, if the current process lacks permission, or if NVML has not been
242    /// initialized.
243    pub fn create_compute_instance_with_placement(
244        &self,
245        profile_id: u32,
246        placement: ComputeInstancePlacement,
247    ) -> Result<OwnedComputeInstance> {
248        let placement = sys::nvmlComputeInstancePlacement_t::from(placement);
249        let mut instance = ptr::null_mut();
250        unsafe {
251            try_ffi!(sys::nvmlGpuInstanceCreateComputeInstanceWithPlacement(
252                self.0,
253                profile_id,
254                &raw const placement,
255                &raw mut instance,
256            ))?;
257            Ok(OwnedComputeInstance::from_raw(instance))
258        }
259    }
260
261    /// Returns compute instances for the given profile ID.
262    ///
263    /// For Ampere or newer fully supported devices.
264    /// Supported on Linux only.
265    /// Requires privileged access.
266    ///
267    /// # Errors
268    ///
269    /// Returns an error if the GPU instance or `profile_id` is invalid, if the
270    /// profile is not supported, if the current process lacks permission, or if
271    /// NVML has not been initialized.
272    pub fn compute_instances(&self, profile_id: u32) -> Result<Vec<ComputeInstance>> {
273        let mut count = 0;
274        let status = unsafe {
275            sys::nvmlGpuInstanceGetComputeInstances(
276                self.0,
277                profile_id,
278                ptr::null_mut(),
279                &raw mut count,
280            )
281        };
282        if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
283            return Ok(Vec::new());
284        }
285        if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
286            return Err(status.into());
287        }
288
289        let mut instances = vec![ptr::null_mut(); count as usize];
290        unsafe {
291            try_ffi!(sys::nvmlGpuInstanceGetComputeInstances(
292                self.0,
293                profile_id,
294                instances.as_mut_ptr(),
295                &raw mut count,
296            ))?;
297        }
298        instances.truncate(count as usize);
299        Ok(instances
300            .into_iter()
301            .map(|instance| unsafe { ComputeInstance::from_raw(instance) })
302            .collect())
303    }
304
305    /// Query the currently creatable vGPU types on a specific GPU instance.
306    ///
307    /// Returns the vGPU types that can currently be created for this GPU instance.
308    /// This wrapper performs the NVML size query internally and returns the results as a [`Vec`].
309    ///
310    /// The creatable vGPU types may differ over time, as there may be restrictions on what type of vGPUs can concurrently run on the device.
311    ///
312    /// # Errors
313    ///
314    /// Returns an error if NVML rejects the versioned request, if the intermediate
315    /// size query reports a larger buffer requirement than expected, if this GPU
316    /// instance or query arguments are invalid, if the host or GPU does not support
317    /// vGPU creation, if NVML has not been initialized, or if NVML reports an
318    /// unexpected failure.
319    pub fn creatable_vgpus(&self) -> Result<Vec<VgpuTypeId>> {
320        let mut info = sys::nvmlVgpuTypeIdInfo_t {
321            vgpuCount: 0,
322            ..Default::default()
323        };
324        let status = unsafe { sys::nvmlGpuInstanceGetCreatableVgpus(self.0, &raw mut info) };
325        if status == sys::nvmlReturn_t::NVML_SUCCESS && info.vgpuCount == 0 {
326            return Ok(Vec::new());
327        }
328        if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
329            return Err(status.into());
330        }
331
332        let mut types = vec![0u32; info.vgpuCount as usize];
333        info.vgpuTypeIds = types.as_mut_ptr();
334        unsafe {
335            try_ffi!(sys::nvmlGpuInstanceGetCreatableVgpus(self.0, &raw mut info))?;
336        }
337        types.truncate(info.vgpuCount as usize);
338        Ok(types.into_iter().map(VgpuTypeId).collect())
339    }
340
341    /// Returns the active vGPU instances within a GPU instance.
342    ///
343    /// This wrapper performs the NVML size query internally and returns the active vGPU instances as a [`Vec`].
344    ///
345    /// # Errors
346    ///
347    /// Returns an error if NVML rejects the versioned request, if the intermediate
348    /// size query reports a larger buffer requirement than expected, if this GPU
349    /// instance or query arguments are invalid, if the host or GPU does not support
350    /// vGPU queries, if NVML has not been initialized, or if NVML reports an
351    /// unexpected failure.
352    pub fn active_vgpus(&self) -> Result<Vec<VgpuInstance>> {
353        let mut info = sys::nvmlActiveVgpuInstanceInfo_t {
354            version: struct_version::<sys::nvmlActiveVgpuInstanceInfo_t>(1),
355            ..Default::default()
356        };
357
358        let status = unsafe { sys::nvmlGpuInstanceGetActiveVgpus(self.0, &raw mut info) };
359        if status == sys::nvmlReturn_t::NVML_SUCCESS && info.vgpuCount == 0 {
360            return Ok(Vec::new());
361        }
362        if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
363            return Err(status.into());
364        }
365
366        let mut instances = vec![0u32; info.vgpuCount as usize];
367        info.vgpuInstances = instances.as_mut_ptr();
368        unsafe {
369            try_ffi!(sys::nvmlGpuInstanceGetActiveVgpus(self.0, &raw mut info))?;
370        }
371        instances.truncate(info.vgpuCount as usize);
372        Ok(instances
373            .into_iter()
374            .map(|instance| VgpuInstance::from_id(VgpuInstanceId(instance)))
375            .collect())
376    }
377
378    /// Returns the vGPU heterogeneous mode for the GPU instance.
379    ///
380    /// When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes.
381    ///
382    /// On success, returns the current vGPU heterogeneous mode as [`EnableState::Enabled`] or [`EnableState::Disabled`].
383    ///
384    /// For Blackwell &tm GB20x; or newer fully supported devices.
385    ///
386    /// # Errors
387    ///
388    /// Returns an error if NVML rejects the versioned request, if this GPU instance
389    /// or query arguments are invalid, if the host, GPU, or MIG mode does not
390    /// support the query, if NVML has not been initialized, or if NVML reports an
391    /// unexpected failure.
392    pub fn vgpu_heterogeneous_mode(&self) -> Result<EnableState> {
393        let mut mode = sys::nvmlVgpuHeterogeneousMode_t {
394            version: struct_version::<sys::nvmlVgpuHeterogeneousMode_t>(1),
395            ..Default::default()
396        };
397        unsafe {
398            try_ffi!(sys::nvmlGpuInstanceGetVgpuHeterogeneousMode(
399                self.0,
400                &raw mut mode,
401            ))?;
402        }
403        try_from_nvml_enum("enable state", mode.mode)
404    }
405
406    /// Query the creatable vGPU placement ID of the vGPU type within a GPU instance.
407    ///
408    /// For Blackwell &tm GB20x; or newer fully supported devices.
409    ///
410    /// The returned placement IDs correspond to the given `vgpu_type_id`.
411    /// This wrapper performs the NVML size query internally and returns the placement IDs as a [`Vec`].
412    /// The creatable vGPU placement IDs may differ over time, as there may be restrictions on what type of vGPU the vGPU instance is running.
413    ///
414    /// # Errors
415    ///
416    /// Returns an error if NVML rejects the versioned request, if the intermediate
417    /// size query reports a larger buffer requirement than expected, if this GPU
418    /// instance or query arguments are invalid, if the host or GPU does not support
419    /// the query or vGPU heterogeneous mode is disabled, if NVML has not been
420    /// initialized, or if NVML reports an unexpected failure.
421    pub fn vgpu_type_creatable_placements(
422        &self,
423        vgpu_type_id: VgpuTypeId,
424    ) -> Result<Vec<VgpuPlacementId>> {
425        let mut info = sys::nvmlVgpuCreatablePlacementInfo_t {
426            version: struct_version::<sys::nvmlVgpuCreatablePlacementInfo_t>(1),
427            vgpuTypeId: vgpu_type_id.0,
428            placementSize: size_of::<u32>() as u32,
429            ..Default::default()
430        };
431
432        let status =
433            unsafe { sys::nvmlGpuInstanceGetVgpuTypeCreatablePlacements(self.0, &raw mut info) };
434        if status == sys::nvmlReturn_t::NVML_SUCCESS && info.count == 0 {
435            return Ok(Vec::new());
436        }
437        if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
438            return Err(status.into());
439        }
440
441        let mut placements = vec![0u32; info.count as usize];
442        info.placementIds = placements.as_mut_ptr();
443        unsafe {
444            try_ffi!(sys::nvmlGpuInstanceGetVgpuTypeCreatablePlacements(
445                self.0,
446                &raw mut info,
447            ))?;
448        }
449        placements.truncate(info.count as usize);
450        Ok(placements.into_iter().map(VgpuPlacementId).collect())
451    }
452
453    /// Returns the vGPU scheduler state for the given GPU instance.
454    /// The returned scheduler-state details are not relevant when the scheduler policy is best effort.
455    ///
456    /// For Blackwell &tm GB20x; or newer fully supported devices.
457    ///
458    /// # Errors
459    ///
460    /// Returns an error if this GPU instance or query arguments are invalid, if the
461    /// host or GPU does not support vGPU scheduler queries, if NVML has not been
462    /// initialized, or if NVML reports an unexpected failure.
463    pub fn vgpu_scheduler_state(&self, engine: VgpuSchedulerEngine) -> Result<VgpuSchedulerState> {
464        let mut info = sys::nvmlVgpuSchedulerStateInfo_v2_t {
465            engineId: engine as u32,
466            ..Default::default()
467        };
468        unsafe {
469            try_ffi!(sys::nvmlGpuInstanceGetVgpuSchedulerState_v2(
470                self.0,
471                &raw mut info,
472            ))?;
473        }
474        VgpuSchedulerState::from_raw(info)
475    }
476
477    /// Returns the vGPU scheduler logs for this GPU instance.
478    /// The number of returned elements never exceeds
479    /// `NVML_SCHEDULER_SW_MAX_LOG_ENTRIES`.
480    ///
481    /// To get the entire logs, call this method at least 5 times a second.
482    ///
483    /// For Blackwell &tm GB20x; or newer fully supported devices.
484    ///
485    /// # Errors
486    ///
487    /// Returns an error if this GPU instance or query arguments are invalid, if the
488    /// host or GPU does not support vGPU scheduler queries, if NVML has not been
489    /// initialized, or if NVML reports an unexpected failure.
490    pub fn vgpu_scheduler_log(&self, engine: VgpuSchedulerEngine) -> Result<VgpuSchedulerLog> {
491        let mut info = sys::nvmlVgpuSchedulerLogInfo_v2_t {
492            engineId: engine as u32,
493            ..Default::default()
494        };
495        unsafe {
496            try_ffi!(sys::nvmlGpuInstanceGetVgpuSchedulerLog_v2(
497                self.0,
498                &raw mut info,
499            ))?;
500        }
501        VgpuSchedulerLog::from_raw(info)
502    }
503}
504
505impl OwnedGpuInstance {
506    pub const unsafe fn from_raw(handle: sys::nvmlGpuInstance_t) -> Self {
507        Self(GpuInstance(handle))
508    }
509
510    pub const fn as_gpu_instance(&self) -> &GpuInstance {
511        &self.0
512    }
513
514    pub fn into_inner(self) -> GpuInstance {
515        let this = ManuallyDrop::new(self);
516        unsafe { ptr::read(&this.0) }
517    }
518}
519
520impl Deref for OwnedGpuInstance {
521    type Target = GpuInstance;
522
523    fn deref(&self) -> &Self::Target {
524        &self.0
525    }
526}
527
528impl Drop for OwnedGpuInstance {
529    fn drop(&mut self) {
530        unsafe {
531            let _ = sys::nvmlGpuInstanceDestroy(self.0.0);
532        }
533    }
534}