singe_nvml/gpu_instance.rs
1#[allow(unused_imports)]
2use crate::error::Status;
3
4use std::{hash::Hash, mem::ManuallyDrop, ops::Deref, ptr};
5
6use singe_nvml_sys as sys;
7
8use crate::{
9 compute_instance::{ComputeInstance, OwnedComputeInstance},
10 device::Device,
11 error::Result,
12 try_ffi,
13 types::{
14 ComputeInstanceEngineProfile, ComputeInstancePlacement, ComputeInstanceProfileInfo,
15 EnableState, GpuInstanceInfo, VgpuInstanceId, VgpuPlacementId, VgpuSchedulerEngine,
16 VgpuSchedulerLog, VgpuSchedulerState, VgpuTypeId, try_from_nvml_enum,
17 },
18 utility::struct_version,
19 vgpu_instance::VgpuInstance,
20};
21
/// Handle to an NVML GPU instance, wrapping a raw `sys::nvmlGpuInstance_t`.
///
/// `#[repr(transparent)]` gives the wrapper the same layout as the raw handle.
/// No `Drop` implementation for this type is visible in this file; see
/// [`OwnedGpuInstance`] for the wrapper that destroys the instance when dropped.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct GpuInstance(sys::nvmlGpuInstance_t);
25
/// Owning wrapper around a [`GpuInstance`].
///
/// Dropping this value calls `nvmlGpuInstanceDestroy` on the wrapped handle; use
/// [`OwnedGpuInstance::into_inner`] to take the handle back without destroying it.
#[derive(Debug)]
pub struct OwnedGpuInstance(GpuInstance);
28
29impl GpuInstance {
30 pub const unsafe fn from_raw(handle: sys::nvmlGpuInstance_t) -> Self {
31 Self(handle)
32 }
33
34 pub const fn as_raw(&self) -> sys::nvmlGpuInstance_t {
35 self.0
36 }
37
38 pub const fn is_null(&self) -> bool {
39 self.0.is_null()
40 }
41
42 /// Returns GPU instance information.
43 ///
44 /// For Ampere or newer fully supported devices.
45 /// Supported on Linux only.
46 ///
47 /// # Errors
48 ///
49 /// Returns an error if the handle or output arguments are rejected by NVML, if
50 /// the current process does not have permission to query the instance, or if
51 /// NVML has not been initialized.
52 pub fn info(&self) -> Result<GpuInstanceInfo> {
53 let mut info = sys::nvmlGpuInstanceInfo_t::default();
54 unsafe {
55 try_ffi!(sys::nvmlGpuInstanceGetInfo(self.0, &raw mut info))?;
56 }
57 Ok(info.into())
58 }
59
60 pub fn parent_device(&self) -> Result<Device> {
61 Ok(self.info()?.device)
62 }
63
64 pub fn id(&self) -> Result<u32> {
65 Ok(self.info()?.id)
66 }
67
68 pub fn profile_id(&self) -> Result<u32> {
69 Ok(self.info()?.profile_id)
70 }
71
72 /// Versioned wrapper that requests compute-instance profile information using the latest supported NVML output layout.
73 ///
74 /// This wrapper sets the version field on the output structure before calling NVML.
75 ///
76 /// For Ampere or newer fully supported devices.
77 /// Supported on Linux only.
78 ///
79 /// # Errors
80 ///
81 /// Returns an error if the GPU instance, profile, engine profile, or structure
82 /// version is invalid, if the profile is not supported, if the current process
83 /// does not have permission to perform the operation, or if NVML has not been
84 /// initialized.
85 pub fn compute_instance_profile_info(
86 &self,
87 profile: u32,
88 engine_profile: ComputeInstanceEngineProfile,
89 ) -> Result<ComputeInstanceProfileInfo> {
90 let mut info = sys::nvmlComputeInstanceProfileInfo_v3_t {
91 version: struct_version::<sys::nvmlComputeInstanceProfileInfo_v3_t>(3),
92 ..Default::default()
93 };
94 unsafe {
95 try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceProfileInfoV(
96 self.0,
97 profile,
98 engine_profile.into(),
99 (&raw mut info).cast(),
100 ))?;
101 }
102 Ok(info.into())
103 }
104
105 /// Returns compute instance profile capacity.
106 ///
107 /// For Ampere or newer fully supported devices.
108 /// Supported on Linux only.
109 /// Requires privileged access.
110 ///
111 /// # Errors
112 ///
113 /// Returns an error if the GPU instance or `profile_id` is invalid, if the
114 /// profile is not supported, if the current process does not have permission
115 /// to perform the operation, or if NVML has not been initialized.
116 pub fn compute_instance_remaining_capacity(&self, profile_id: u32) -> Result<u32> {
117 let mut count = 0;
118 unsafe {
119 try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceRemainingCapacity(
120 self.0,
121 profile_id,
122 &raw mut count,
123 ))?;
124 }
125 Ok(count)
126 }
127
128 /// Returns compute instance placements.
129 ///
130 /// For Ampere or newer fully supported devices.
131 /// Supported on Linux only.
132 /// Requires privileged access.
133 ///
134 /// A placement represents the location of a compute instance within a GPU instance.
135 /// Returns all possible placements for the given profile.
136 /// A created compute instance occupies compute slices described by its placement.
137 /// Creating a compute instance fails if its placement overlaps already
138 /// occupied compute slices.
139 ///
140 /// # Errors
141 ///
142 /// Returns an error if the GPU instance or `profile_id` is invalid, if MIG mode
143 /// is not enabled or the profile is not supported, if the current process does
144 /// not have permission to perform the operation, or if NVML has not been
145 /// initialized.
146 pub fn compute_instance_possible_placements(
147 &self,
148 profile_id: u32,
149 ) -> Result<Vec<ComputeInstancePlacement>> {
150 let mut count = 0;
151 let status = unsafe {
152 sys::nvmlGpuInstanceGetComputeInstancePossiblePlacements(
153 self.0,
154 profile_id,
155 ptr::null_mut(),
156 &raw mut count,
157 )
158 };
159 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
160 return Ok(Vec::new());
161 }
162 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
163 return Err(status.into());
164 }
165
166 let mut placements = vec![sys::nvmlComputeInstancePlacement_t::default(); count as usize];
167 unsafe {
168 try_ffi!(sys::nvmlGpuInstanceGetComputeInstancePossiblePlacements(
169 self.0,
170 profile_id,
171 placements.as_mut_ptr(),
172 &raw mut count,
173 ))?;
174 }
175 placements.truncate(count as usize);
176 Ok(placements.into_iter().map(Into::into).collect())
177 }
178
179 /// Returns compute instance for the given instance ID.
180 ///
181 /// For Ampere or newer fully supported devices.
182 /// Supported on Linux only.
183 /// Requires privileged access.
184 ///
185 /// # Errors
186 ///
187 /// Returns an error if the GPU instance or ID is invalid, if the compute
188 /// instance is not found, if MIG mode is not enabled, if the current process
189 /// lacks permission, or if NVML has not been initialized.
190 pub fn compute_instance_by_id(&self, id: u32) -> Result<ComputeInstance> {
191 let mut instance = ptr::null_mut();
192 unsafe {
193 try_ffi!(sys::nvmlGpuInstanceGetComputeInstanceById(
194 self.0,
195 id,
196 &raw mut instance,
197 ))?;
198 Ok(ComputeInstance::from_raw(instance))
199 }
200 }
201
202 /// Creates a compute instance.
203 ///
204 /// For Ampere or newer fully supported devices.
205 /// Supported on Linux only.
206 /// Requires privileged access.
207 ///
208 /// If the parent device is unbound or reset, or if the parent GPU instance or compute instance is destroyed, the compute instance handle becomes invalid.
209 /// The compute instance must be recreated to acquire a valid handle.
210 ///
211 /// # Errors
212 ///
213 /// Returns an error if the requested compute instance cannot be created, if the
214 /// GPU instance or `profile_id` is invalid, if the profile is not supported, if
215 /// the current process lacks permission, or if NVML has not been initialized.
216 pub fn create_compute_instance(&self, profile_id: u32) -> Result<OwnedComputeInstance> {
217 let mut instance = ptr::null_mut();
218 unsafe {
219 try_ffi!(sys::nvmlGpuInstanceCreateComputeInstance(
220 self.0,
221 profile_id,
222 &raw mut instance,
223 ))?;
224 Ok(OwnedComputeInstance::from_raw(instance))
225 }
226 }
227
228 /// Creates a compute instance with the specified placement.
229 ///
230 /// For Ampere or newer fully supported devices.
231 /// Supported on Linux only.
232 /// Requires privileged access.
233 ///
234 /// If the parent device is unbound or reset, or if the parent GPU instance or compute instance is destroyed, the compute instance handle becomes invalid.
235 /// The compute instance must be recreated to acquire a valid handle.
236 ///
237 /// # Errors
238 ///
239 /// Returns an error if the requested compute instance cannot be created, if the
240 /// GPU instance, `profile_id`, or placement is invalid, if the profile is not
241 /// supported, if the current process lacks permission, or if NVML has not been
242 /// initialized.
243 pub fn create_compute_instance_with_placement(
244 &self,
245 profile_id: u32,
246 placement: ComputeInstancePlacement,
247 ) -> Result<OwnedComputeInstance> {
248 let placement = sys::nvmlComputeInstancePlacement_t::from(placement);
249 let mut instance = ptr::null_mut();
250 unsafe {
251 try_ffi!(sys::nvmlGpuInstanceCreateComputeInstanceWithPlacement(
252 self.0,
253 profile_id,
254 &raw const placement,
255 &raw mut instance,
256 ))?;
257 Ok(OwnedComputeInstance::from_raw(instance))
258 }
259 }
260
261 /// Returns compute instances for the given profile ID.
262 ///
263 /// For Ampere or newer fully supported devices.
264 /// Supported on Linux only.
265 /// Requires privileged access.
266 ///
267 /// # Errors
268 ///
269 /// Returns an error if the GPU instance or `profile_id` is invalid, if the
270 /// profile is not supported, if the current process lacks permission, or if
271 /// NVML has not been initialized.
272 pub fn compute_instances(&self, profile_id: u32) -> Result<Vec<ComputeInstance>> {
273 let mut count = 0;
274 let status = unsafe {
275 sys::nvmlGpuInstanceGetComputeInstances(
276 self.0,
277 profile_id,
278 ptr::null_mut(),
279 &raw mut count,
280 )
281 };
282 if status == sys::nvmlReturn_t::NVML_SUCCESS && count == 0 {
283 return Ok(Vec::new());
284 }
285 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
286 return Err(status.into());
287 }
288
289 let mut instances = vec![ptr::null_mut(); count as usize];
290 unsafe {
291 try_ffi!(sys::nvmlGpuInstanceGetComputeInstances(
292 self.0,
293 profile_id,
294 instances.as_mut_ptr(),
295 &raw mut count,
296 ))?;
297 }
298 instances.truncate(count as usize);
299 Ok(instances
300 .into_iter()
301 .map(|instance| unsafe { ComputeInstance::from_raw(instance) })
302 .collect())
303 }
304
305 /// Query the currently creatable vGPU types on a specific GPU instance.
306 ///
307 /// Returns the vGPU types that can currently be created for this GPU instance.
308 /// This wrapper performs the NVML size query internally and returns the results as a [`Vec`].
309 ///
310 /// The creatable vGPU types may differ over time, as there may be restrictions on what type of vGPUs can concurrently run on the device.
311 ///
312 /// # Errors
313 ///
314 /// Returns an error if NVML rejects the versioned request, if the intermediate
315 /// size query reports a larger buffer requirement than expected, if this GPU
316 /// instance or query arguments are invalid, if the host or GPU does not support
317 /// vGPU creation, if NVML has not been initialized, or if NVML reports an
318 /// unexpected failure.
319 pub fn creatable_vgpus(&self) -> Result<Vec<VgpuTypeId>> {
320 let mut info = sys::nvmlVgpuTypeIdInfo_t {
321 vgpuCount: 0,
322 ..Default::default()
323 };
324 let status = unsafe { sys::nvmlGpuInstanceGetCreatableVgpus(self.0, &raw mut info) };
325 if status == sys::nvmlReturn_t::NVML_SUCCESS && info.vgpuCount == 0 {
326 return Ok(Vec::new());
327 }
328 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
329 return Err(status.into());
330 }
331
332 let mut types = vec![0u32; info.vgpuCount as usize];
333 info.vgpuTypeIds = types.as_mut_ptr();
334 unsafe {
335 try_ffi!(sys::nvmlGpuInstanceGetCreatableVgpus(self.0, &raw mut info))?;
336 }
337 types.truncate(info.vgpuCount as usize);
338 Ok(types.into_iter().map(VgpuTypeId).collect())
339 }
340
341 /// Returns the active vGPU instances within a GPU instance.
342 ///
343 /// This wrapper performs the NVML size query internally and returns the active vGPU instances as a [`Vec`].
344 ///
345 /// # Errors
346 ///
347 /// Returns an error if NVML rejects the versioned request, if the intermediate
348 /// size query reports a larger buffer requirement than expected, if this GPU
349 /// instance or query arguments are invalid, if the host or GPU does not support
350 /// vGPU queries, if NVML has not been initialized, or if NVML reports an
351 /// unexpected failure.
352 pub fn active_vgpus(&self) -> Result<Vec<VgpuInstance>> {
353 let mut info = sys::nvmlActiveVgpuInstanceInfo_t {
354 version: struct_version::<sys::nvmlActiveVgpuInstanceInfo_t>(1),
355 ..Default::default()
356 };
357
358 let status = unsafe { sys::nvmlGpuInstanceGetActiveVgpus(self.0, &raw mut info) };
359 if status == sys::nvmlReturn_t::NVML_SUCCESS && info.vgpuCount == 0 {
360 return Ok(Vec::new());
361 }
362 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
363 return Err(status.into());
364 }
365
366 let mut instances = vec![0u32; info.vgpuCount as usize];
367 info.vgpuInstances = instances.as_mut_ptr();
368 unsafe {
369 try_ffi!(sys::nvmlGpuInstanceGetActiveVgpus(self.0, &raw mut info))?;
370 }
371 instances.truncate(info.vgpuCount as usize);
372 Ok(instances
373 .into_iter()
374 .map(|instance| VgpuInstance::from_id(VgpuInstanceId(instance)))
375 .collect())
376 }
377
378 /// Returns the vGPU heterogeneous mode for the GPU instance.
379 ///
380 /// When in heterogeneous mode, a vGPU can concurrently host timesliced vGPUs with differing framebuffer sizes.
381 ///
382 /// On success, returns the current vGPU heterogeneous mode as [`EnableState::Enabled`] or [`EnableState::Disabled`].
383 ///
384 /// For Blackwell &tm GB20x; or newer fully supported devices.
385 ///
386 /// # Errors
387 ///
388 /// Returns an error if NVML rejects the versioned request, if this GPU instance
389 /// or query arguments are invalid, if the host, GPU, or MIG mode does not
390 /// support the query, if NVML has not been initialized, or if NVML reports an
391 /// unexpected failure.
392 pub fn vgpu_heterogeneous_mode(&self) -> Result<EnableState> {
393 let mut mode = sys::nvmlVgpuHeterogeneousMode_t {
394 version: struct_version::<sys::nvmlVgpuHeterogeneousMode_t>(1),
395 ..Default::default()
396 };
397 unsafe {
398 try_ffi!(sys::nvmlGpuInstanceGetVgpuHeterogeneousMode(
399 self.0,
400 &raw mut mode,
401 ))?;
402 }
403 try_from_nvml_enum("enable state", mode.mode)
404 }
405
406 /// Query the creatable vGPU placement ID of the vGPU type within a GPU instance.
407 ///
408 /// For Blackwell &tm GB20x; or newer fully supported devices.
409 ///
410 /// The returned placement IDs correspond to the given `vgpu_type_id`.
411 /// This wrapper performs the NVML size query internally and returns the placement IDs as a [`Vec`].
412 /// The creatable vGPU placement IDs may differ over time, as there may be restrictions on what type of vGPU the vGPU instance is running.
413 ///
414 /// # Errors
415 ///
416 /// Returns an error if NVML rejects the versioned request, if the intermediate
417 /// size query reports a larger buffer requirement than expected, if this GPU
418 /// instance or query arguments are invalid, if the host or GPU does not support
419 /// the query or vGPU heterogeneous mode is disabled, if NVML has not been
420 /// initialized, or if NVML reports an unexpected failure.
421 pub fn vgpu_type_creatable_placements(
422 &self,
423 vgpu_type_id: VgpuTypeId,
424 ) -> Result<Vec<VgpuPlacementId>> {
425 let mut info = sys::nvmlVgpuCreatablePlacementInfo_t {
426 version: struct_version::<sys::nvmlVgpuCreatablePlacementInfo_t>(1),
427 vgpuTypeId: vgpu_type_id.0,
428 placementSize: size_of::<u32>() as u32,
429 ..Default::default()
430 };
431
432 let status =
433 unsafe { sys::nvmlGpuInstanceGetVgpuTypeCreatablePlacements(self.0, &raw mut info) };
434 if status == sys::nvmlReturn_t::NVML_SUCCESS && info.count == 0 {
435 return Ok(Vec::new());
436 }
437 if status != sys::nvmlReturn_t::NVML_ERROR_INSUFFICIENT_SIZE {
438 return Err(status.into());
439 }
440
441 let mut placements = vec![0u32; info.count as usize];
442 info.placementIds = placements.as_mut_ptr();
443 unsafe {
444 try_ffi!(sys::nvmlGpuInstanceGetVgpuTypeCreatablePlacements(
445 self.0,
446 &raw mut info,
447 ))?;
448 }
449 placements.truncate(info.count as usize);
450 Ok(placements.into_iter().map(VgpuPlacementId).collect())
451 }
452
453 /// Returns the vGPU scheduler state for the given GPU instance.
454 /// The returned scheduler-state details are not relevant when the scheduler policy is best effort.
455 ///
456 /// For Blackwell &tm GB20x; or newer fully supported devices.
457 ///
458 /// # Errors
459 ///
460 /// Returns an error if this GPU instance or query arguments are invalid, if the
461 /// host or GPU does not support vGPU scheduler queries, if NVML has not been
462 /// initialized, or if NVML reports an unexpected failure.
463 pub fn vgpu_scheduler_state(&self, engine: VgpuSchedulerEngine) -> Result<VgpuSchedulerState> {
464 let mut info = sys::nvmlVgpuSchedulerStateInfo_v2_t {
465 engineId: engine as u32,
466 ..Default::default()
467 };
468 unsafe {
469 try_ffi!(sys::nvmlGpuInstanceGetVgpuSchedulerState_v2(
470 self.0,
471 &raw mut info,
472 ))?;
473 }
474 VgpuSchedulerState::from_raw(info)
475 }
476
477 /// Returns the vGPU scheduler logs for this GPU instance.
478 /// The number of returned elements never exceeds
479 /// `NVML_SCHEDULER_SW_MAX_LOG_ENTRIES`.
480 ///
481 /// To get the entire logs, call this method at least 5 times a second.
482 ///
483 /// For Blackwell &tm GB20x; or newer fully supported devices.
484 ///
485 /// # Errors
486 ///
487 /// Returns an error if this GPU instance or query arguments are invalid, if the
488 /// host or GPU does not support vGPU scheduler queries, if NVML has not been
489 /// initialized, or if NVML reports an unexpected failure.
490 pub fn vgpu_scheduler_log(&self, engine: VgpuSchedulerEngine) -> Result<VgpuSchedulerLog> {
491 let mut info = sys::nvmlVgpuSchedulerLogInfo_v2_t {
492 engineId: engine as u32,
493 ..Default::default()
494 };
495 unsafe {
496 try_ffi!(sys::nvmlGpuInstanceGetVgpuSchedulerLog_v2(
497 self.0,
498 &raw mut info,
499 ))?;
500 }
501 VgpuSchedulerLog::from_raw(info)
502 }
503}
504
505impl OwnedGpuInstance {
506 pub const unsafe fn from_raw(handle: sys::nvmlGpuInstance_t) -> Self {
507 Self(GpuInstance(handle))
508 }
509
510 pub const fn as_gpu_instance(&self) -> &GpuInstance {
511 &self.0
512 }
513
514 pub fn into_inner(self) -> GpuInstance {
515 let this = ManuallyDrop::new(self);
516 unsafe { ptr::read(&this.0) }
517 }
518}
519
520impl Deref for OwnedGpuInstance {
521 type Target = GpuInstance;
522
523 fn deref(&self) -> &Self::Target {
524 &self.0
525 }
526}
527
528impl Drop for OwnedGpuInstance {
529 fn drop(&mut self) {
530 unsafe {
531 let _ = sys::nvmlGpuInstanceDestroy(self.0.0);
532 }
533 }
534}