// oxicuda_memory/memory_info.rs
1//! GPU memory usage queries and unified memory hints.
2//!
3//! This module provides:
4//!
5//! - [`MemoryInfo`] and [`memory_info`] for querying free/total GPU memory.
6//! - [`MemAdvice`] and [`mem_advise`] for providing memory usage hints to
7//! the CUDA unified memory subsystem.
8//! - [`mem_prefetch`] for prefetching unified memory to a specific device.
9//!
10//! # Example
11//!
12//! ```rust,no_run
13//! # use oxicuda_memory::memory_info::{memory_info, MemoryInfo};
14//! let info = memory_info()?;
15//! println!("GPU memory: {} MB free / {} MB total",
16//! info.free / (1024 * 1024),
17//! info.total / (1024 * 1024),
18//! );
19//! # Ok::<(), oxicuda_driver::error::CudaError>(())
20//! ```
21
22use oxicuda_driver::device::Device;
23use oxicuda_driver::error::{CudaError, CudaResult};
24use oxicuda_driver::loader::try_driver;
25use oxicuda_driver::stream::Stream;
26
27// ---------------------------------------------------------------------------
28// MemoryInfo
29// ---------------------------------------------------------------------------
30
/// GPU memory usage information.
///
/// Returned by [`memory_info`], this struct reports the free and total
/// device memory (in bytes) for the current CUDA context.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct MemoryInfo {
    /// Free device memory in bytes.
    pub free: usize,
    /// Total device memory in bytes.
    pub total: usize,
}

impl MemoryInfo {
    /// Returns the used memory in bytes (`total - free`).
    ///
    /// Saturates at zero instead of underflowing if `free` somehow
    /// exceeds `total` (e.g. inconsistent snapshots).
    #[inline]
    pub fn used(&self) -> usize {
        self.total.saturating_sub(self.free)
    }

    /// Returns the fraction of memory currently in use, in `[0.0, 1.0]`.
    ///
    /// Returns `0.0` when `total` is zero, avoiding a `0.0 / 0.0` NaN.
    #[inline]
    pub fn usage_fraction(&self) -> f64 {
        if self.total == 0 {
            return 0.0;
        }
        self.used() as f64 / self.total as f64
    }
}

impl std::fmt::Display for MemoryInfo {
    /// Renders as `MemoryInfo(free=<n> MB, total=<n> MB, used=<p>%)`,
    /// with sizes truncated to whole mebibytes via integer division.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "MemoryInfo(free={} MB, total={} MB, used={:.1}%)",
            self.free / (1024 * 1024),
            self.total / (1024 * 1024),
            self.usage_fraction() * 100.0,
        )
    }
}
71
72/// Queries free and total device memory for the current CUDA context.
73///
74/// The returned values reflect the state at the time of the query and
75/// may change as other threads or processes allocate or free memory.
76///
77/// # Errors
78///
79/// Returns an error if no context is current or the driver call fails.
80pub fn memory_info() -> CudaResult<MemoryInfo> {
81 let driver = try_driver()?;
82 let mut free: usize = 0;
83 let mut total: usize = 0;
84 oxicuda_driver::check(unsafe { (driver.cu_mem_get_info_v2)(&mut free, &mut total) })?;
85 Ok(MemoryInfo { free, total })
86}
87
88// ---------------------------------------------------------------------------
89// MemAdvice
90// ---------------------------------------------------------------------------
91
/// Memory advice hints for unified (managed) memory.
///
/// These hints guide the CUDA runtime's page migration and caching
/// decisions for unified memory allocations. Providing accurate hints
/// can significantly improve performance by reducing unnecessary page
/// migrations.
///
/// The discriminants are passed verbatim to the driver by [`mem_advise`]
/// (as `advice as u32`), so they must stay in sync with the driver's
/// `CUmem_advise` enum values — do not renumber or reorder variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum MemAdvice {
    /// Mark the memory region as read-mostly. This creates read-only
    /// copies on accessing processors, reducing migration overhead.
    /// Writes to the region remain valid but invalidate the copies.
    SetReadMostly = 1,
    /// Undo a previous `SetReadMostly` hint.
    UnsetReadMostly = 2,
    /// Set the preferred location for the memory region. The data will
    /// preferably reside on the specified device.
    SetPreferredLocation = 3,
    /// Undo a previous `SetPreferredLocation` hint.
    UnsetPreferredLocation = 4,
    /// Indicate that the specified device will access this memory region.
    /// This can cause the driver to create a mapping on that device.
    SetAccessedBy = 5,
    /// Undo a previous `SetAccessedBy` hint.
    UnsetAccessedBy = 6,
}
117
118/// Provides a memory usage hint for a unified memory region.
119///
120/// # Parameters
121///
122/// * `ptr` — device pointer to the start of the unified memory region.
123/// * `count` — size of the region in bytes.
124/// * `advice` — the usage hint to apply.
125/// * `device` — the device to which the hint applies.
126///
127/// # Errors
128///
129/// Returns an error if the pointer is not a managed allocation, the
130/// device is invalid, or the driver call fails.
131pub fn mem_advise(ptr: u64, count: usize, advice: MemAdvice, device: &Device) -> CudaResult<()> {
132 if count == 0 {
133 return Err(CudaError::InvalidValue);
134 }
135 let driver = try_driver()?;
136 oxicuda_driver::check(unsafe {
137 (driver.cu_mem_advise)(ptr, count, advice as u32, device.raw())
138 })
139}
140
141/// Prefetches unified memory to the specified device.
142///
143/// This is an asynchronous operation enqueued on `stream`. The data
144/// is migrated to the target device so that subsequent accesses from
145/// that device will not cause page faults.
146///
147/// # Parameters
148///
149/// * `ptr` — device pointer to the start of the unified memory region.
150/// * `count` — size of the region in bytes.
151/// * `device` — the target device to prefetch to.
152/// * `stream` — the stream on which to enqueue the prefetch.
153///
154/// # Errors
155///
156/// Returns an error if the pointer is not a managed allocation, the
157/// device is invalid, or the driver call fails.
158pub fn mem_prefetch(ptr: u64, count: usize, device: &Device, stream: &Stream) -> CudaResult<()> {
159 if count == 0 {
160 return Err(CudaError::InvalidValue);
161 }
162 let driver = try_driver()?;
163 oxicuda_driver::check(unsafe {
164 (driver.cu_mem_prefetch_async)(ptr, count, device.raw(), stream.raw())
165 })
166}
167
168// ---------------------------------------------------------------------------
169// Tests
170// ---------------------------------------------------------------------------
171
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn memory_info_used_calculation() {
        let snapshot = MemoryInfo {
            free: 4096,
            total: 8192,
        };
        assert_eq!(snapshot.used(), 4096);
    }

    #[test]
    fn memory_info_usage_fraction() {
        let snapshot = MemoryInfo {
            free: 2048,
            total: 8192,
        };
        assert!((snapshot.usage_fraction() - 0.75).abs() < 1e-10);
    }

    #[test]
    fn memory_info_usage_fraction_zero_total() {
        let empty = MemoryInfo { free: 0, total: 0 };
        assert!(empty.usage_fraction().abs() < 1e-10);
    }

    #[test]
    fn memory_info_display() {
        let snapshot = MemoryInfo {
            free: 4 * 1024 * 1024,
            total: 8 * 1024 * 1024,
        };
        let rendered = snapshot.to_string();
        assert!(rendered.contains("free=4 MB"));
        assert!(rendered.contains("total=8 MB"));
    }

    #[test]
    fn mem_advice_variants() {
        // Discriminants must stay aligned with the driver's raw values.
        let expected: [(MemAdvice, u32); 6] = [
            (MemAdvice::SetReadMostly, 1),
            (MemAdvice::UnsetReadMostly, 2),
            (MemAdvice::SetPreferredLocation, 3),
            (MemAdvice::UnsetPreferredLocation, 4),
            (MemAdvice::SetAccessedBy, 5),
            (MemAdvice::UnsetAccessedBy, 6),
        ];
        for &(advice, raw) in expected.iter() {
            assert_eq!(advice as u32, raw);
        }
    }

    #[test]
    fn mem_advise_rejects_zero_count() {
        // A device may be unavailable (e.g. on macOS), so exercise the
        // zero-count rejection only when one can actually be obtained.
        if let Ok(dev) = Device::get(0) {
            assert!(mem_advise(0x1000, 0, MemAdvice::SetReadMostly, &dev).is_err());
        }
    }

    #[test]
    fn mem_prefetch_rejects_zero_count() {
        // Without a GPU context we cannot construct a Stream; at minimum,
        // pin the function signature at compile time.
        let _: fn(u64, usize, &Device, &Stream) -> CudaResult<()> = mem_prefetch;
    }

    #[test]
    fn memory_info_signature_compiles() {
        let _: fn() -> CudaResult<MemoryInfo> = memory_info;
    }
}