Skip to main content

oxicuda_memory/
memory_info.rs

1//! GPU memory usage queries and unified memory hints.
2//!
3//! This module provides:
4//!
5//! - [`MemoryInfo`] and [`memory_info`] for querying free/total GPU memory.
6//! - [`MemAdvice`] and [`mem_advise`] for providing memory usage hints to
7//!   the CUDA unified memory subsystem.
8//! - [`mem_prefetch`] for prefetching unified memory to a specific device.
9//!
10//! # Example
11//!
12//! ```rust,no_run
13//! # use oxicuda_memory::memory_info::{memory_info, MemoryInfo};
14//! let info = memory_info()?;
15//! println!("GPU memory: {} MB free / {} MB total",
16//!     info.free / (1024 * 1024),
17//!     info.total / (1024 * 1024),
18//! );
19//! # Ok::<(), oxicuda_driver::error::CudaError>(())
20//! ```
21
22use oxicuda_driver::device::Device;
23use oxicuda_driver::error::{CudaError, CudaResult};
24use oxicuda_driver::loader::try_driver;
25use oxicuda_driver::stream::Stream;
26
27// ---------------------------------------------------------------------------
28// MemoryInfo
29// ---------------------------------------------------------------------------
30
/// Snapshot of device memory availability.
///
/// Produced by [`memory_info`]; both fields are byte counts describing the
/// current CUDA context at the moment the query was made.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MemoryInfo {
    /// Number of bytes of device memory that are currently free.
    pub free: usize,
    /// Total number of bytes of device memory.
    pub total: usize,
}
42
43impl MemoryInfo {
44    /// Returns the used memory in bytes (`total - free`).
45    #[inline]
46    pub fn used(&self) -> usize {
47        self.total.saturating_sub(self.free)
48    }
49
50    /// Returns the fraction of memory currently in use (0.0 to 1.0).
51    #[inline]
52    pub fn usage_fraction(&self) -> f64 {
53        if self.total == 0 {
54            return 0.0;
55        }
56        self.used() as f64 / self.total as f64
57    }
58}
59
60impl std::fmt::Display for MemoryInfo {
61    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
62        write!(
63            f,
64            "MemoryInfo(free={} MB, total={} MB, used={:.1}%)",
65            self.free / (1024 * 1024),
66            self.total / (1024 * 1024),
67            self.usage_fraction() * 100.0,
68        )
69    }
70}
71
72/// Queries free and total device memory for the current CUDA context.
73///
74/// The returned values reflect the state at the time of the query and
75/// may change as other threads or processes allocate or free memory.
76///
77/// # Errors
78///
79/// Returns an error if no context is current or the driver call fails.
80pub fn memory_info() -> CudaResult<MemoryInfo> {
81    let driver = try_driver()?;
82    let mut free: usize = 0;
83    let mut total: usize = 0;
84    oxicuda_driver::check(unsafe { (driver.cu_mem_get_info_v2)(&mut free, &mut total) })?;
85    Ok(MemoryInfo { free, total })
86}
87
88// ---------------------------------------------------------------------------
89// MemAdvice
90// ---------------------------------------------------------------------------
91
/// Usage hints that can be attached to a unified (managed) memory range.
///
/// The hints steer the CUDA runtime's page-migration and caching decisions
/// for managed allocations; accurate hints can noticeably cut down on
/// needless page migrations.
///
/// The enum is `#[repr(u32)]` and [`mem_advise`] passes the discriminant to
/// the driver unchanged, so the numeric values below are part of the
/// contract.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(u32)]
pub enum MemAdvice {
    /// Treat the range as read-mostly: accessing processors get read-only
    /// copies, which lowers migration overhead.
    SetReadMostly = 1,
    /// Revert an earlier `SetReadMostly` hint.
    UnsetReadMostly = 2,
    /// Declare a preferred location for the range; the data will
    /// preferably live on the given device.
    SetPreferredLocation = 3,
    /// Revert an earlier `SetPreferredLocation` hint.
    UnsetPreferredLocation = 4,
    /// Announce that the given device will access the range, which can
    /// lead the driver to establish a mapping on that device.
    SetAccessedBy = 5,
    /// Revert an earlier `SetAccessedBy` hint.
    UnsetAccessedBy = 6,
}
117
118/// Provides a memory usage hint for a unified memory region.
119///
120/// # Parameters
121///
122/// * `ptr` — device pointer to the start of the unified memory region.
123/// * `count` — size of the region in bytes.
124/// * `advice` — the usage hint to apply.
125/// * `device` — the device to which the hint applies.
126///
127/// # Errors
128///
129/// Returns an error if the pointer is not a managed allocation, the
130/// device is invalid, or the driver call fails.
131pub fn mem_advise(ptr: u64, count: usize, advice: MemAdvice, device: &Device) -> CudaResult<()> {
132    if count == 0 {
133        return Err(CudaError::InvalidValue);
134    }
135    let driver = try_driver()?;
136    oxicuda_driver::check(unsafe {
137        (driver.cu_mem_advise)(ptr, count, advice as u32, device.raw())
138    })
139}
140
141/// Prefetches unified memory to the specified device.
142///
143/// This is an asynchronous operation enqueued on `stream`. The data
144/// is migrated to the target device so that subsequent accesses from
145/// that device will not cause page faults.
146///
147/// # Parameters
148///
149/// * `ptr` — device pointer to the start of the unified memory region.
150/// * `count` — size of the region in bytes.
151/// * `device` — the target device to prefetch to.
152/// * `stream` — the stream on which to enqueue the prefetch.
153///
154/// # Errors
155///
156/// Returns an error if the pointer is not a managed allocation, the
157/// device is invalid, or the driver call fails.
158pub fn mem_prefetch(ptr: u64, count: usize, device: &Device, stream: &Stream) -> CudaResult<()> {
159    if count == 0 {
160        return Err(CudaError::InvalidValue);
161    }
162    let driver = try_driver()?;
163    oxicuda_driver::check(unsafe {
164        (driver.cu_mem_prefetch_async)(ptr, count, device.raw(), stream.raw())
165    })
166}
167
168// ---------------------------------------------------------------------------
169// Tests
170// ---------------------------------------------------------------------------
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes per "MB" as used by the `Display` implementation.
    const MB: usize = 1024 * 1024;

    #[test]
    fn memory_info_used_calculation() {
        let info = MemoryInfo {
            free: 4096,
            total: 8192,
        };
        assert_eq!(info.used(), 4096);
    }

    /// `used()` saturates instead of underflowing when the snapshot is
    /// inconsistent (`free > total`).
    #[test]
    fn memory_info_used_saturates_when_free_exceeds_total() {
        let info = MemoryInfo { free: 10, total: 4 };
        assert_eq!(info.used(), 0);
        assert!(info.usage_fraction().abs() < 1e-10);
    }

    #[test]
    fn memory_info_usage_fraction() {
        let info = MemoryInfo {
            free: 2048,
            total: 8192,
        };
        let frac = info.usage_fraction();
        assert!((frac - 0.75).abs() < 1e-10);
    }

    #[test]
    fn memory_info_usage_fraction_zero_total() {
        let info = MemoryInfo { free: 0, total: 0 };
        assert!(info.usage_fraction().abs() < 1e-10);
    }

    #[test]
    fn memory_info_display() {
        let info = MemoryInfo {
            free: 4 * MB,
            total: 8 * MB,
        };
        let s = format!("{info}");
        assert!(s.contains("free=4 MB"));
        assert!(s.contains("total=8 MB"));
        assert!(s.contains("used=50.0%"));
    }

    #[test]
    fn mem_advice_variants() {
        assert_eq!(MemAdvice::SetReadMostly as u32, 1);
        assert_eq!(MemAdvice::UnsetReadMostly as u32, 2);
        assert_eq!(MemAdvice::SetPreferredLocation as u32, 3);
        assert_eq!(MemAdvice::UnsetPreferredLocation as u32, 4);
        assert_eq!(MemAdvice::SetAccessedBy as u32, 5);
        assert_eq!(MemAdvice::UnsetAccessedBy as u32, 6);
    }

    #[test]
    fn mem_advise_rejects_zero_count() {
        // On machines without a CUDA device (e.g. macOS) `Device::get` fails,
        // so the zero-count path is only exercised when a device handle can
        // actually be constructed.
        if let Ok(dev) = Device::get(0) {
            let result = mem_advise(0x1000, 0, MemAdvice::SetReadMostly, &dev);
            // The zero-count guard runs before the driver is consulted, so the
            // error must be exactly `InvalidValue`, not some driver failure.
            assert!(matches!(result, Err(CudaError::InvalidValue)));
        }
    }

    #[test]
    fn mem_prefetch_signature_compiles() {
        // A `Stream` cannot be constructed without a GPU context, so the
        // zero-count path of `mem_prefetch` is not reachable here; this only
        // pins the public signature.
        let _: fn(u64, usize, &Device, &Stream) -> CudaResult<()> = mem_prefetch;
    }

    #[test]
    fn memory_info_signature_compiles() {
        let _: fn() -> CudaResult<MemoryInfo> = memory_info;
    }
}