// oxicuda_driver/stream_ordered_alloc.rs

//! Stream-ordered memory allocation (CUDA 11.2+ / 12.x+).
//!
//! Stream-ordered memory allocation allows memory operations (`alloc` / `free`)
//! to participate in the stream execution order, eliminating the need for
//! explicit synchronisation between allocation and kernel launch.
//!
//! This module provides:
//!
//! * [`StreamMemoryPool`] — a memory pool bound to a specific device.
//! * [`StreamAllocation`] — a handle to a stream-ordered allocation.
//! * [`StreamOrderedAllocConfig`] — pool configuration (sizes, thresholds).
//! * [`PoolAttribute`] / [`PoolUsageStats`] — attribute queries and statistics.
//! * [`PoolExportDescriptor`] / [`ShareableHandleType`] — IPC sharing metadata.
//! * [`stream_alloc`] / [`stream_free`] — convenience free functions.
//!
//! # Platform behaviour
//!
//! On macOS (where NVIDIA dropped CUDA support) there is no GPU driver.
//! Allocation and free are simulated there with synthetic, non-zero pointers
//! so that the bookkeeping can be exercised, while driver-only operations such
//! as trimming and peer access return `Err(CudaError::NotSupported)`.  Config
//! validation, statistics tracking, and accessor methods work everywhere.
//!
//! # Example
//!
//! ```rust,no_run
//! use oxicuda_driver::stream_ordered_alloc::*;
//!
//! let config = StreamOrderedAllocConfig::default_for_device(0);
//! let mut pool = StreamMemoryPool::new(config)?;
//!
//! let stream_handle = 0u64; // placeholder
//! let mut alloc = pool.alloc_async(1024, stream_handle)?;
//! assert_eq!(alloc.size(), 1024);
//! assert!(!alloc.is_freed());
//!
//! pool.free_async(&mut alloc)?;
//! assert!(alloc.is_freed());
//! # Ok::<(), oxicuda_driver::CudaError>(())
//! ```
39
use std::fmt;

use crate::error::{CudaError, CudaResult};
use crate::ffi::CUdeviceptr;
44
// ---------------------------------------------------------------------------
// Constants — CUmemPool_attribute (mirrors CUDA header values)
// ---------------------------------------------------------------------------
48
/// Pool reuse policy: follow event dependencies.
pub const CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: u32 = 1;
/// Pool reuse policy: allow opportunistic reuse.
pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: u32 = 2;
/// Pool reuse policy: allow internal dependency insertion.
pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: u32 = 3;
/// Release threshold in bytes: the amount of reserved memory the pool holds
/// on to before it tries to release memory back to the OS.
pub const CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: u32 = 4;
/// Current reserved memory (bytes) — read-only.
pub const CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT: u32 = 5;
/// High-water mark of reserved memory (bytes) — resettable.
pub const CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: u32 = 6;
/// Current used memory (bytes) — read-only.
pub const CU_MEMPOOL_ATTR_USED_MEM_CURRENT: u32 = 7;
/// High-water mark of used memory (bytes) — resettable.
pub const CU_MEMPOOL_ATTR_USED_MEM_HIGH: u32 = 8;
65
// ---------------------------------------------------------------------------
// StreamOrderedAllocConfig
// ---------------------------------------------------------------------------
69
70/// Configuration for a stream-ordered memory pool.
71///
72/// All sizes are in bytes.
73#[derive(Debug, Clone, PartialEq, Eq)]
74pub struct StreamOrderedAllocConfig {
75    /// Initial pool size in bytes.  The pool pre-reserves this amount of
76    /// device memory when created.
77    pub initial_pool_size: usize,
78
79    /// Maximum pool size in bytes.  `0` means unlimited — the pool will grow
80    /// as needed (subject to device memory limits).
81    pub max_pool_size: usize,
82
83    /// Release threshold in bytes.  When the pool is trimmed, at least this
84    /// much memory is kept reserved for future allocations.
85    pub release_threshold: usize,
86
87    /// The device ordinal to create the pool on.
88    pub device: i32,
89}
90
91impl StreamOrderedAllocConfig {
92    /// Validate that the configuration is internally consistent.
93    ///
94    /// # Rules
95    ///
96    /// * `initial_pool_size` must not exceed `max_pool_size` (when
97    ///   `max_pool_size > 0`).
98    /// * `release_threshold` must not exceed `max_pool_size` (when
99    ///   `max_pool_size > 0`).
100    /// * `device` must be non-negative.
101    ///
102    /// # Errors
103    ///
104    /// Returns [`CudaError::InvalidValue`] if any rule is violated.
105    pub fn validate(&self) -> CudaResult<()> {
106        if self.device < 0 {
107            return Err(CudaError::InvalidValue);
108        }
109
110        if self.max_pool_size > 0 {
111            if self.initial_pool_size > self.max_pool_size {
112                return Err(CudaError::InvalidValue);
113            }
114            if self.release_threshold > self.max_pool_size {
115                return Err(CudaError::InvalidValue);
116            }
117        }
118
119        Ok(())
120    }
121
122    /// Returns a sensible default configuration for the given device.
123    ///
124    /// * `initial_pool_size` = 0 (grow on demand)
125    /// * `max_pool_size` = 0 (unlimited)
126    /// * `release_threshold` = 0 (release everything on trim)
127    pub fn default_for_device(device: i32) -> Self {
128        Self {
129            initial_pool_size: 0,
130            max_pool_size: 0,
131            release_threshold: 0,
132            device,
133        }
134    }
135}
136
// ---------------------------------------------------------------------------
// PoolAttribute
// ---------------------------------------------------------------------------
140
141/// Attributes that can be queried or set on a [`StreamMemoryPool`].
142#[derive(Debug, Clone, Copy, PartialEq, Eq)]
143pub enum PoolAttribute {
144    /// Whether freed blocks can be reused by following event dependencies.
145    ReuseFollowEventDependencies,
146    /// Whether freed blocks can be opportunistically reused (without ordering).
147    ReuseAllowOpportunistic,
148    /// Whether the pool may insert internal dependencies for reuse.
149    ReuseAllowInternalDependencies,
150    /// The release threshold in bytes.
151    ReleaseThreshold(u64),
152    /// Current reserved memory (read-only query).
153    ReservedMemCurrent,
154    /// High-water mark of reserved memory.
155    ReservedMemHigh,
156    /// Current used memory (read-only query).
157    UsedMemCurrent,
158    /// High-water mark of used memory.
159    UsedMemHigh,
160}
161
162impl PoolAttribute {
163    /// Convert to the raw CUDA attribute constant.
164    pub fn to_raw(self) -> u32 {
165        match self {
166            Self::ReuseFollowEventDependencies => CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES,
167            Self::ReuseAllowOpportunistic => CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
168            Self::ReuseAllowInternalDependencies => {
169                CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES
170            }
171            Self::ReleaseThreshold(_) => CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
172            Self::ReservedMemCurrent => CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
173            Self::ReservedMemHigh => CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
174            Self::UsedMemCurrent => CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
175            Self::UsedMemHigh => CU_MEMPOOL_ATTR_USED_MEM_HIGH,
176        }
177    }
178}
179
// ---------------------------------------------------------------------------
// PoolUsageStats
// ---------------------------------------------------------------------------
183
/// Snapshot of pool memory usage.
///
/// The `Default` value has every counter set to zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct PoolUsageStats {
    /// Bytes currently reserved from the device allocator.
    pub reserved_current: u64,
    /// Peak bytes reserved (since creation or last reset).
    pub reserved_high: u64,
    /// Bytes currently in use by outstanding allocations.
    pub used_current: u64,
    /// Peak bytes in use (since creation or last reset).
    pub used_high: u64,
    /// Number of active (not-yet-freed) allocations.
    pub active_allocations: usize,
    /// Peak number of concurrent allocations.
    pub peak_allocations: usize,
}
200
// ---------------------------------------------------------------------------
// ShareableHandleType / PoolExportDescriptor
// ---------------------------------------------------------------------------
204
/// Handle type used for IPC sharing of memory pools.
///
/// Defaults to [`ShareableHandleType::None`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ShareableHandleType {
    /// No sharing.
    #[default]
    None,
    /// POSIX file descriptor (Linux).
    PosixFileDescriptor,
    /// Win32 handle (Windows).
    Win32Handle,
    /// Win32 KMT handle (Windows, legacy).
    Win32KmtHandle,
}

/// Descriptor for exporting a pool for IPC sharing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct PoolExportDescriptor {
    /// The handle type to use for sharing.
    pub shareable_handle_type: ShareableHandleType,
    /// The device ordinal that owns the pool.
    pub pool_device: i32,
}
227
// ---------------------------------------------------------------------------
// StreamAllocation
// ---------------------------------------------------------------------------
231
232/// Handle to a stream-ordered memory allocation.
233///
234/// An allocation lives on the GPU and is associated with a specific stream
235/// and memory pool.  It becomes available when all preceding work on the
236/// stream has completed, and is returned to the pool when freed (also
237/// stream-ordered).
238pub struct StreamAllocation {
239    /// Device pointer (`CUdeviceptr`).
240    ptr: CUdeviceptr,
241    /// Size of the allocation in bytes.
242    size: usize,
243    /// The stream this allocation is ordered on.
244    stream: u64,
245    /// The pool handle that owns this allocation.
246    pool: u64,
247    /// Whether this allocation has already been freed.
248    freed: bool,
249}
250
251impl StreamAllocation {
252    /// Returns the device pointer as a raw `u64` (`CUdeviceptr`).
253    #[inline]
254    pub fn as_ptr(&self) -> u64 {
255        self.ptr
256    }
257
258    /// Returns the allocation size in bytes.
259    #[inline]
260    pub fn size(&self) -> usize {
261        self.size
262    }
263
264    /// Returns `true` if this allocation has been freed.
265    #[inline]
266    pub fn is_freed(&self) -> bool {
267        self.freed
268    }
269
270    /// Returns the stream handle this allocation is ordered on.
271    #[inline]
272    pub fn stream(&self) -> u64 {
273        self.stream
274    }
275
276    /// Returns the pool handle that owns this allocation.
277    #[inline]
278    pub fn pool(&self) -> u64 {
279        self.pool
280    }
281}
282
283impl fmt::Debug for StreamAllocation {
284    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
285        f.debug_struct("StreamAllocation")
286            .field("ptr", &format_args!("0x{:016x}", self.ptr))
287            .field("size", &self.size)
288            .field("stream", &format_args!("0x{:016x}", self.stream))
289            .field("freed", &self.freed)
290            .finish()
291    }
292}
293
// ---------------------------------------------------------------------------
// StreamMemoryPool
// ---------------------------------------------------------------------------
297
298/// A memory pool for stream-ordered allocations.
299///
300/// On platforms with a real CUDA driver (Linux, Windows), creating a pool
301/// calls `cuMemPoolCreate` under the hood.  On macOS (where there is no
302/// NVIDIA driver), pool metadata is tracked locally but any operation that
303/// would require the driver returns `Err(CudaError::NotSupported)`.
304///
305/// # Allocation tracking
306///
307/// The pool tracks allocation counts and byte totals locally for
308/// diagnostics.  These statistics are maintained even on macOS so that
309/// the API surface can be exercised in tests.
310pub struct StreamMemoryPool {
311    /// Raw `CUmemoryPool` handle (0 if not backed by a real driver pool).
312    handle: u64,
313    /// Device ordinal.
314    device: i32,
315    /// Configuration used to create this pool.
316    config: StreamOrderedAllocConfig,
317    /// Number of currently active (not freed) allocations.
318    active_allocations: usize,
319    /// Total bytes currently allocated.
320    total_allocated: usize,
321    /// Peak bytes ever allocated concurrently.
322    peak_allocated: usize,
323    /// Peak number of concurrent allocations.
324    peak_allocation_count: usize,
325    /// Monotonically increasing allocation id for generating unique pointers
326    /// in non-GPU mode.
327    #[cfg_attr(not(target_os = "macos"), allow(dead_code))]
328    next_alloc_id: u64,
329}
330
331impl fmt::Debug for StreamMemoryPool {
332    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
333        f.debug_struct("StreamMemoryPool")
334            .field("handle", &format_args!("0x{:016x}", self.handle))
335            .field("device", &self.device)
336            .field("active_allocations", &self.active_allocations)
337            .field("total_allocated", &self.total_allocated)
338            .field("peak_allocated", &self.peak_allocated)
339            .finish()
340    }
341}
342
343impl StreamMemoryPool {
344    /// Create a new memory pool for the given device.
345    ///
346    /// The configuration is validated before the pool is created.  On
347    /// platforms with a real CUDA driver, `cuMemPoolCreate` is invoked.
348    /// On macOS, a local-only pool is created for testing purposes.
349    ///
350    /// # Errors
351    ///
352    /// * [`CudaError::InvalidValue`] if the config fails validation.
353    /// * [`CudaError::NotSupported`] on macOS (pool metadata is still created
354    ///   so that tests can exercise the API).
355    pub fn new(config: StreamOrderedAllocConfig) -> CudaResult<Self> {
356        config.validate()?;
357
358        let pool = Self {
359            handle: 0,
360            device: config.device,
361            config,
362            active_allocations: 0,
363            total_allocated: 0,
364            peak_allocated: 0,
365            peak_allocation_count: 0,
366            next_alloc_id: 1,
367        };
368
369        // On real GPU platforms, we would call cuMemPoolCreate here.
370        // The pool handle would be stored in `self.handle`.
371        #[cfg(not(target_os = "macos"))]
372        {
373            Self::gpu_create_pool(&pool)?;
374        }
375
376        Ok(pool)
377    }
378
379    /// Allocate memory on a stream (stream-ordered).
380    ///
381    /// The allocation becomes available when all prior work on the stream
382    /// has completed.  The returned [`StreamAllocation`] tracks the pointer,
383    /// size, and ownership.
384    ///
385    /// # Errors
386    ///
387    /// * [`CudaError::InvalidValue`] if `size` is zero.
388    /// * [`CudaError::OutOfMemory`] if `max_pool_size` would be exceeded.
389    /// * [`CudaError::NotSupported`] on macOS.
390    pub fn alloc_async(&mut self, size: usize, stream: u64) -> CudaResult<StreamAllocation> {
391        if size == 0 {
392            return Err(CudaError::InvalidValue);
393        }
394
395        // Check max pool size constraint.
396        if self.config.max_pool_size > 0
397            && self.total_allocated.saturating_add(size) > self.config.max_pool_size
398        {
399            return Err(CudaError::OutOfMemory);
400        }
401
402        let ptr = self.platform_alloc_async(size, stream)?;
403
404        // Update bookkeeping.
405        self.active_allocations += 1;
406        self.total_allocated = self.total_allocated.saturating_add(size);
407        if self.total_allocated > self.peak_allocated {
408            self.peak_allocated = self.total_allocated;
409        }
410        if self.active_allocations > self.peak_allocation_count {
411            self.peak_allocation_count = self.active_allocations;
412        }
413
414        Ok(StreamAllocation {
415            ptr,
416            size,
417            stream,
418            pool: self.handle,
419            freed: false,
420        })
421    }
422
423    /// Free memory on a stream (stream-ordered).
424    ///
425    /// The memory is returned to the pool when all prior work on the
426    /// stream has completed.  The allocation is marked as freed and
427    /// cannot be freed again.
428    ///
429    /// # Errors
430    ///
431    /// * [`CudaError::InvalidValue`] if the allocation is already freed.
432    /// * [`CudaError::NotSupported`] on macOS.
433    pub fn free_async(&mut self, alloc: &mut StreamAllocation) -> CudaResult<()> {
434        if alloc.freed {
435            return Err(CudaError::InvalidValue);
436        }
437
438        self.platform_free_async(alloc)?;
439
440        alloc.freed = true;
441        self.active_allocations = self.active_allocations.saturating_sub(1);
442        self.total_allocated = self.total_allocated.saturating_sub(alloc.size);
443
444        Ok(())
445    }
446
447    /// Trim the pool, releasing unused memory back to the OS.
448    ///
449    /// At least `min_bytes_to_keep` bytes of reserved memory will remain
450    /// in the pool for future allocations.
451    ///
452    /// # Errors
453    ///
454    /// * [`CudaError::NotSupported`] on macOS.
455    pub fn trim(&mut self, min_bytes_to_keep: usize) -> CudaResult<()> {
456        self.platform_trim(min_bytes_to_keep)
457    }
458
459    /// Get pool usage statistics.
460    ///
461    /// The returned [`PoolUsageStats`] combines locally tracked allocation
462    /// counts with byte-level information.  On macOS, the reserved/used
463    /// byte fields mirror the local bookkeeping since no driver is available.
464    pub fn stats(&self) -> PoolUsageStats {
465        PoolUsageStats {
466            reserved_current: self.total_allocated as u64,
467            reserved_high: self.peak_allocated as u64,
468            used_current: self.total_allocated as u64,
469            used_high: self.peak_allocated as u64,
470            active_allocations: self.active_allocations,
471            peak_allocations: self.peak_allocation_count,
472        }
473    }
474
475    /// Set a pool attribute.
476    ///
477    /// Only attributes that carry a value (e.g. [`PoolAttribute::ReleaseThreshold`])
478    /// modify pool state.  Read-only attributes (e.g. `ReservedMemCurrent`)
479    /// return [`CudaError::InvalidValue`].
480    ///
481    /// # Errors
482    ///
483    /// * [`CudaError::InvalidValue`] for read-only attributes.
484    /// * [`CudaError::NotSupported`] on macOS.
485    pub fn set_attribute(&mut self, attr: PoolAttribute) -> CudaResult<()> {
486        // Read-only attributes cannot be set.
487        match attr {
488            PoolAttribute::ReservedMemCurrent
489            | PoolAttribute::UsedMemCurrent
490            | PoolAttribute::ReservedMemHigh
491            | PoolAttribute::UsedMemHigh => {
492                return Err(CudaError::InvalidValue);
493            }
494            _ => {}
495        }
496
497        // Apply locally-meaningful attributes.
498        if let PoolAttribute::ReleaseThreshold(val) = attr {
499            self.config.release_threshold = val as usize;
500        }
501
502        self.platform_set_attribute(attr)
503    }
504
505    /// Enable peer access from another device to allocations in this pool.
506    ///
507    /// After this call, kernels running on `peer_device` can access memory
508    /// allocated from this pool.
509    ///
510    /// # Errors
511    ///
512    /// * [`CudaError::InvalidDevice`] if `peer_device` equals this pool's device.
513    /// * [`CudaError::NotSupported`] on macOS.
514    pub fn enable_peer_access(&self, peer_device: i32) -> CudaResult<()> {
515        if peer_device == self.device {
516            return Err(CudaError::InvalidDevice);
517        }
518
519        self.platform_enable_peer_access(peer_device)
520    }
521
522    /// Disable peer access from another device to allocations in this pool.
523    ///
524    /// # Errors
525    ///
526    /// * [`CudaError::InvalidDevice`] if `peer_device` equals this pool's device.
527    /// * [`CudaError::NotSupported`] on macOS.
528    pub fn disable_peer_access(&self, peer_device: i32) -> CudaResult<()> {
529        if peer_device == self.device {
530            return Err(CudaError::InvalidDevice);
531        }
532
533        self.platform_disable_peer_access(peer_device)
534    }
535
536    /// Reset peak statistics (peak allocated bytes and peak allocation count).
537    pub fn reset_peak_stats(&mut self) {
538        self.peak_allocated = self.total_allocated;
539        self.peak_allocation_count = self.active_allocations;
540    }
541
542    /// Get the default memory pool for a device.
543    ///
544    /// CUDA provides a default pool per device.  On macOS, this returns a
545    /// local-only pool with default configuration.
546    ///
547    /// # Errors
548    ///
549    /// * [`CudaError::InvalidValue`] if `device` is negative.
550    pub fn default_pool(device: i32) -> CudaResult<Self> {
551        if device < 0 {
552            return Err(CudaError::InvalidValue);
553        }
554
555        // On real GPU, we would call cuDeviceGetDefaultMemPool.
556        // For now, return a pool with default config.
557        let config = StreamOrderedAllocConfig::default_for_device(device);
558        Self::new(config)
559    }
560
561    /// Returns the raw pool handle.
562    #[inline]
563    pub fn handle(&self) -> u64 {
564        self.handle
565    }
566
567    /// Returns the device ordinal.
568    #[inline]
569    pub fn device(&self) -> i32 {
570        self.device
571    }
572
573    /// Returns the pool configuration.
574    #[inline]
575    pub fn config(&self) -> &StreamOrderedAllocConfig {
576        &self.config
577    }
578
579    // -----------------------------------------------------------------------
580    // Platform-specific helpers
581    // -----------------------------------------------------------------------
582
583    /// Perform the actual allocation.  On macOS, generates a synthetic pointer.
584    fn platform_alloc_async(&mut self, size: usize, stream: u64) -> CudaResult<CUdeviceptr> {
585        #[cfg(target_os = "macos")]
586        {
587            let _ = stream;
588            // Generate a synthetic, non-zero device pointer for testing.
589            // Each allocation gets a unique "address" based on the pool's
590            // monotonic counter, with a base offset to avoid null.
591            let synthetic_ptr = 0x1000_0000_0000_u64 + self.next_alloc_id * 0x1000;
592            self.next_alloc_id = self.next_alloc_id.wrapping_add(1);
593            let _ = size;
594            Ok(synthetic_ptr)
595        }
596
597        #[cfg(not(target_os = "macos"))]
598        {
599            Self::gpu_alloc_async(self.handle, size, stream)
600        }
601    }
602
603    /// Trim on current platform.
604    fn platform_trim(&mut self, min_bytes_to_keep: usize) -> CudaResult<()> {
605        #[cfg(target_os = "macos")]
606        {
607            let _ = min_bytes_to_keep;
608            Err(CudaError::NotSupported)
609        }
610
611        #[cfg(not(target_os = "macos"))]
612        {
613            Self::gpu_trim(self.handle, min_bytes_to_keep)
614        }
615    }
616
617    /// Set attribute on current platform.
618    fn platform_set_attribute(&self, attr: PoolAttribute) -> CudaResult<()> {
619        #[cfg(target_os = "macos")]
620        {
621            match attr {
622                PoolAttribute::ReleaseThreshold(_) => Ok(()),
623                _ => Err(CudaError::NotSupported),
624            }
625        }
626
627        #[cfg(not(target_os = "macos"))]
628        {
629            Self::gpu_set_attribute(self.handle, attr)
630        }
631    }
632
633    /// Enable peer access on current platform.
634    fn platform_enable_peer_access(&self, peer_device: i32) -> CudaResult<()> {
635        #[cfg(target_os = "macos")]
636        {
637            let _ = peer_device;
638            Err(CudaError::NotSupported)
639        }
640
641        #[cfg(not(target_os = "macos"))]
642        {
643            Self::gpu_enable_peer_access(self.handle, peer_device)
644        }
645    }
646
647    /// Disable peer access on current platform.
648    fn platform_disable_peer_access(&self, peer_device: i32) -> CudaResult<()> {
649        #[cfg(target_os = "macos")]
650        {
651            let _ = peer_device;
652            Err(CudaError::NotSupported)
653        }
654
655        #[cfg(not(target_os = "macos"))]
656        {
657            Self::gpu_disable_peer_access(self.handle, peer_device)
658        }
659    }
660
661    /// Perform the actual free.  On macOS, this is a no-op (synthetic pointers).
662    fn platform_free_async(&self, alloc: &StreamAllocation) -> CudaResult<()> {
663        #[cfg(target_os = "macos")]
664        {
665            let _ = alloc;
666            Ok(())
667        }
668
669        #[cfg(not(target_os = "macos"))]
670        {
671            Self::gpu_free_async(alloc.ptr, alloc.stream)
672        }
673    }
674
675    // -----------------------------------------------------------------------
676    // GPU-only stubs (compiled out on macOS)
677    // -----------------------------------------------------------------------
678
679    /// Create the pool on the GPU via `cuMemPoolCreate`.
680    #[cfg(not(target_os = "macos"))]
681    fn gpu_create_pool(_pool: &Self) -> CudaResult<()> {
682        // In a full implementation, this would call:
683        //   cuMemPoolCreate(&pool_handle, &pool_props)
684        // For now, the pool operates with handle=0 (default pool semantics).
685        Ok(())
686    }
687
688    /// Allocate via `cuMemAllocAsync`.
689    #[cfg(not(target_os = "macos"))]
690    fn gpu_alloc_async(_pool_handle: u64, _size: usize, _stream: u64) -> CudaResult<CUdeviceptr> {
691        // Would call: cuMemAllocAsync(&dptr, size, stream)
692        // For now, return a placeholder.  Real implementation would use
693        // try_driver() and invoke the function pointer.
694        Err(CudaError::NotInitialized)
695    }
696
697    /// Free via `cuMemFreeAsync`.
698    #[cfg(not(target_os = "macos"))]
699    fn gpu_free_async(_ptr: CUdeviceptr, _stream: u64) -> CudaResult<()> {
700        // Would call: cuMemFreeAsync(dptr, stream)
701        Err(CudaError::NotInitialized)
702    }
703
704    /// Trim via `cuMemPoolTrimTo`.
705    #[cfg(not(target_os = "macos"))]
706    fn gpu_trim(_pool_handle: u64, _min_bytes_to_keep: usize) -> CudaResult<()> {
707        // Would call: cuMemPoolTrimTo(pool, minBytesToKeep)
708        Err(CudaError::NotInitialized)
709    }
710
711    /// Set attribute via `cuMemPoolSetAttribute`.
712    #[cfg(not(target_os = "macos"))]
713    fn gpu_set_attribute(_pool_handle: u64, _attr: PoolAttribute) -> CudaResult<()> {
714        // Would call: cuMemPoolSetAttribute(pool, attr, &value)
715        Err(CudaError::NotInitialized)
716    }
717
718    /// Enable peer access via `cuMemPoolExportToShareableHandle` + access control.
719    #[cfg(not(target_os = "macos"))]
720    fn gpu_enable_peer_access(_pool_handle: u64, _peer_device: i32) -> CudaResult<()> {
721        Err(CudaError::NotInitialized)
722    }
723
724    /// Disable peer access.
725    #[cfg(not(target_os = "macos"))]
726    fn gpu_disable_peer_access(_pool_handle: u64, _peer_device: i32) -> CudaResult<()> {
727        Err(CudaError::NotInitialized)
728    }
729}
730
// ---------------------------------------------------------------------------
// Convenience free functions
// ---------------------------------------------------------------------------
734
735/// Allocate memory on a stream using the default pool for device 0.
736///
737/// This is a convenience wrapper around [`StreamMemoryPool::default_pool`]
738/// and [`StreamMemoryPool::alloc_async`].
739///
740/// # Errors
741///
742/// Propagates errors from pool creation and allocation.
743pub fn stream_alloc(size: usize, stream: u64) -> CudaResult<StreamAllocation> {
744    let mut pool = StreamMemoryPool::default_pool(0)?;
745    pool.alloc_async(size, stream)
746}
747
748/// Free a stream-ordered allocation using a temporary default pool.
749///
750/// # Errors
751///
752/// * [`CudaError::InvalidValue`] if the allocation is already freed.
753pub fn stream_free(alloc: &mut StreamAllocation) -> CudaResult<()> {
754    if alloc.freed {
755        return Err(CudaError::InvalidValue);
756    }
757
758    // On macOS, just mark as freed (no real GPU work).
759    #[cfg(target_os = "macos")]
760    {
761        alloc.freed = true;
762        Ok(())
763    }
764
765    #[cfg(not(target_os = "macos"))]
766    {
767        StreamMemoryPool::gpu_free_async(alloc.ptr, alloc.stream)?;
768        alloc.freed = true;
769        Ok(())
770    }
771}
772
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
776
777#[cfg(test)]
778mod tests {
779    use super::*;
780
781    // -- Config validation -------------------------------------------------
782
783    #[test]
784    fn config_validate_valid_sizes() {
785        let config = StreamOrderedAllocConfig {
786            initial_pool_size: 1024,
787            max_pool_size: 4096,
788            release_threshold: 512,
789            device: 0,
790        };
791        assert!(config.validate().is_ok());
792    }
793
794    #[test]
795    fn config_validate_unlimited_max() {
796        let config = StreamOrderedAllocConfig {
797            initial_pool_size: 1024 * 1024,
798            max_pool_size: 0, // unlimited
799            release_threshold: 512,
800            device: 0,
801        };
802        assert!(config.validate().is_ok());
803    }
804
805    #[test]
806    fn config_validate_initial_exceeds_max() {
807        let config = StreamOrderedAllocConfig {
808            initial_pool_size: 8192,
809            max_pool_size: 4096,
810            release_threshold: 0,
811            device: 0,
812        };
813        assert_eq!(config.validate(), Err(CudaError::InvalidValue));
814    }
815
816    #[test]
817    fn config_validate_negative_device() {
818        let config = StreamOrderedAllocConfig {
819            initial_pool_size: 0,
820            max_pool_size: 0,
821            release_threshold: 0,
822            device: -1,
823        };
824        assert_eq!(config.validate(), Err(CudaError::InvalidValue));
825    }
826
827    #[test]
828    fn config_validate_threshold_exceeds_max() {
829        let config = StreamOrderedAllocConfig {
830            initial_pool_size: 0,
831            max_pool_size: 1024,
832            release_threshold: 2048,
833            device: 0,
834        };
835        assert_eq!(config.validate(), Err(CudaError::InvalidValue));
836    }
837
838    // -- Default config ----------------------------------------------------
839
840    #[test]
841    fn default_config_for_device() {
842        let config = StreamOrderedAllocConfig::default_for_device(2);
843        assert_eq!(config.device, 2);
844        assert_eq!(config.initial_pool_size, 0);
845        assert_eq!(config.max_pool_size, 0);
846        assert_eq!(config.release_threshold, 0);
847        assert!(config.validate().is_ok());
848    }
849
850    // -- Pool creation -----------------------------------------------------
851
852    #[test]
853    fn pool_creation() {
854        let config = StreamOrderedAllocConfig::default_for_device(0);
855        let pool = StreamMemoryPool::new(config);
856        assert!(pool.is_ok());
857        let pool = pool.ok();
858        assert!(pool.is_some());
859        let pool = pool.map(|p| {
860            assert_eq!(p.device(), 0);
861            assert_eq!(p.active_allocations, 0);
862            assert_eq!(p.total_allocated, 0);
863        });
864        let _ = pool;
865    }
866
867    #[test]
868    fn pool_creation_invalid_config() {
869        let config = StreamOrderedAllocConfig {
870            initial_pool_size: 0,
871            max_pool_size: 0,
872            release_threshold: 0,
873            device: -1,
874        };
875        let result = StreamMemoryPool::new(config);
876        assert!(matches!(result, Err(CudaError::InvalidValue)));
877    }
878
879    // -- alloc_async / free_async -----------------------------------------
880
881    #[cfg(target_os = "macos")]
882    #[test]
883    fn alloc_async_creates_allocation() {
884        let config = StreamOrderedAllocConfig::default_for_device(0);
885        let mut pool = StreamMemoryPool::new(config).ok();
886        assert!(pool.is_some());
887        let pool = pool.as_mut().map(|p| {
888            let alloc = p.alloc_async(1024, 0);
889            assert!(alloc.is_ok());
890            let alloc = alloc.ok();
891            assert!(alloc.is_some());
892            if let Some(a) = &alloc {
893                assert_eq!(a.size(), 1024);
894                assert!(!a.is_freed());
895                assert_ne!(a.as_ptr(), 0);
896                assert_eq!(a.stream(), 0);
897            }
898        });
899        let _ = pool;
900    }
901
902    #[cfg(target_os = "macos")]
903    #[test]
904    fn free_async_marks_freed() {
905        let config = StreamOrderedAllocConfig::default_for_device(0);
906        let mut pool =
907            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
908        let mut alloc = pool
909            .alloc_async(2048, 0)
910            .expect("alloc should succeed on macOS");
911        assert!(!alloc.is_freed());
912        assert!(pool.free_async(&mut alloc).is_ok());
913        assert!(alloc.is_freed());
914        assert_eq!(pool.active_allocations, 0);
915    }
916
917    #[cfg(target_os = "macos")]
918    #[test]
919    fn double_free_returns_error() {
920        let config = StreamOrderedAllocConfig::default_for_device(0);
921        let mut pool =
922            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
923        let mut alloc = pool
924            .alloc_async(512, 0)
925            .expect("alloc should succeed on macOS");
926        assert!(pool.free_async(&mut alloc).is_ok());
927        assert_eq!(pool.free_async(&mut alloc), Err(CudaError::InvalidValue));
928    }
929
930    // -- Trim --------------------------------------------------------------
931
932    #[cfg(target_os = "macos")]
933    #[test]
934    fn trim_returns_not_supported_on_macos() {
935        let config = StreamOrderedAllocConfig::default_for_device(0);
936        let mut pool =
937            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
938        assert_eq!(pool.trim(0), Err(CudaError::NotSupported));
939    }
940
941    // -- Stats tracking ----------------------------------------------------
942
943    #[cfg(target_os = "macos")]
944    #[test]
945    fn stats_tracking() {
946        let config = StreamOrderedAllocConfig::default_for_device(0);
947        let mut pool =
948            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
949
950        let mut a1 = pool.alloc_async(1024, 0).expect("alloc should succeed");
951        let _a2 = pool.alloc_async(2048, 0).expect("alloc should succeed");
952
953        let stats = pool.stats();
954        assert_eq!(stats.active_allocations, 2);
955        assert_eq!(stats.used_current, 3072);
956        assert_eq!(stats.used_high, 3072);
957        assert_eq!(stats.peak_allocations, 2);
958
959        pool.free_async(&mut a1).expect("free should succeed");
960        let stats = pool.stats();
961        assert_eq!(stats.active_allocations, 1);
962        assert_eq!(stats.used_current, 2048);
963        // Peak should remain at 3072.
964        assert_eq!(stats.used_high, 3072);
965    }
966
967    // -- Pool attribute setting --------------------------------------------
968
969    #[cfg(target_os = "macos")]
970    #[test]
971    fn set_attribute_release_threshold() {
972        let config = StreamOrderedAllocConfig::default_for_device(0);
973        let mut pool =
974            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
975        let result = pool.set_attribute(PoolAttribute::ReleaseThreshold(4096));
976        assert!(result.is_ok());
977        assert_eq!(pool.config().release_threshold, 4096);
978    }
979
980    #[test]
981    fn set_attribute_readonly_returns_error() {
982        let config = StreamOrderedAllocConfig::default_for_device(0);
983        let mut pool = StreamMemoryPool::new(config).expect("pool creation should succeed");
984        assert_eq!(
985            pool.set_attribute(PoolAttribute::ReservedMemCurrent),
986            Err(CudaError::InvalidValue)
987        );
988        assert_eq!(
989            pool.set_attribute(PoolAttribute::UsedMemCurrent),
990            Err(CudaError::InvalidValue)
991        );
992    }
993
994    // -- StreamAllocation accessors ----------------------------------------
995
996    #[cfg(target_os = "macos")]
997    #[test]
998    fn allocation_accessors() {
999        let config = StreamOrderedAllocConfig::default_for_device(0);
1000        let mut pool =
1001            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
1002        let alloc = pool.alloc_async(4096, 42).expect("alloc should succeed");
1003        assert_eq!(alloc.size(), 4096);
1004        assert_eq!(alloc.stream(), 42);
1005        assert!(!alloc.is_freed());
1006        assert_ne!(alloc.as_ptr(), 0);
1007        // Debug formatting should not panic.
1008        let _debug = format!("{alloc:?}");
1009    }
1010
1011    // -- Convenience functions ---------------------------------------------
1012
1013    #[cfg(target_os = "macos")]
1014    #[test]
1015    fn convenience_stream_alloc() {
1016        let result = stream_alloc(256, 0);
1017        assert!(result.is_ok());
1018        let alloc = result.expect("should succeed on macOS");
1019        assert_eq!(alloc.size(), 256);
1020        assert!(!alloc.is_freed());
1021    }
1022
1023    #[cfg(target_os = "macos")]
1024    #[test]
1025    fn convenience_stream_free() {
1026        let mut alloc = stream_alloc(128, 0).expect("alloc should succeed on macOS");
1027        assert!(stream_free(&mut alloc).is_ok());
1028        assert!(alloc.is_freed());
1029        // Double free via convenience function.
1030        assert_eq!(stream_free(&mut alloc), Err(CudaError::InvalidValue));
1031    }
1032
1033    // -- Large allocation size ---------------------------------------------
1034
1035    #[cfg(target_os = "macos")]
1036    #[test]
1037    fn large_allocation_size() {
1038        let config = StreamOrderedAllocConfig {
1039            initial_pool_size: 0,
1040            max_pool_size: 0, // unlimited
1041            release_threshold: 0,
1042            device: 0,
1043        };
1044        let mut pool =
1045            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
1046        // 16 GiB allocation (large but valid).
1047        let size = 16 * 1024 * 1024 * 1024_usize;
1048        let alloc = pool.alloc_async(size, 0);
1049        assert!(alloc.is_ok());
1050        let alloc = alloc.expect("should succeed");
1051        assert_eq!(alloc.size(), size);
1052    }
1053
1054    #[cfg(target_os = "macos")]
1055    #[test]
1056    fn alloc_exceeds_max_pool_size() {
1057        let config = StreamOrderedAllocConfig {
1058            initial_pool_size: 0,
1059            max_pool_size: 1024,
1060            release_threshold: 0,
1061            device: 0,
1062        };
1063        let mut pool = StreamMemoryPool::new(config).expect("pool creation should succeed");
1064        assert!(matches!(
1065            pool.alloc_async(2048, 0),
1066            Err(CudaError::OutOfMemory)
1067        ));
1068    }
1069
1070    // -- Peer access -------------------------------------------------------
1071
1072    #[test]
1073    fn peer_access_same_device_error() {
1074        let config = StreamOrderedAllocConfig::default_for_device(0);
1075        let pool = StreamMemoryPool::new(config).expect("pool creation should succeed");
1076        assert_eq!(pool.enable_peer_access(0), Err(CudaError::InvalidDevice));
1077        assert_eq!(pool.disable_peer_access(0), Err(CudaError::InvalidDevice));
1078    }
1079
1080    #[cfg(target_os = "macos")]
1081    #[test]
1082    fn peer_access_not_supported_on_macos() {
1083        let config = StreamOrderedAllocConfig::default_for_device(0);
1084        let pool = StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
1085        assert_eq!(pool.enable_peer_access(1), Err(CudaError::NotSupported));
1086        assert_eq!(pool.disable_peer_access(1), Err(CudaError::NotSupported));
1087    }
1088
1089    // -- Reset peak stats --------------------------------------------------
1090
1091    #[cfg(target_os = "macos")]
1092    #[test]
1093    fn reset_peak_stats() {
1094        let config = StreamOrderedAllocConfig::default_for_device(0);
1095        let mut pool =
1096            StreamMemoryPool::new(config).expect("pool creation should succeed on macOS");
1097
1098        let mut a1 = pool.alloc_async(1024, 0).expect("alloc ok");
1099        let _a2 = pool.alloc_async(2048, 0).expect("alloc ok");
1100        assert_eq!(pool.stats().peak_allocations, 2);
1101        assert_eq!(pool.stats().used_high, 3072);
1102
1103        pool.free_async(&mut a1).expect("free ok");
1104        pool.reset_peak_stats();
1105
1106        let stats = pool.stats();
1107        assert_eq!(stats.used_high, 2048); // reset to current
1108        assert_eq!(stats.peak_allocations, 1); // reset to current
1109    }
1110
1111    // -- Zero-size alloc ---------------------------------------------------
1112
1113    #[test]
1114    fn alloc_zero_size_returns_error() {
1115        let config = StreamOrderedAllocConfig::default_for_device(0);
1116        let mut pool = StreamMemoryPool::new(config).expect("pool creation should succeed");
1117        assert!(matches!(
1118            pool.alloc_async(0, 0),
1119            Err(CudaError::InvalidValue)
1120        ));
1121    }
1122
1123    // -- Default pool ------------------------------------------------------
1124
1125    #[test]
1126    fn default_pool_valid_device() {
1127        let pool = StreamMemoryPool::default_pool(0);
1128        assert!(pool.is_ok());
1129    }
1130
1131    #[test]
1132    fn default_pool_negative_device() {
1133        assert!(matches!(
1134            StreamMemoryPool::default_pool(-1),
1135            Err(CudaError::InvalidValue)
1136        ));
1137    }
1138
1139    // -- PoolAttribute::to_raw ---------------------------------------------
1140
1141    #[test]
1142    fn pool_attribute_to_raw() {
1143        assert_eq!(
1144            PoolAttribute::ReuseFollowEventDependencies.to_raw(),
1145            CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES
1146        );
1147        assert_eq!(
1148            PoolAttribute::ReuseAllowOpportunistic.to_raw(),
1149            CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC
1150        );
1151        assert_eq!(
1152            PoolAttribute::ReuseAllowInternalDependencies.to_raw(),
1153            CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES
1154        );
1155        assert_eq!(
1156            PoolAttribute::ReleaseThreshold(0).to_raw(),
1157            CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
1158        );
1159        assert_eq!(
1160            PoolAttribute::ReservedMemCurrent.to_raw(),
1161            CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT
1162        );
1163        assert_eq!(
1164            PoolAttribute::ReservedMemHigh.to_raw(),
1165            CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH
1166        );
1167        assert_eq!(
1168            PoolAttribute::UsedMemCurrent.to_raw(),
1169            CU_MEMPOOL_ATTR_USED_MEM_CURRENT
1170        );
1171        assert_eq!(
1172            PoolAttribute::UsedMemHigh.to_raw(),
1173            CU_MEMPOOL_ATTR_USED_MEM_HIGH
1174        );
1175    }
1176
1177    // -- ShareableHandleType default ---------------------------------------
1178
1179    #[test]
1180    fn shareable_handle_type_default() {
1181        assert_eq!(ShareableHandleType::default(), ShareableHandleType::None);
1182    }
1183
1184    // -- PoolExportDescriptor construction ---------------------------------
1185
1186    #[test]
1187    fn pool_export_descriptor() {
1188        let desc = PoolExportDescriptor {
1189            shareable_handle_type: ShareableHandleType::PosixFileDescriptor,
1190            pool_device: 0,
1191        };
1192        assert_eq!(
1193            desc.shareable_handle_type,
1194            ShareableHandleType::PosixFileDescriptor
1195        );
1196        assert_eq!(desc.pool_device, 0);
1197    }
1198}