Skip to main content

oxicuda_memory/
host_registered.rs

1//! Host-registered memory for DMA access.
2//!
3//! [`RegisteredMemory<T>`] wraps `cuMemHostRegister` / `cuMemHostUnregister`
4//! to register existing host allocations with the CUDA driver, enabling DMA
5//! transfers without an intermediate staging copy.
6//!
7//! Unlike [`PinnedBuffer`](crate::PinnedBuffer), which allocates *new*
8//! page-locked memory, `RegisteredMemory` works with memory that has
9//! already been allocated (e.g. a `Vec<T>`, a slice from a memory-mapped
10//! file, etc.).
11//!
12//! # Lifetime
13//!
14//! The caller must ensure the underlying allocation outlives the
15//! `RegisteredMemory` handle.  The handle borrows (but does NOT own) the
16//! memory.  On [`Drop`], only `cuMemHostUnregister` is called — the
17//! original allocation is untouched.
18//!
19//! # Example
20//!
21//! ```rust,no_run
22//! # use oxicuda_memory::host_registered::{register_vec, RegisterFlags};
23//! let mut data = vec![0.0f32; 1024];
24//! let reg = register_vec(&mut data, RegisterFlags::DEFAULT)?;
25//! assert_eq!(reg.len(), 1024);
26//! // `data` is now DMA-accessible; use `reg.device_ptr()` on the GPU side.
27//! drop(reg); // cuMemHostUnregister is called here
28//! # Ok::<(), oxicuda_driver::error::CudaError>(())
29//! ```
30
31use std::fmt;
32use std::ops::{BitAnd, BitOr, Deref, DerefMut};
33
34use oxicuda_driver::error::{CudaError, CudaResult};
35use oxicuda_driver::ffi::{
36    CU_MEMHOSTREGISTER_DEVICEMAP, CU_MEMHOSTREGISTER_IOMEMORY, CU_MEMHOSTREGISTER_PORTABLE,
37    CU_MEMHOSTREGISTER_READ_ONLY, CUdeviceptr,
38};
39
40#[cfg(not(target_os = "macos"))]
41use oxicuda_driver::ffi;
42#[cfg(not(target_os = "macos"))]
43use oxicuda_driver::loader::try_driver;
44#[cfg(not(target_os = "macos"))]
45use std::ffi::c_void;
46
47// ---------------------------------------------------------------------------
48// RegisterFlags
49// ---------------------------------------------------------------------------
50
51/// Bitflags controlling how `cuMemHostRegister` registers host memory.
52#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
53pub struct RegisterFlags(u32);
54
55impl RegisterFlags {
56    /// Memory is portable across CUDA contexts.
57    pub const PORTABLE: Self = Self(CU_MEMHOSTREGISTER_PORTABLE);
58
59    /// Memory is mapped into the device address space, enabling zero-copy
60    /// access via `cuMemHostGetDevicePointer`.
61    pub const DEVICE_MAP: Self = Self(CU_MEMHOSTREGISTER_DEVICEMAP);
62
63    /// Pointer refers to I/O memory (not system RAM).
64    pub const IO_MEMORY: Self = Self(CU_MEMHOSTREGISTER_IOMEMORY);
65
66    /// Memory will not be written by the GPU (read-only hint).
67    pub const READ_ONLY: Self = Self(CU_MEMHOSTREGISTER_READ_ONLY);
68
69    /// The recommended default: portable + device-mapped.
70    pub const DEFAULT: Self = Self(CU_MEMHOSTREGISTER_PORTABLE | CU_MEMHOSTREGISTER_DEVICEMAP);
71
72    /// No flags set.
73    pub const NONE: Self = Self(0);
74
75    /// Returns the raw `u32` flag value.
76    #[inline]
77    pub const fn bits(self) -> u32 {
78        self.0
79    }
80
81    /// Creates a `RegisterFlags` from a raw `u32` value.
82    #[inline]
83    pub const fn from_bits(bits: u32) -> Self {
84        Self(bits)
85    }
86
87    /// Returns `true` if `self` contains all flags in `other`.
88    #[inline]
89    pub const fn contains(self, other: Self) -> bool {
90        (self.0 & other.0) == other.0
91    }
92}
93
94impl BitOr for RegisterFlags {
95    type Output = Self;
96
97    #[inline]
98    fn bitor(self, rhs: Self) -> Self {
99        Self(self.0 | rhs.0)
100    }
101}
102
103impl BitAnd for RegisterFlags {
104    type Output = Self;
105
106    #[inline]
107    fn bitand(self, rhs: Self) -> Self {
108        Self(self.0 & rhs.0)
109    }
110}
111
112impl fmt::Display for RegisterFlags {
113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114        let mut parts = Vec::new();
115        if self.contains(Self::PORTABLE) {
116            parts.push("PORTABLE");
117        }
118        if self.contains(Self::DEVICE_MAP) {
119            parts.push("DEVICE_MAP");
120        }
121        if self.contains(Self::IO_MEMORY) {
122            parts.push("IO_MEMORY");
123        }
124        if self.contains(Self::READ_ONLY) {
125            parts.push("READ_ONLY");
126        }
127        if parts.is_empty() {
128            write!(f, "NONE")
129        } else {
130            write!(f, "{}", parts.join(" | "))
131        }
132    }
133}
134
135// ---------------------------------------------------------------------------
136// RegisteredMemory<T>
137// ---------------------------------------------------------------------------
138
139/// RAII handle for host memory registered with the CUDA driver.
140///
141/// The handle borrows a raw pointer to existing host memory and registers
142/// it via `cuMemHostRegister_v2`.  On [`Drop`], `cuMemHostUnregister` is
143/// called to undo the registration.  The underlying allocation is **not**
144/// freed — that responsibility remains with the original owner.
145///
146/// # Safety invariant
147///
148/// The memory range `[ptr, ptr + len)` must remain valid and not be freed
149/// for the entire lifetime of this handle.
150pub struct RegisteredMemory<T: Copy> {
151    /// Borrowed pointer to the host allocation (NOT owned).
152    ptr: *mut T,
153    /// Number of `T` elements.
154    len: usize,
155    /// Flags used during registration.
156    flags: RegisterFlags,
157    /// Device-visible pointer obtained from registration (if DEVICE_MAP).
158    device_ptr: CUdeviceptr,
159}
160
161// SAFETY: The registered host memory is not thread-local; it is accessible
162// from any thread once registered with the CUDA driver.
163unsafe impl<T: Copy + Send> Send for RegisteredMemory<T> {}
164unsafe impl<T: Copy + Sync> Sync for RegisteredMemory<T> {}
165
166impl<T: Copy> RegisteredMemory<T> {
167    /// Returns a raw const pointer to the registered memory.
168    #[inline]
169    pub fn as_ptr(&self) -> *const T {
170        self.ptr
171    }
172
173    /// Returns a raw mutable pointer to the registered memory.
174    #[inline]
175    pub fn as_mut_ptr(&mut self) -> *mut T {
176        self.ptr
177    }
178
179    /// Returns the device-visible pointer for the registered memory.
180    ///
181    /// This is only meaningful when the `DEVICE_MAP` flag was set.
182    #[inline]
183    pub fn device_ptr(&self) -> CUdeviceptr {
184        self.device_ptr
185    }
186
187    /// Returns the number of `T` elements in the registered range.
188    #[inline]
189    pub fn len(&self) -> usize {
190        self.len
191    }
192
193    /// Returns `true` if the registered range contains zero elements.
194    #[inline]
195    pub fn is_empty(&self) -> bool {
196        self.len == 0
197    }
198
199    /// Returns the flags used when the memory was registered.
200    #[inline]
201    pub fn flags(&self) -> RegisterFlags {
202        self.flags
203    }
204
205    /// Returns a shared slice over the registered memory.
206    #[inline]
207    pub fn as_slice(&self) -> &[T] {
208        // SAFETY: the caller guaranteed the memory is valid for `self.len`
209        // elements, and we have `&self` so no mutable alias exists.
210        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
211    }
212
213    /// Returns a mutable slice over the registered memory.
214    #[inline]
215    pub fn as_mut_slice(&mut self) -> &mut [T] {
216        // SAFETY: the caller guaranteed the memory is valid for `self.len`
217        // elements, and we have `&mut self` so no other alias exists.
218        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
219    }
220}
221
222impl<T: Copy> Deref for RegisteredMemory<T> {
223    type Target = [T];
224
225    #[inline]
226    fn deref(&self) -> &[T] {
227        self.as_slice()
228    }
229}
230
231impl<T: Copy> DerefMut for RegisteredMemory<T> {
232    #[inline]
233    fn deref_mut(&mut self) -> &mut [T] {
234        self.as_mut_slice()
235    }
236}
237
238impl<T: Copy> Drop for RegisteredMemory<T> {
239    fn drop(&mut self) {
240        #[cfg(not(target_os = "macos"))]
241        {
242            if let Ok(api) = try_driver() {
243                let rc = unsafe { (api.cu_mem_host_unregister)(self.ptr.cast::<c_void>()) };
244                if rc != 0 {
245                    tracing::warn!(
246                        cuda_error = rc,
247                        len = self.len,
248                        "cuMemHostUnregister failed during RegisteredMemory drop"
249                    );
250                }
251            }
252        }
253    }
254}
255
256// ---------------------------------------------------------------------------
257// Public registration functions
258// ---------------------------------------------------------------------------
259
260/// Registers an existing host memory range with the CUDA driver for DMA.
261///
262/// # Safety contract (upheld by the caller)
263///
264/// * `ptr` must point to a valid allocation of at least `len * size_of::<T>()` bytes.
265/// * The allocation must remain valid for the lifetime of the returned handle.
266///
267/// # Errors
268///
269/// * [`CudaError::InvalidValue`] if `len` is zero or the byte size overflows.
270/// * [`CudaError::NotSupported`] on macOS.
271/// * Other driver errors from `cuMemHostRegister_v2`.
272pub fn register<T: Copy>(
273    ptr: *mut T,
274    len: usize,
275    flags: RegisterFlags,
276) -> CudaResult<RegisteredMemory<T>> {
277    if len == 0 {
278        return Err(CudaError::InvalidValue);
279    }
280    if ptr.is_null() {
281        return Err(CudaError::InvalidValue);
282    }
283    let byte_size = len
284        .checked_mul(std::mem::size_of::<T>())
285        .ok_or(CudaError::InvalidValue)?;
286
287    #[cfg(target_os = "macos")]
288    {
289        // On macOS there is no CUDA driver.  Return a synthetic handle so
290        // that unit tests can exercise the API surface without a GPU.
291        let _ = byte_size;
292        Ok(RegisteredMemory {
293            ptr,
294            len,
295            flags,
296            device_ptr: ptr as CUdeviceptr,
297        })
298    }
299
300    #[cfg(not(target_os = "macos"))]
301    {
302        let api = try_driver()?;
303
304        // Register the host memory range.
305        let rc =
306            unsafe { (api.cu_mem_host_register_v2)(ptr.cast::<c_void>(), byte_size, flags.bits()) };
307        oxicuda_driver::check(rc)?;
308
309        // If DEVICE_MAP is set, obtain the device pointer.
310        let device_ptr = if flags.contains(RegisterFlags::DEVICE_MAP) {
311            let mut dptr: CUdeviceptr = 0;
312            let rc2 = unsafe {
313                (api.cu_mem_host_get_device_pointer_v2)(&mut dptr, ptr.cast::<c_void>(), 0)
314            };
315            oxicuda_driver::check(rc2)?;
316            dptr
317        } else {
318            0
319        };
320
321        Ok(RegisteredMemory {
322            ptr,
323            len,
324            flags,
325            device_ptr,
326        })
327    }
328}
329
330/// Convenience: registers a mutable slice with the CUDA driver.
331///
332/// # Errors
333///
334/// Same as [`register`].
335pub fn register_slice<T: Copy>(
336    slice: &mut [T],
337    flags: RegisterFlags,
338) -> CudaResult<RegisteredMemory<T>> {
339    register(slice.as_mut_ptr(), slice.len(), flags)
340}
341
342/// Convenience: registers a `Vec<T>` with the CUDA driver.
343///
344/// The `Vec` must not be reallocated (e.g. via `push`, `resize`) while the
345/// returned handle is alive, as that would invalidate the registered pointer.
346///
347/// # Errors
348///
349/// Same as [`register`].
350pub fn register_vec<T: Copy>(
351    vec: &mut Vec<T>,
352    flags: RegisterFlags,
353) -> CudaResult<RegisteredMemory<T>> {
354    register(vec.as_mut_ptr(), vec.len(), flags)
355}
356
357// ---------------------------------------------------------------------------
358// Pointer query
359// ---------------------------------------------------------------------------
360
/// Classification of the memory behind a pointer, as reported by
/// [`query_registered_pointer_info`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum RegisteredMemoryType {
    /// Host (system) memory.
    Host,
    /// Device (GPU) memory.
    Device,
    /// Unified (managed) memory.
    Unified,
    /// Pointer is not registered with CUDA.
    Unregistered,
}

impl fmt::Display for RegisteredMemoryType {
    /// Writes the variant name verbatim (`Host`, `Device`, `Unified`,
    /// `Unregistered`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Host => "Host",
            Self::Device => "Device",
            Self::Unified => "Unified",
            Self::Unregistered => "Unregistered",
        };
        f.write_str(label)
    }
}
384
385/// Information about a pointer registered with the CUDA driver.
386#[derive(Debug, Clone, Copy)]
387pub struct RegisteredPointerInfo {
388    /// Device pointer corresponding to the registered host pointer.
389    pub device_ptr: CUdeviceptr,
390    /// Whether the memory is managed (unified).
391    pub is_managed: bool,
392    /// The type of memory backing the pointer.
393    pub memory_type: RegisteredMemoryType,
394}
395
396/// Queries the CUDA driver for information about a registered pointer.
397///
398/// # Errors
399///
400/// * [`CudaError::NotSupported`] on macOS.
401/// * [`CudaError::InvalidValue`] if the pointer is not known to the driver.
402/// * Other driver errors from `cuPointerGetAttribute`.
403pub fn query_registered_pointer_info(ptr: *const u8) -> CudaResult<RegisteredPointerInfo> {
404    if ptr.is_null() {
405        return Err(CudaError::InvalidValue);
406    }
407
408    #[cfg(target_os = "macos")]
409    {
410        // Synthetic response for macOS tests.
411        Ok(RegisteredPointerInfo {
412            device_ptr: ptr as CUdeviceptr,
413            is_managed: false,
414            memory_type: RegisteredMemoryType::Host,
415        })
416    }
417
418    #[cfg(not(target_os = "macos"))]
419    {
420        let api = try_driver()?;
421        let dev_ptr_val = ptr as CUdeviceptr;
422
423        // Query memory type.
424        let mut mem_type: u32 = 0;
425        let rc = unsafe {
426            (api.cu_pointer_get_attribute)(
427                (&mut mem_type as *mut u32).cast::<c_void>(),
428                ffi::CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
429                dev_ptr_val,
430            )
431        };
432        let memory_type = if rc != 0 {
433            // If the query fails, the pointer is likely unregistered.
434            RegisteredMemoryType::Unregistered
435        } else {
436            match mem_type {
437                ffi::CU_MEMORYTYPE_HOST => RegisteredMemoryType::Host,
438                ffi::CU_MEMORYTYPE_DEVICE => RegisteredMemoryType::Device,
439                ffi::CU_MEMORYTYPE_UNIFIED => RegisteredMemoryType::Unified,
440                _ => RegisteredMemoryType::Unregistered,
441            }
442        };
443
444        // Query is_managed.
445        let mut managed: u32 = 0;
446        let rc2 = unsafe {
447            (api.cu_pointer_get_attribute)(
448                (&mut managed as *mut u32).cast::<c_void>(),
449                ffi::CU_POINTER_ATTRIBUTE_IS_MANAGED,
450                dev_ptr_val,
451            )
452        };
453        let is_managed = rc2 == 0 && managed != 0;
454
455        // Query device pointer.
456        let mut dptr: CUdeviceptr = 0;
457        let rc3 = unsafe {
458            (api.cu_pointer_get_attribute)(
459                (&mut dptr as *mut CUdeviceptr).cast::<c_void>(),
460                ffi::CU_POINTER_ATTRIBUTE_DEVICE_POINTER,
461                dev_ptr_val,
462            )
463        };
464        if rc3 != 0 {
465            dptr = 0;
466        }
467
468        Ok(RegisteredPointerInfo {
469            device_ptr: dptr,
470            is_managed,
471            memory_type,
472        })
473    }
474}
475
476// ---------------------------------------------------------------------------
477// Tests
478// ---------------------------------------------------------------------------
479
#[cfg(test)]
mod tests {
    use super::*;

    // -- RegisterFlags -----------------------------------------------------

    #[test]
    fn flags_default_contains_portable_and_device_map() {
        let default = RegisterFlags::DEFAULT;
        for present in [RegisterFlags::PORTABLE, RegisterFlags::DEVICE_MAP] {
            assert!(default.contains(present));
        }
        for absent in [RegisterFlags::IO_MEMORY, RegisterFlags::READ_ONLY] {
            assert!(!default.contains(absent));
        }
    }

    #[test]
    fn flags_bitor_combines() {
        let union = RegisterFlags::PORTABLE | RegisterFlags::READ_ONLY;
        assert!(union.contains(RegisterFlags::PORTABLE));
        assert!(union.contains(RegisterFlags::READ_ONLY));
        assert!(!union.contains(RegisterFlags::IO_MEMORY));
    }

    #[test]
    fn flags_bitand_intersects() {
        let lhs = RegisterFlags::PORTABLE | RegisterFlags::DEVICE_MAP;
        let rhs = RegisterFlags::PORTABLE | RegisterFlags::READ_ONLY;
        let common = lhs & rhs;
        assert!(common.contains(RegisterFlags::PORTABLE));
        assert!(!common.contains(RegisterFlags::DEVICE_MAP));
        assert!(!common.contains(RegisterFlags::READ_ONLY));
    }

    #[test]
    fn flags_display() {
        assert_eq!(RegisterFlags::NONE.to_string(), "NONE");
        assert_eq!(RegisterFlags::PORTABLE.to_string(), "PORTABLE");
        let rendered = RegisterFlags::DEFAULT.to_string();
        for expected in ["PORTABLE", "DEVICE_MAP"] {
            assert!(rendered.contains(expected));
        }
    }

    #[test]
    fn flags_bits_roundtrip() {
        let original = RegisterFlags::PORTABLE | RegisterFlags::IO_MEMORY;
        let rebuilt = RegisterFlags::from_bits(original.bits());
        assert_eq!(rebuilt, original);
    }

    #[test]
    fn flags_none_is_zero() {
        assert_eq!(RegisterFlags::NONE.bits(), 0);
    }

    // -- RegisteredMemoryType ----------------------------------------------

    #[test]
    fn memory_type_display() {
        let expectations = [
            (RegisteredMemoryType::Host, "Host"),
            (RegisteredMemoryType::Device, "Device"),
            (RegisteredMemoryType::Unified, "Unified"),
            (RegisteredMemoryType::Unregistered, "Unregistered"),
        ];
        for (variant, text) in expectations {
            assert_eq!(variant.to_string(), text);
        }
    }

    #[test]
    fn memory_type_equality() {
        assert_eq!(RegisteredMemoryType::Host, RegisteredMemoryType::Host);
        assert_ne!(RegisteredMemoryType::Host, RegisteredMemoryType::Device);
    }

    // -- Argument validation (rejected before any driver call) --------------

    #[test]
    fn register_zero_len_fails() {
        let mut buf = [0u8; 16];
        assert!(matches!(
            register(buf.as_mut_ptr(), 0, RegisterFlags::DEFAULT),
            Err(CudaError::InvalidValue)
        ));
    }

    #[test]
    fn register_null_ptr_fails() {
        assert!(matches!(
            register::<u8>(std::ptr::null_mut(), 10, RegisterFlags::DEFAULT),
            Err(CudaError::InvalidValue)
        ));
    }

    #[test]
    fn register_slice_zero_len_fails() {
        let mut empty: [f32; 0] = [];
        assert!(matches!(
            register_slice(&mut empty, RegisterFlags::DEFAULT),
            Err(CudaError::InvalidValue)
        ));
    }

    #[test]
    fn register_vec_zero_len_fails() {
        let mut v: Vec<i32> = Vec::new();
        assert!(matches!(
            register_vec(&mut v, RegisterFlags::DEFAULT),
            Err(CudaError::InvalidValue)
        ));
    }

    #[test]
    fn query_null_ptr_fails() {
        assert!(matches!(
            query_registered_pointer_info(std::ptr::null()),
            Err(CudaError::InvalidValue)
        ));
    }

    // -- macOS synthetic tests (compiled only on macOS, where registration
    //    returns a synthetic handle without touching a driver) --------------

    #[cfg(target_os = "macos")]
    mod macos_tests {
        use super::*;

        #[test]
        fn register_slice_succeeds_on_macos() {
            let mut data = vec![1.0f32, 2.0, 3.0, 4.0];
            match register_slice(data.as_mut_slice(), RegisterFlags::DEFAULT) {
                Ok(handle) => {
                    assert_eq!(handle.len(), 4);
                    assert!(!handle.is_empty());
                    assert_eq!(handle.flags(), RegisterFlags::DEFAULT);
                    assert_eq!(handle.as_slice(), &[1.0, 2.0, 3.0, 4.0]);
                }
                Err(err) => panic!("register_slice failed: {err:?}"),
            }
        }

        #[test]
        fn register_vec_succeeds_on_macos() {
            let mut v = vec![10u32, 20, 30];
            match register_vec(&mut v, RegisterFlags::PORTABLE) {
                Ok(handle) => {
                    assert_eq!(handle.len(), 3);
                    assert_eq!(handle.flags(), RegisterFlags::PORTABLE);
                    assert_ne!(handle.device_ptr(), 0);
                }
                Err(err) => panic!("register_vec failed: {err:?}"),
            }
        }

        #[test]
        fn registered_memory_deref_works() {
            let mut data = vec![100i64, 200, 300];
            match register_vec(&mut data, RegisterFlags::DEFAULT) {
                Ok(handle) => {
                    // Deref to &[T]
                    let view: &[i64] = &handle;
                    assert_eq!(view.len(), 3);
                    assert_eq!(view[0], 100);
                    assert_eq!(view[2], 300);
                }
                Err(err) => panic!("register_vec failed: {err:?}"),
            }
        }

        #[test]
        fn registered_memory_deref_mut_works() {
            let mut data = vec![1u8, 2, 3, 4, 5];
            match register_slice(&mut data, RegisterFlags::DEFAULT) {
                Ok(mut handle) => {
                    handle[0] = 99;
                    assert_eq!(handle[0], 99);
                    let view: &mut [u8] = &mut handle;
                    view[4] = 88;
                    assert_eq!(view[4], 88);
                }
                Err(err) => panic!("register_slice failed: {err:?}"),
            }
        }

        #[test]
        fn query_pointer_info_on_macos() {
            let data = [42u8; 64];
            match query_registered_pointer_info(data.as_ptr()) {
                Ok(info) => {
                    assert!(!info.is_managed);
                    assert_eq!(info.memory_type, RegisteredMemoryType::Host);
                    assert_ne!(info.device_ptr, 0);
                }
                Err(err) => panic!("query_registered_pointer_info failed: {err:?}"),
            }
        }

        #[test]
        fn registered_memory_as_ptr_mut_ptr() {
            let mut data = vec![5.0f64; 10];
            let original_ptr = data.as_mut_ptr();
            match register_vec(&mut data, RegisterFlags::DEFAULT) {
                Ok(mut handle) => {
                    assert_eq!(handle.as_ptr(), original_ptr as *const f64);
                    assert_eq!(handle.as_mut_ptr(), original_ptr);
                }
                Err(err) => panic!("register_vec failed: {err:?}"),
            }
        }
    }

    // -- GPU integration tests (require real hardware) ---------------------

    #[cfg(feature = "gpu-tests")]
    mod gpu_tests {
        use super::*;

        #[test]
        fn register_and_unregister_on_gpu() {
            // cuMemHostRegister requires an active CUDA context bound to the
            // calling thread.  Create one via Context::new (which calls
            // cuCtxCreate, making the context current).  Skip the test if no
            // GPU or driver is available.
            if oxicuda_driver::init().is_err() {
                return;
            }
            if oxicuda_driver::Device::count().unwrap_or(0) == 0 {
                return;
            }
            let Ok(dev) = oxicuda_driver::Device::get(0) else {
                return;
            };
            // `_ctx` keeps the CUDA context alive and current for this thread.
            let Ok(_ctx) = oxicuda_driver::Context::new(&dev) else {
                return;
            };

            let mut data = vec![0.0f32; 4096];
            match register_vec(&mut data, RegisterFlags::DEFAULT) {
                Ok(handle) => {
                    assert_eq!(handle.len(), 4096);
                    assert!(handle.device_ptr() != 0, "device_ptr should be non-zero");
                    // Dropping `handle` at the end of this arm unregisters it.
                }
                Err(err) => panic!("registration failed: {:?}", Some(err)),
            }
        }
    }
}