Skip to main content

oxicuda_runtime/
texture.rs

//! Texture and surface memory — CUDA array allocation and bindless objects.
//!
//! Implements the following CUDA Runtime API families:
//!
//! - **Array management**: `cudaMallocArray`, `cudaFreeArray`,
//!   `cudaMalloc3DArray`, `cudaArrayGetInfo`
//! - **Host ↔ array copies**: `cudaMemcpyToArray`, `cudaMemcpyFromArray`,
//!   `cudaMemcpyToArrayAsync`, `cudaMemcpyFromArrayAsync`
//! - **Texture objects (bindless)**: `cudaCreateTextureObject`,
//!   `cudaDestroyTextureObject`, `cudaGetTextureObjectResourceDesc`
//! - **Surface objects (bindless)**: `cudaCreateSurfaceObject`,
//!   `cudaDestroySurfaceObject`
13
14use std::ffi::c_void;
15
16use oxicuda_driver::ffi::{
17    CUDA_ARRAY_DESCRIPTOR, CUDA_ARRAY3D_DESCRIPTOR, CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC,
18    CUDA_TEXTURE_DESC, CUaddress_mode, CUarray, CUarray_format, CUfilter_mode, CUmipmappedArray,
19    CUresourceViewFormat, CUresourcetype, CUsurfObject, CUtexObject, CudaResourceDescArray,
20    CudaResourceDescLinear, CudaResourceDescMipmap, CudaResourceDescPitch2d, CudaResourceDescRes,
21};
22use oxicuda_driver::loader::try_driver;
23use oxicuda_driver::{
24    CU_TRSF_NORMALIZED_COORDINATES, CU_TRSF_READ_AS_INTEGER, CU_TRSF_SRGB, CUDA_ARRAY3D_CUBEMAP,
25    CUDA_ARRAY3D_LAYERED, CUDA_ARRAY3D_SURFACE_LDST, CUDA_ARRAY3D_TEXTURE_GATHER,
26};
27
28use crate::error::{CudaRtError, CudaRtResult};
29use crate::memory::DevicePtr;
30use crate::stream::CudaStream;
31
32// ─── Channel Format ───────────────────────────────────────────────────────────
33
/// Per-channel element encoding of a CUDA array.
///
/// Counterpart of `cudaChannelFormatKind` / `CUarray_format`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ArrayFormat {
    /// Unsigned integer, 8 bits per channel.
    UnsignedInt8,
    /// Unsigned integer, 16 bits per channel.
    UnsignedInt16,
    /// Unsigned integer, 32 bits per channel.
    UnsignedInt32,
    /// Signed integer, 8 bits per channel.
    SignedInt8,
    /// Signed integer, 16 bits per channel.
    SignedInt16,
    /// Signed integer, 32 bits per channel.
    SignedInt32,
    /// Half-precision (16-bit) floating point.
    Half,
    /// Single-precision (32-bit) floating point.
    Float,
}
56
57impl ArrayFormat {
58    /// Convert to the driver-API [`CUarray_format`].
59    #[must_use]
60    pub const fn as_cu_format(self) -> CUarray_format {
61        match self {
62            Self::UnsignedInt8 => CUarray_format::UnsignedInt8,
63            Self::UnsignedInt16 => CUarray_format::UnsignedInt16,
64            Self::UnsignedInt32 => CUarray_format::UnsignedInt32,
65            Self::SignedInt8 => CUarray_format::SignedInt8,
66            Self::SignedInt16 => CUarray_format::SignedInt16,
67            Self::SignedInt32 => CUarray_format::SignedInt32,
68            Self::Half => CUarray_format::Half,
69            Self::Float => CUarray_format::Float,
70        }
71    }
72
73    /// Element byte width for one channel.
74    #[must_use]
75    pub const fn bytes_per_channel(self) -> usize {
76        match self {
77            Self::UnsignedInt8 | Self::SignedInt8 => 1,
78            Self::UnsignedInt16 | Self::SignedInt16 | Self::Half => 2,
79            Self::UnsignedInt32 | Self::SignedInt32 | Self::Float => 4,
80        }
81    }
82}
83
84// ─── Texture Address Mode ─────────────────────────────────────────────────────
85
/// How out-of-range texture coordinates are handled (maps to
/// `cudaTextureAddressMode`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AddressMode {
    /// Wrap around, tiling the texture.
    Wrap,
    /// Clamp to the texture boundary.
    Clamp,
    /// Mirror at every boundary.
    Mirror,
    /// Return the border colour for out-of-range coordinates.
    Border,
}
98
99impl AddressMode {
100    #[must_use]
101    const fn as_cu(self) -> CUaddress_mode {
102        match self {
103            Self::Wrap => CUaddress_mode::Wrap,
104            Self::Clamp => CUaddress_mode::Clamp,
105            Self::Mirror => CUaddress_mode::Mirror,
106            Self::Border => CUaddress_mode::Border,
107        }
108    }
109}
110
111// ─── Texture Filter Mode ──────────────────────────────────────────────────────
112
/// Filtering applied when a texture is sampled (maps to
/// `cudaTextureFilterMode`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FilterMode {
    /// Point sampling: take the nearest texel.
    Point,
    /// Linear interpolation between neighbouring texels.
    Linear,
}
121
122impl FilterMode {
123    #[must_use]
124    const fn as_cu(self) -> CUfilter_mode {
125        match self {
126            Self::Point => CUfilter_mode::Point,
127            Self::Linear => CUfilter_mode::Linear,
128        }
129    }
130}
131
132// ─── CudaArray ────────────────────────────────────────────────────────────────
133
134/// RAII wrapper for a CUDA array (1-D or 2-D).
135///
136/// Created by [`CudaArray::create_1d`] or [`CudaArray::create_2d`]; freed by [`Drop`].
137///
138/// A `CudaArray` can be bound to a [`CudaTextureObject`] or
139/// [`CudaSurfaceObject`] for hardware-accelerated sampling.
140pub struct CudaArray {
141    handle: CUarray,
142    width: usize,
143    height: usize,
144    format: ArrayFormat,
145    num_channels: u32,
146}
147
148impl CudaArray {
149    /// Allocate a 1-D CUDA array with `width` elements of the given format and
150    /// channel count (`num_channels` must be 1, 2, or 4).
151    ///
152    /// Mirrors `cudaMallocArray` (1-D form).
153    ///
154    /// # Errors
155    ///
156    /// Returns [`CudaRtError::NotSupported`] if the driver does not expose
157    /// `cuArrayCreate_v2`, or propagates driver errors.
158    pub fn create_1d(width: usize, format: ArrayFormat, num_channels: u32) -> CudaRtResult<Self> {
159        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
160        let create_fn = api.cu_array_create_v2.ok_or(CudaRtError::NotSupported)?;
161        let desc = CUDA_ARRAY_DESCRIPTOR {
162            width,
163            height: 0,
164            format: format.as_cu_format(),
165            num_channels,
166        };
167        let mut handle = CUarray::default();
168        // SAFETY: desc is valid, handle is initialized after the call succeeds.
169        let rc = unsafe { create_fn(&raw mut handle, &desc) };
170        if rc != 0 {
171            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::MemoryAllocation));
172        }
173        Ok(Self {
174            handle,
175            width,
176            height: 0,
177            format,
178            num_channels,
179        })
180    }
181
182    /// Allocate a 2-D CUDA array with `width × height` elements.
183    ///
184    /// Mirrors `cudaMallocArray` (2-D form).
185    ///
186    /// # Errors
187    ///
188    /// Returns [`CudaRtError::NotSupported`] if `cuArrayCreate_v2` is absent.
189    pub fn create_2d(
190        width: usize,
191        height: usize,
192        format: ArrayFormat,
193        num_channels: u32,
194    ) -> CudaRtResult<Self> {
195        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
196        let create_fn = api.cu_array_create_v2.ok_or(CudaRtError::NotSupported)?;
197        let desc = CUDA_ARRAY_DESCRIPTOR {
198            width,
199            height,
200            format: format.as_cu_format(),
201            num_channels,
202        };
203        let mut handle = CUarray::default();
204        // SAFETY: FFI.
205        let rc = unsafe { create_fn(&raw mut handle, &desc) };
206        if rc != 0 {
207            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::MemoryAllocation));
208        }
209        Ok(Self {
210            handle,
211            width,
212            height,
213            format,
214            num_channels,
215        })
216    }
217
218    /// Copy a contiguous host buffer into the entire array (synchronous).
219    ///
220    /// `data` must contain exactly `width * height.max(1) * num_channels`
221    /// elements of the appropriate type.
222    ///
223    /// Mirrors `cudaMemcpyToArray` (host-to-array).
224    ///
225    /// # Errors
226    ///
227    /// Returns an error if the driver does not support `cuMemcpyHtoA_v2` or if
228    /// the copy fails.
229    ///
230    /// # Safety
231    ///
232    /// `src` must be valid for reading `byte_count` bytes.
233    pub unsafe fn copy_from_host_raw(
234        &self,
235        src: *const c_void,
236        byte_count: usize,
237    ) -> CudaRtResult<()> {
238        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
239        let f = api.cu_memcpy_htoa_v2.ok_or(CudaRtError::NotSupported)?;
240        // SAFETY: src is caller-guaranteed valid for `byte_count` bytes.
241        let rc = unsafe { f(self.handle, 0, src, byte_count) };
242        if rc != 0 {
243            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
244        }
245        Ok(())
246    }
247
248    /// Copy a typed host slice into the array (synchronous, type-safe helper).
249    ///
250    /// # Errors
251    ///
252    /// Forwards errors from [`Self::copy_from_host_raw`].
253    pub fn copy_from_host<T: Copy>(&self, src: &[T]) -> CudaRtResult<()> {
254        // SAFETY: src is a valid slice reference, so the pointer and size are valid.
255        unsafe {
256            self.copy_from_host_raw(src.as_ptr().cast::<c_void>(), std::mem::size_of_val(src))
257        }
258    }
259
260    /// Copy the entire array into a host buffer (synchronous, raw pointer).
261    ///
262    /// Mirrors `cudaMemcpyFromArray` (array-to-host).
263    ///
264    /// # Errors
265    ///
266    /// Returns an error if `cuMemcpyAtoH_v2` is absent or the copy fails.
267    ///
268    /// # Safety
269    ///
270    /// `dst` must be valid for writing `byte_count` bytes.
271    pub unsafe fn copy_to_host_raw(&self, dst: *mut c_void, byte_count: usize) -> CudaRtResult<()> {
272        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
273        let f = api.cu_memcpy_atoh_v2.ok_or(CudaRtError::NotSupported)?;
274        // SAFETY: dst is caller-guaranteed valid for `byte_count` bytes.
275        let rc = unsafe { f(dst, self.handle, 0, byte_count) };
276        if rc != 0 {
277            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
278        }
279        Ok(())
280    }
281
282    /// Copy the entire array into a typed host slice (synchronous, type-safe).
283    ///
284    /// # Errors
285    ///
286    /// Forwards errors from [`Self::copy_to_host_raw`].
287    pub fn copy_to_host<T: Copy>(&self, dst: &mut [T]) -> CudaRtResult<()> {
288        // SAFETY: dst is a valid mutable slice reference, so the pointer and size are valid.
289        unsafe {
290            self.copy_to_host_raw(
291                dst.as_mut_ptr().cast::<c_void>(),
292                std::mem::size_of_val(dst),
293            )
294        }
295    }
296
297    /// Asynchronously copy a host buffer into the array on `stream`.
298    ///
299    /// Mirrors `cudaMemcpyToArrayAsync`.
300    ///
301    /// # Errors
302    ///
303    /// Returns an error if `cuMemcpyHtoAAsync_v2` is absent.
304    ///
305    /// # Safety
306    ///
307    /// The caller must ensure `src` remains valid until the stream operation
308    /// completes (i.e., until the stream is synchronized).
309    pub unsafe fn copy_from_host_async_raw(
310        &self,
311        src: *const c_void,
312        byte_count: usize,
313        stream: CudaStream,
314    ) -> CudaRtResult<()> {
315        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
316        let f = api
317            .cu_memcpy_htoa_async_v2
318            .ok_or(CudaRtError::NotSupported)?;
319        // SAFETY: caller guarantees src + lifetime.
320        let rc = unsafe { f(self.handle, 0, src, byte_count, stream.raw()) };
321        if rc != 0 {
322            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidMemcpyDirection));
323        }
324        Ok(())
325    }
326
327    /// Returns the raw [`CUarray`] handle (for use in resource descriptors).
328    #[must_use]
329    pub fn raw(&self) -> CUarray {
330        self.handle
331    }
332
333    /// Width of the array in elements.
334    #[must_use]
335    pub const fn width(&self) -> usize {
336        self.width
337    }
338
339    /// Height of the array in elements (0 for 1-D arrays).
340    #[must_use]
341    pub const fn height(&self) -> usize {
342        self.height
343    }
344
345    /// Element format of this array.
346    #[must_use]
347    pub const fn format(&self) -> ArrayFormat {
348        self.format
349    }
350
351    /// Number of channels (1, 2, or 4).
352    #[must_use]
353    pub const fn num_channels(&self) -> u32 {
354        self.num_channels
355    }
356}
357
358impl Drop for CudaArray {
359    fn drop(&mut self) {
360        if let Ok(api) = try_driver() {
361            if let Some(f) = api.cu_array_destroy {
362                // SAFETY: handle was created by cuArrayCreate_v2 and not yet freed.
363                unsafe { f(self.handle) };
364            }
365        }
366    }
367}
368
369// ─── CudaArray3D ─────────────────────────────────────────────────────────────
370
/// Bit-set of creation flags for a 3-D CUDA array.
///
/// The wrapped `u32` holds the raw flag bits; the derived [`Default`] is the
/// empty set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct Array3DFlags(pub u32);
374
375impl Array3DFlags {
376    /// No special flags.
377    pub const DEFAULT: Self = Self(0);
378    /// Layered array (depth = number of layers).
379    pub const LAYERED: Self = Self(CUDA_ARRAY3D_LAYERED);
380    /// Supports surface load/store.
381    pub const SURFACE_LDST: Self = Self(CUDA_ARRAY3D_SURFACE_LDST);
382    /// Cubemap array (depth = 6 × num_layers).
383    pub const CUBEMAP: Self = Self(CUDA_ARRAY3D_CUBEMAP);
384    /// Supports texture gather operations.
385    pub const TEXTURE_GATHER: Self = Self(CUDA_ARRAY3D_TEXTURE_GATHER);
386
387    /// Combine flags with bitwise OR.
388    #[must_use]
389    pub const fn or(self, other: Self) -> Self {
390        Self(self.0 | other.0)
391    }
392}
393
394/// RAII wrapper for a 3-D (or layered / cubemap) CUDA array.
395pub struct CudaArray3D {
396    handle: CUarray,
397    width: usize,
398    height: usize,
399    depth: usize,
400    format: ArrayFormat,
401    num_channels: u32,
402    flags: Array3DFlags,
403}
404
405impl CudaArray3D {
406    /// Allocate a 3-D CUDA array.
407    ///
408    /// `depth = 0` is valid for 1-D and 2-D arrays allocated via the 3-D API;
409    /// for layered arrays it specifies the number of layers.
410    ///
411    /// Mirrors `cudaMalloc3DArray`.
412    ///
413    /// # Errors
414    ///
415    /// Returns [`CudaRtError::NotSupported`] if `cuArray3DCreate_v2` is absent.
416    pub fn create(
417        width: usize,
418        height: usize,
419        depth: usize,
420        format: ArrayFormat,
421        num_channels: u32,
422        flags: Array3DFlags,
423    ) -> CudaRtResult<Self> {
424        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
425        let create_fn = api.cu_array3d_create_v2.ok_or(CudaRtError::NotSupported)?;
426        let desc = CUDA_ARRAY3D_DESCRIPTOR {
427            width,
428            height,
429            depth,
430            format: format.as_cu_format(),
431            num_channels,
432            flags: flags.0,
433        };
434        let mut handle = CUarray::default();
435        // SAFETY: FFI.
436        let rc = unsafe { create_fn(&raw mut handle, &desc) };
437        if rc != 0 {
438            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::MemoryAllocation));
439        }
440        Ok(Self {
441            handle,
442            width,
443            height,
444            depth,
445            format,
446            num_channels,
447            flags,
448        })
449    }
450
451    /// Returns the raw [`CUarray`] handle.
452    #[must_use]
453    pub fn raw(&self) -> CUarray {
454        self.handle
455    }
456
457    /// Width of the array in elements.
458    #[must_use]
459    pub const fn width(&self) -> usize {
460        self.width
461    }
462    /// Height of the array in elements.
463    #[must_use]
464    pub const fn height(&self) -> usize {
465        self.height
466    }
467    /// Depth of the array (or layer count for layered arrays).
468    #[must_use]
469    pub const fn depth(&self) -> usize {
470        self.depth
471    }
472    /// Element format.
473    #[must_use]
474    pub const fn format(&self) -> ArrayFormat {
475        self.format
476    }
477    /// Number of channels.
478    #[must_use]
479    pub const fn num_channels(&self) -> u32 {
480        self.num_channels
481    }
482    /// Creation flags.
483    #[must_use]
484    pub const fn flags(&self) -> Array3DFlags {
485        self.flags
486    }
487}
488
489impl Drop for CudaArray3D {
490    fn drop(&mut self) {
491        if let Ok(api) = try_driver() {
492            if let Some(f) = api.cu_array_destroy {
493                // SAFETY: handle was created by cuArray3DCreate_v2 and not yet freed.
494                unsafe { f(self.handle) };
495            }
496        }
497    }
498}
499
500// ─── ResourceDesc ─────────────────────────────────────────────────────────────
501
502/// High-level resource description for texture and surface objects.
503///
504/// Converted to [`CUDA_RESOURCE_DESC`] when creating a [`CudaTextureObject`]
505/// or [`CudaSurfaceObject`].
506#[derive(Clone, Copy)]
507pub enum ResourceDesc {
508    /// A CUDA array resource (most common for textures and surfaces).
509    Array {
510        /// Raw array handle.
511        handle: CUarray,
512    },
513    /// A mipmapped CUDA array resource.
514    MipmappedArray {
515        /// Raw mipmapped array handle.
516        handle: CUmipmappedArray,
517    },
518    /// Linear device-memory resource (no filtering beyond point).
519    Linear {
520        /// Device pointer to the linear region.
521        dev_ptr: DevicePtr,
522        /// Channel element format.
523        format: ArrayFormat,
524        /// Number of channels.
525        num_channels: u32,
526        /// Total size in bytes.
527        size_in_bytes: usize,
528    },
529    /// Pitched 2-D device-memory resource.
530    Pitch2d {
531        /// Device pointer to the first row.
532        dev_ptr: DevicePtr,
533        /// Channel element format.
534        format: ArrayFormat,
535        /// Number of channels.
536        num_channels: u32,
537        /// Width of the region in elements.
538        width_in_elements: usize,
539        /// Height of the region in elements.
540        height: usize,
541        /// Row pitch in bytes.
542        pitch_in_bytes: usize,
543    },
544}
545
546impl ResourceDesc {
547    /// Convert to the raw [`CUDA_RESOURCE_DESC`] expected by the driver.
548    #[must_use]
549    pub fn as_raw(&self) -> CUDA_RESOURCE_DESC {
550        match *self {
551            Self::Array { handle } => CUDA_RESOURCE_DESC {
552                res_type: CUresourcetype::Array,
553                res: CudaResourceDescRes {
554                    array: CudaResourceDescArray { h_array: handle },
555                },
556                flags: 0,
557            },
558            Self::MipmappedArray { handle } => CUDA_RESOURCE_DESC {
559                res_type: CUresourcetype::MipmappedArray,
560                res: CudaResourceDescRes {
561                    mipmap: CudaResourceDescMipmap {
562                        h_mipmapped_array: handle,
563                    },
564                },
565                flags: 0,
566            },
567            Self::Linear {
568                dev_ptr,
569                format,
570                num_channels,
571                size_in_bytes,
572            } => CUDA_RESOURCE_DESC {
573                res_type: CUresourcetype::Linear,
574                res: CudaResourceDescRes {
575                    linear: CudaResourceDescLinear {
576                        dev_ptr: dev_ptr.0,
577                        format: format.as_cu_format(),
578                        num_channels,
579                        size_in_bytes,
580                    },
581                },
582                flags: 0,
583            },
584            Self::Pitch2d {
585                dev_ptr,
586                format,
587                num_channels,
588                width_in_elements,
589                height,
590                pitch_in_bytes,
591            } => CUDA_RESOURCE_DESC {
592                res_type: CUresourcetype::Pitch2d,
593                res: CudaResourceDescRes {
594                    pitch2d: CudaResourceDescPitch2d {
595                        dev_ptr: dev_ptr.0,
596                        format: format.as_cu_format(),
597                        num_channels,
598                        width_in_elements,
599                        height,
600                        pitch_in_bytes,
601                    },
602                },
603                flags: 0,
604            },
605        }
606    }
607}
608
609// ─── TextureDesc ──────────────────────────────────────────────────────────────
610
611/// Ergonomic texture-object sampling configuration.
612///
613/// Converted to [`CUDA_TEXTURE_DESC`] when creating a [`CudaTextureObject`].
614#[derive(Clone, Copy)]
615pub struct TextureDesc {
616    /// Address mode for the U (X) dimension.
617    pub address_u: AddressMode,
618    /// Address mode for the V (Y) dimension.
619    pub address_v: AddressMode,
620    /// Address mode for the W (Z) dimension.
621    pub address_w: AddressMode,
622    /// Sampling filter mode.
623    pub filter_mode: FilterMode,
624    /// When `true`, texture coordinates are in the normalized range [0, 1).
625    pub normalized_coords: bool,
626    /// When `true`, texture reads return raw integers rather than normalized floats.
627    pub read_as_integer: bool,
628    /// When `true`, hardware applies sRGB gamma decoding on read.
629    pub srgb: bool,
630    /// Maximum anisotropy ratio (1–16; 1 disables anisotropy).
631    pub max_anisotropy: u32,
632    /// Mipmap filter mode.
633    pub mipmap_filter: FilterMode,
634    /// Mipmap LOD bias.
635    pub mipmap_bias: f32,
636    /// Minimum mipmap LOD clamp.
637    pub min_lod: f32,
638    /// Maximum mipmap LOD clamp.
639    pub max_lod: f32,
640    /// Border color (RGBA).
641    pub border_color: [f32; 4],
642}
643
644impl TextureDesc {
645    /// Construct a sensible default texture descriptor:
646    ///
647    /// - Clamp address mode on all axes
648    /// - Nearest-neighbor filtering (no mipmap)
649    /// - Normalized coordinates
650    /// - No anisotropy
651    #[must_use]
652    pub const fn default_2d() -> Self {
653        Self {
654            address_u: AddressMode::Clamp,
655            address_v: AddressMode::Clamp,
656            address_w: AddressMode::Clamp,
657            filter_mode: FilterMode::Point,
658            normalized_coords: true,
659            read_as_integer: false,
660            srgb: false,
661            max_anisotropy: 1,
662            mipmap_filter: FilterMode::Point,
663            mipmap_bias: 0.0,
664            min_lod: 0.0,
665            max_lod: 0.0,
666            border_color: [0.0; 4],
667        }
668    }
669
670    /// Convert to the raw [`CUDA_TEXTURE_DESC`] expected by the driver.
671    #[must_use]
672    pub fn as_raw(&self) -> CUDA_TEXTURE_DESC {
673        let mut flags: u32 = 0;
674        if self.normalized_coords {
675            flags |= CU_TRSF_NORMALIZED_COORDINATES;
676        }
677        if self.read_as_integer {
678            flags |= CU_TRSF_READ_AS_INTEGER;
679        }
680        if self.srgb {
681            flags |= CU_TRSF_SRGB;
682        }
683        CUDA_TEXTURE_DESC {
684            address_mode: [
685                self.address_u.as_cu(),
686                self.address_v.as_cu(),
687                self.address_w.as_cu(),
688            ],
689            filter_mode: self.filter_mode.as_cu(),
690            flags,
691            max_anisotropy: self.max_anisotropy,
692            mipmap_filter_mode: self.mipmap_filter.as_cu(),
693            mipmap_level_bias: self.mipmap_bias,
694            min_mipmap_level_clamp: self.min_lod,
695            max_mipmap_level_clamp: self.max_lod,
696            border_color: self.border_color,
697            reserved: [0i32; 12],
698        }
699    }
700}
701
702// ─── ResourceViewDesc ─────────────────────────────────────────────────────────
703
704/// Optional resource-view descriptor for texture objects.
705///
706/// Allows re-interpretation of the array format, or restriction to a sub-range
707/// of mipmap levels and array layers.
708#[derive(Clone, Copy)]
709pub struct ResourceViewDesc {
710    /// Reinterpretation format (use `None` for the array's native format).
711    pub format: CUresourceViewFormat,
712    /// View width in elements.
713    pub width: usize,
714    /// View height in elements.
715    pub height: usize,
716    /// View depth in elements.
717    pub depth: usize,
718    /// First mipmap level in the view.
719    pub first_mip_level: u32,
720    /// Last mipmap level in the view.
721    pub last_mip_level: u32,
722    /// First array layer.
723    pub first_layer: u32,
724    /// Last array layer.
725    pub last_layer: u32,
726}
727
728impl ResourceViewDesc {
729    /// Convert to the raw [`CUDA_RESOURCE_VIEW_DESC`].
730    #[must_use]
731    pub fn as_raw(&self) -> CUDA_RESOURCE_VIEW_DESC {
732        CUDA_RESOURCE_VIEW_DESC {
733            format: self.format,
734            width: self.width,
735            height: self.height,
736            depth: self.depth,
737            first_mipmap_level: self.first_mip_level,
738            last_mipmap_level: self.last_mip_level,
739            first_layer: self.first_layer,
740            last_layer: self.last_layer,
741            reserved: [0u32; 16],
742        }
743    }
744}
745
746// ─── CudaTextureObject ────────────────────────────────────────────────────────
747
748/// RAII wrapper for a CUDA bindless texture object.
749///
750/// Created by [`CudaTextureObject::create`]; automatically destroyed on drop.
751/// Mirrors `cudaCreateTextureObject` / `cudaDestroyTextureObject`.
752pub struct CudaTextureObject {
753    handle: CUtexObject,
754}
755
756impl CudaTextureObject {
757    /// Create a texture object from a resource and texture descriptor.
758    ///
759    /// `view_desc` is optional — pass `None` to use the resource's native
760    /// format and full extent.
761    ///
762    /// # Errors
763    ///
764    /// Returns [`CudaRtError::NotSupported`] if `cuTexObjectCreate` is absent,
765    /// or propagates the driver error code.
766    pub fn create(
767        resource: &ResourceDesc,
768        texture: &TextureDesc,
769        view: Option<&ResourceViewDesc>,
770    ) -> CudaRtResult<Self> {
771        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
772        let create_fn = api.cu_tex_object_create.ok_or(CudaRtError::NotSupported)?;
773
774        let raw_res = resource.as_raw();
775        let raw_tex = texture.as_raw();
776        let (raw_view_ptr, _raw_view_storage);
777        if let Some(v) = view {
778            _raw_view_storage = v.as_raw();
779            raw_view_ptr = &_raw_view_storage as *const CUDA_RESOURCE_VIEW_DESC;
780        } else {
781            _raw_view_storage = unsafe { std::mem::zeroed() };
782            raw_view_ptr = std::ptr::null();
783        }
784
785        let mut handle = CUtexObject::default();
786        // SAFETY: All descriptor pointers are valid stack-allocated structs.
787        let rc = unsafe { create_fn(&raw mut handle, &raw_res, &raw_tex, raw_view_ptr) };
788        if rc != 0 {
789            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidValue));
790        }
791        Ok(Self { handle })
792    }
793
794    /// Returns the raw [`CUtexObject`] handle.
795    #[must_use]
796    pub fn raw(&self) -> CUtexObject {
797        self.handle
798    }
799}
800
801impl Drop for CudaTextureObject {
802    fn drop(&mut self) {
803        if let Ok(api) = try_driver() {
804            if let Some(f) = api.cu_tex_object_destroy {
805                // SAFETY: handle was created by cuTexObjectCreate and not yet freed.
806                unsafe { f(self.handle) };
807            }
808        }
809    }
810}
811
812// ─── CudaSurfaceObject ────────────────────────────────────────────────────────
813
814/// RAII wrapper for a CUDA bindless surface object.
815///
816/// Created by [`CudaSurfaceObject::create`]; automatically destroyed on drop.
817/// Mirrors `cudaCreateSurfaceObject` / `cudaDestroySurfaceObject`.
818///
819/// The resource must be a CUDA array allocated with the
820/// [`Array3DFlags::SURFACE_LDST`] flag (or equivalent).
821pub struct CudaSurfaceObject {
822    handle: CUsurfObject,
823}
824
825impl CudaSurfaceObject {
826    /// Create a surface object from a resource descriptor.
827    ///
828    /// The resource type must be `Array` — surfaces cannot be backed by linear
829    /// or pitched memory.
830    ///
831    /// # Errors
832    ///
833    /// Returns [`CudaRtError::NotSupported`] if `cuSurfObjectCreate` is absent,
834    /// or propagates the driver error code.
835    pub fn create(resource: &ResourceDesc) -> CudaRtResult<Self> {
836        let api = try_driver().map_err(|_| CudaRtError::DriverNotAvailable)?;
837        let create_fn = api.cu_surf_object_create.ok_or(CudaRtError::NotSupported)?;
838        let raw_res = resource.as_raw();
839        let mut handle = CUsurfObject::default();
840        // SAFETY: raw_res is a valid stack-allocated CUDA_RESOURCE_DESC.
841        let rc = unsafe { create_fn(&raw mut handle, &raw_res) };
842        if rc != 0 {
843            return Err(CudaRtError::from_code(rc).unwrap_or(CudaRtError::InvalidValue));
844        }
845        Ok(Self { handle })
846    }
847
848    /// Returns the raw [`CUsurfObject`] handle.
849    #[must_use]
850    pub fn raw(&self) -> CUsurfObject {
851        self.handle
852    }
853}
854
855impl Drop for CudaSurfaceObject {
856    fn drop(&mut self) {
857        if let Ok(api) = try_driver() {
858            if let Some(f) = api.cu_surf_object_destroy {
859                // SAFETY: handle was created by cuSurfObjectCreate and not yet freed.
860                unsafe { f(self.handle) };
861            }
862        }
863    }
864}
865
866// ─── Tests ───────────────────────────────────────────────────────────────────
867
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `err` is one of the failures the creation tests accept
    /// because the outcome depends on the machine (driver, GPU, or context
    /// state) rather than on the code under test.
    fn is_environment_error(err: &CudaRtError) -> bool {
        matches!(
            err,
            CudaRtError::DriverNotAvailable
                | CudaRtError::NotSupported
                | CudaRtError::NoGpu
                | CudaRtError::InitializationError
                | CudaRtError::InvalidDevice
                | CudaRtError::DeviceUninitialized
        )
    }

    /// Spot-checks the per-channel byte size reported for several formats.
    #[test]
    fn array_format_byte_widths() {
        let expected = [
            (ArrayFormat::UnsignedInt8, 1),
            (ArrayFormat::UnsignedInt16, 2),
            (ArrayFormat::Half, 2),
            (ArrayFormat::Float, 4),
            (ArrayFormat::SignedInt32, 4),
        ];
        for (format, width) in expected {
            assert_eq!(
                format.bytes_per_channel(),
                width,
                "byte width mismatch for {format:?}"
            );
        }
    }

    /// Checks that two formats convert to the matching `CUarray_format` variant.
    #[test]
    fn array_format_cu_round_trip() {
        let float_raw = ArrayFormat::Float.as_cu_format();
        let int8_raw = ArrayFormat::SignedInt8.as_cu_format();
        assert!(matches!(float_raw, CUarray_format::Float));
        assert!(matches!(int8_raw, CUarray_format::SignedInt8));
    }

    /// Verifies the raw descriptor produced by `TextureDesc::default_2d`.
    #[test]
    fn texture_desc_default_flags() {
        let defaults = TextureDesc::default_2d();
        let raw = defaults.as_raw();

        // Normalized coordinates flag must be set.
        assert_ne!(raw.flags & CU_TRSF_NORMALIZED_COORDINATES, 0);
        // Neither read-as-integer nor sRGB is enabled by default.
        assert_eq!(raw.flags & CU_TRSF_READ_AS_INTEGER, 0);
        assert_eq!(raw.flags & CU_TRSF_SRGB, 0);

        // Point filtering by default.
        assert!(matches!(raw.filter_mode, CUfilter_mode::Point));

        // The first three address-mode slots must all be Clamp.
        for axis in 0..3 {
            assert!(matches!(raw.address_mode[axis], CUaddress_mode::Clamp));
        }
    }

    /// An array resource keeps its type tag and (null) handle when lowered.
    #[test]
    fn resource_desc_array_round_trip() {
        let resource = ResourceDesc::Array {
            handle: CUarray::default(),
        };
        let raw = resource.as_raw();
        assert!(matches!(raw.res_type, CUresourcetype::Array));

        // SAFETY: the array variant was just written, so reading it is valid.
        let array_view = unsafe { raw.res.array };
        // The default handle is null.
        assert!(array_view.h_array.is_null());
    }

    /// A linear resource keeps every field intact when lowered.
    #[test]
    fn resource_desc_linear_round_trip() {
        let resource = ResourceDesc::Linear {
            dev_ptr: DevicePtr(0x1000),
            format: ArrayFormat::Float,
            num_channels: 4,
            size_in_bytes: 1024,
        };
        let raw = resource.as_raw();
        assert!(matches!(raw.res_type, CUresourcetype::Linear));

        // SAFETY: the linear variant was just written, so reading it is valid.
        let linear_view = unsafe { raw.res.linear };
        assert!(matches!(linear_view.format, CUarray_format::Float));
        assert_eq!(linear_view.dev_ptr, 0x1000);
        assert_eq!(linear_view.num_channels, 4);
        assert_eq!(linear_view.size_in_bytes, 1024);
    }

    /// Creating a 2-D array must either succeed or fail with an
    /// environment-related error.
    #[test]
    fn cuda_array_create_no_gpu() {
        // Driver absent → DriverNotAvailable/NotSupported/NoGpu.
        // Driver present but no active context → DeviceUninitialized.
        // Driver present with context → Ok or InvalidDevice.
        if let Err(e) = CudaArray::create_2d(64, 64, ArrayFormat::Float, 4) {
            assert!(is_environment_error(&e), "unexpected error: {e}");
        }
    }

    /// Creating a texture object over a null array handle must either succeed
    /// or fail with an environment-related or invalid-value error.
    #[test]
    fn cuda_texture_object_create_no_gpu() {
        // Uses a null/default array handle — valid errors include driver-absent
        // variants and, when a driver is present, invalid-handle variants.
        let res = ResourceDesc::Array {
            handle: CUarray::default(),
        };
        let tex = TextureDesc::default_2d();
        if let Err(e) = CudaTextureObject::create(&res, &tex, None) {
            let tolerated = is_environment_error(&e) || matches!(e, CudaRtError::InvalidValue);
            assert!(tolerated, "unexpected error: {e}");
        }
    }

    /// Creating a surface object over a null array handle must either succeed
    /// or fail with an environment-related or invalid-value error.
    #[test]
    fn cuda_surface_object_create_no_gpu() {
        // Uses a null/default array handle — valid errors include driver-absent
        // variants and, when a driver is present, invalid-handle variants.
        let res = ResourceDesc::Array {
            handle: CUarray::default(),
        };
        if let Err(e) = CudaSurfaceObject::create(&res) {
            let tolerated = is_environment_error(&e) || matches!(e, CudaRtError::InvalidValue);
            assert!(tolerated, "unexpected error: {e}");
        }
    }

    /// `Array3DFlags::or` yields the bitwise OR of the two raw flag values.
    #[test]
    fn array_3d_flags_combine() {
        let combined = Array3DFlags::LAYERED.or(Array3DFlags::SURFACE_LDST);
        let expected = CUDA_ARRAY3D_LAYERED | CUDA_ARRAY3D_SURFACE_LDST;
        assert_eq!(combined.0, expected);
    }

    /// Smoke test: every `AddressMode` variant can be converted via `as_cu`.
    #[test]
    fn address_mode_variants_compile() {
        for mode in [
            AddressMode::Wrap,
            AddressMode::Clamp,
            AddressMode::Mirror,
            AddressMode::Border,
        ] {
            let _ = mode.as_cu();
        }
    }
}