cust/
texture.rs

1use crate::error::CudaResult;
2use crate::error::ToResult;
3use crate::memory::array::ArrayDescriptor;
4use crate::memory::array::ArrayFormat;
5use crate::memory::array::ArrayObject;
6use crate::sys::cuTexObjectCreate;
7use crate::sys::cuTexObjectGetResourceDesc;
8use crate::sys::{
9    self as cuda, cuTexObjectDestroy, CUDA_RESOURCE_DESC_st__bindgen_ty_1,
10    CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, CUresourcetype, CUtexObject,
11    CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC, CUDA_TEXTURE_DESC,
12};
13use std::mem::transmute;
14use std::mem::ManuallyDrop;
15use std::mem::MaybeUninit;
16use std::os::raw::c_ulonglong;
17use std::os::raw::{c_float, c_uint};
18use std::ptr;
19
/// Determines what a texture fetch does when it is addressed with out of bounds indices.
///
/// The discriminants are spelled out because [`TextureDescriptor::to_raw`] transmutes
/// values of this enum directly into the raw CUDA texture descriptor.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextureAdressingMode {
    /// Out of bounds addresses wrap around to the other side of the texture.
    Wrap = 0,
    /// Out of bounds addresses are clamped to the edges of the texture.
    Clamp = 1,
    /// Out of bounds addresses are mirrored back into the texture.
    Mirror = 2,
    /// Out of bounds addresses yield the border color.
    Border = 3,
}
33
/// Selects the filtering that is applied when a value is fetched from the texture.
///
/// The discriminants are spelled out because [`TextureDescriptor::to_raw`] transmutes
/// values of this enum directly into the raw CUDA texture descriptor.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextureFilterMode {
    /// Point filter mode (raw value `0`).
    Point = 0,
    /// Linear filter mode (raw value `1`).
    Linear = 1,
}
41
42bitflags::bitflags! {
43    /// Flags which modify the behavior of CUDA texture creation.
44    #[derive(Default)]
45    pub struct TextureDescriptorFlags: c_uint {
46        /// Suppresses the default behavior of having the texture promote data to floating point data in the range
47        /// of [0, 1]. This flag does nothing if the texture is a texture of `u32`s.
48        const READ_AS_INTEGER = cuda::CU_TRSF_READ_AS_INTEGER;
49        /// Suppresses the default behavior of having the texture coordinates range from [0, Dim], where Dim is the
50        /// width or height of the CUDA array. Instead, the texture coordinates [0, 1] reference the entire array.
51        /// This flag must be set if a mipmapped array is being used.
52        const NORMALIZED_COORDINATES = cuda::CU_TRSF_NORMALIZED_COORDINATES;
53        /// Disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance
54        /// by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
55        const DISABLE_TRILINEAR_OPTIMIZATION = 0x20; // cuda-sys doesnt have this for some reason?
56    }
57}
58
59#[repr(C)]
60#[derive(Debug, Clone, Copy)]
61pub struct TextureDescriptor {
62    /// The adressing mode for each dimension of the texture data.
63    pub adress_modes: [TextureAdressingMode; 3],
64    /// The filtering mode to be used when fetching from the texture.
65    pub filter_mode: TextureFilterMode,
66    /// Any flags to modify the texture creation.
67    pub flags: TextureDescriptorFlags,
68    /// The maximum anisotropy ratio for anisotropic filtering. This will be clamped to `[1.0, 16.0]`.
69    pub max_anisotropy: c_uint,
70    /// The filter mode used when the calculated mipmap level lies between two defined mipmap levels.
71    pub mipmap_filter_mode: TextureFilterMode,
72    /// The offset to be applied to the calculated mipmap level.
73    pub mipmap_level_bias: c_float,
74    /// The lower end of the mipmap level range to clamp access to.
75    pub min_mipmap_level_clamp: c_float,
76    /// The upper end of the mipmap level range to clamp access to.
77    pub max_mipmap_level_clamp: c_float,
78    /// The border color of the texture.
79    pub border_color: [c_float; 4],
80}
81
82impl Default for TextureDescriptor {
83    fn default() -> Self {
84        Self {
85            adress_modes: [TextureAdressingMode::Clamp; 3],
86            filter_mode: TextureFilterMode::Point,
87            flags: TextureDescriptorFlags::empty(),
88            max_anisotropy: 1,
89            mipmap_filter_mode: TextureFilterMode::Point,
90            mipmap_level_bias: 0.0,
91            min_mipmap_level_clamp: 0.0,
92            max_mipmap_level_clamp: 0.0,
93            border_color: [0.0, 0.0, 0.0, 1.0],
94        }
95    }
96}
97
98impl TextureDescriptor {
99    pub fn to_raw(self) -> CUDA_TEXTURE_DESC {
100        let TextureDescriptor {
101            adress_modes,
102            filter_mode,
103            flags,
104            max_anisotropy,
105            mipmap_filter_mode,
106            mipmap_level_bias,
107            min_mipmap_level_clamp,
108            max_mipmap_level_clamp,
109            border_color,
110        } = self;
111        CUDA_TEXTURE_DESC {
112            addressMode: unsafe { transmute(adress_modes) },
113            filterMode: unsafe { transmute(filter_mode) },
114            flags: flags.bits(),
115            maxAnisotropy: max_anisotropy,
116            mipmapFilterMode: unsafe { transmute(mipmap_filter_mode) },
117            mipmapLevelBias: mipmap_level_bias,
118            minMipmapLevelClamp: min_mipmap_level_clamp,
119            maxMipmapLevelClamp: max_mipmap_level_clamp,
120            borderColor: border_color,
121            reserved: [0; 12],
122        }
123    }
124}
125
/// Selects how the data in the CUDA array/mipmapped array is interpreted by the texture.
/// Reinterpreting the data may change the size of the texture data.
///
/// For block compressed formats the underlying array must have a base format of
/// [`ArrayFormat::U32`] with 2 or 4 channels, depending on the compressed format: BC1 and BC4
/// require the CUDA array to be [`ArrayFormat::U32`] with 2 channels, while the other BC formats
/// require the same format with 4 channels.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResourceViewFormat {
    /// No resource view format; the format of the underlying resource is used.
    None = 0,
    /// Unsigned 8-bit integers, 1 channel.
    U8x1 = 1,
    /// Unsigned 8-bit integers, 2 channels.
    U8x2 = 2,
    /// Unsigned 8-bit integers, 4 channels.
    U8x4 = 3,
    /// Signed 8-bit integers, 1 channel.
    I8x1 = 4,
    /// Signed 8-bit integers, 2 channels.
    I8x2 = 5,
    /// Signed 8-bit integers, 4 channels.
    I8x4 = 6,
    /// Unsigned 16-bit integers, 1 channel.
    U16x1 = 7,
    /// Unsigned 16-bit integers, 2 channels.
    U16x2 = 8,
    /// Unsigned 16-bit integers, 4 channels.
    U16x4 = 9,
    /// Signed 16-bit integers, 1 channel.
    I16x1 = 10,
    /// Signed 16-bit integers, 2 channels.
    I16x2 = 11,
    /// Signed 16-bit integers, 4 channels.
    I16x4 = 12,
    /// Unsigned 32-bit integers, 1 channel.
    U32x1 = 13,
    /// Unsigned 32-bit integers, 2 channels.
    U32x2 = 14,
    /// Unsigned 32-bit integers, 4 channels.
    U32x4 = 15,
    /// Signed 32-bit integers, 1 channel.
    I32x1 = 16,
    /// Signed 32-bit integers, 2 channels.
    I32x2 = 17,
    /// Signed 32-bit integers, 4 channels.
    I32x4 = 18,
    /// 16-bit floating point, 1 channel.
    F16x1 = 19,
    /// 16-bit floating point, 2 channels.
    F16x2 = 20,
    /// 16-bit floating point, 4 channels.
    F16x4 = 21,
    /// 32-bit floating point, 1 channel.
    F32x1 = 22,
    /// 32-bit floating point, 2 channels.
    F32x2 = 23,
    /// 32-bit floating point, 4 channels.
    F32x4 = 24,
    /// Block compressed 1.
    BC1 = 25,
    /// Block compressed 2.
    BC2 = 26,
    /// Block compressed 3.
    BC3 = 27,
    /// Block compressed 4, unsigned.
    BC4U = 28,
    /// Block compressed 4, signed.
    BC4S = 29,
    /// Block compressed 5, unsigned.
    BC5U = 30,
    /// Block compressed 5, signed.
    BC5S = 31,
    /// Block compressed 6, unsigned half-float.
    BC6HU = 32,
    /// Block compressed 6, signed half-float.
    BC6HS = 33,
    /// Block compressed 7.
    BC7 = 34,
}
206
207impl ResourceViewFormat {
208    pub fn from_array_format(format: ArrayFormat, num_channels: c_uint) -> Self {
209        // i spent more time on this macro than it would have taken me to just write the matches out
210        // but thats kind of the essence of automation
211        macro_rules! format_impl {
212            ($num_channels:ident, $original:ident, $($res:ident),*) => {{
213                if format == ArrayFormat::$original {
214                    let res = [$(ResourceViewFormat::$res),*];
215                    return match $num_channels {
216                        1 => res[0],
217                        2 => res[1],
218                        4 => res[2],
219                        _ => unreachable!("num_channels must be 1, 2, or 4")
220                    };
221                }
222            }}
223        }
224
225        format_impl!(num_channels, U8, U8x1, U8x2, U8x4);
226        format_impl!(num_channels, U16, U16x1, U16x2, U16x4);
227        format_impl!(num_channels, U32, U32x1, U32x2, U32x4);
228        format_impl!(num_channels, I8, I8x1, I8x2, I8x4);
229        format_impl!(num_channels, I16, I16x1, I16x2, I16x4);
230        format_impl!(num_channels, I32, I32x1, I32x2, I32x4);
231        format_impl!(num_channels, F32, F32x1, F32x2, F32x4);
232        assert_ne!(
233            format,
234            ArrayFormat::F64,
235            "CUDA Does not have 64 bit float textures, you can instead use int textures with 2 channels then cast the ints to a double in the kernel"
236        );
237        unreachable!()
238    }
239}
240
241#[repr(C)]
242#[derive(Debug, Clone, Copy)]
243pub struct ResourceViewDescriptor {
244    /// The format of the resource view.
245    pub format: ResourceViewFormat,
246    /// The new width of the texture data. If this is a compressed format this must be 4x the original width.
247    /// Otherwise, it must be equal to that of the original resource.
248    pub width: usize,
249    /// The new height of the texture data. If this is a compressed format this must be 4x the original height.
250    /// Otherwise, it must be equal to that of the original resource.
251    pub height: usize,
252    /// The new depth of the texture data. If this is a compressed format this must be 4x the original depth.
253    /// Otherwise, it must be equal to that of the original resource.
254    pub depth: usize,
255    /// The most detailed mipmap level. This will be the new level zero. For non-mipmapped resources this must be `0`.
256    /// This value will be relative to [`TextureDescriptor::min_mipmap_level_clamp`] and [`TextureDescriptor::max_mipmap_level_clamp`]. Ex.
257    /// if the first mipmap level is `2` and the min level clamp is `1.2`, then the actual min mipmap level clamp will be `3.2`.
258    pub first_mipmap_level: c_uint,
259    /// The least detailed mipmap level. This must be `0` for non-mipmapped resources.
260    pub last_mipmap_level: c_uint,
261    /// The first layer index for layered textures. This must be `0` for non-layered resources.
262    pub first_layer: c_uint,
263    /// The last layer index for layered textures. This must be `0` for non-layered resources.
264    pub last_layer: c_uint,
265}
266
267impl ResourceViewDescriptor {
268    pub fn from_array_desc(desc: &ArrayDescriptor) -> Self {
269        Self {
270            format: ResourceViewFormat::from_array_format(desc.format(), desc.num_channels()),
271            width: desc.width(),
272            height: desc.height(),
273            depth: desc.depth(),
274            first_mipmap_level: 0,
275            last_mipmap_level: 0,
276            first_layer: 0,
277            last_layer: 0,
278        }
279    }
280
281    pub fn to_raw(self) -> CUDA_RESOURCE_VIEW_DESC {
282        let ResourceViewDescriptor {
283            format,
284            width,
285            height,
286            depth,
287            first_mipmap_level,
288            last_mipmap_level,
289            first_layer,
290            last_layer,
291        } = self;
292
293        CUDA_RESOURCE_VIEW_DESC {
294            format: unsafe { transmute(format) },
295            width,
296            height,
297            depth,
298            firstMipmapLevel: first_mipmap_level,
299            lastMipmapLevel: last_mipmap_level,
300            firstLayer: first_layer,
301            lastLayer: last_layer,
302            reserved: [0; 16],
303        }
304    }
305}
306
307bitflags::bitflags! {
308    /// Flags for a resource descriptor. Currently empty.
309    #[derive(Default)]
310    pub struct ResourceDescriptorFlags: c_uint {
311        #[doc(hidden)]
312        const _ZERO = 0;
313    }
314}
315
316#[non_exhaustive]
317#[derive(Debug)]
318pub enum ResourceType {
319    Array { array: ArrayObject },
320    // TODO: validate the soundness of linear and pitch2, they require some pointer to memory, but
321    // it might be possible to cause unsoundness by allocating some type then allocating a texture, and reading back
322    // the texture to host memory. Causing GPU UB is probably fine, but using that to cause host UB is not acceptable.
323
324    // Linear {
325    //     format: ArrayFormat,
326    //     num_channels: u32,
327    //     size: usize,
328    // },
329    // Pitch2d {
330    //     format: ArrayFormat,
331    //     num_channels: u32,
332    //     width: usize,
333    //     height: usize,
334    //     pitch_in_bytes: usize,
335    // },
336}
337
338#[derive(Debug)]
339pub struct ResourceDescriptor {
340    pub flags: ResourceDescriptorFlags,
341    pub ty: ResourceType,
342}
343
344impl ResourceDescriptor {
345    pub fn into_raw(self) -> CUDA_RESOURCE_DESC {
346        let ty = match self.ty {
347            ResourceType::Array { .. } => CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
348            // ResourceType::Linear { .. } => CUresourcetype::CU_RESOURCE_TYPE_LINEAR,
349            // ResourceType::Pitch2d { .. } => CUresourcetype::CU_RESOURCE_TYPE_PITCH2D,
350        };
351
352        // we can't just use `array.handle`, this will cause the array object to call `Drop` and destroy the
353        // array prematurely, which will yield a status access violation when we try to create the texture object
354        // so we need to essentially leak the array into just a handle.
355        let res = match self.ty {
356            ResourceType::Array { array } => CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
357                array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
358                    hArray: array.into_raw(),
359                },
360            },
361            // ResourceType::Linear { format, num_channels, size }
362        };
363
364        CUDA_RESOURCE_DESC {
365            resType: ty,
366            flags: self.flags.bits(),
367            res,
368        }
369    }
370
371    // TODO: evaluate if its possible to cause UB by making a raw descriptor with an invalid array handle.
372    pub(crate) fn from_raw(raw: CUDA_RESOURCE_DESC) -> Self {
373        match raw.resType {
374            cuda::CUresourcetype_enum::CU_RESOURCE_TYPE_ARRAY => Self {
375                flags: ResourceDescriptorFlags::from_bits(raw.flags)
376                    .expect("invalid resource descriptor flags"),
377                ty: ResourceType::Array {
378                    array: ArrayObject {
379                        handle: unsafe { raw.res.array.hArray },
380                    },
381                },
382            },
383            _ => panic!("Unsupported resource descriptor"),
384        }
385    }
386}
387
388#[derive(Debug)]
389pub struct Texture {
390    // needed to tell the destructor if it should drop the array if we havent
391    // used into_array. TODO: figure out a good way to deal with array ownership issues.
392    _destroy_array_on_destruct: bool,
393    handle: CUtexObject,
394}
395
396impl Drop for Texture {
397    fn drop(&mut self) {
398        unsafe {
399            // drop the descriptor, which causes the array inside it to be dropped too
400            if false {
401                let res = self.resource_desc();
402                if let Ok(res) = res {
403                    let _ = ManuallyDrop::into_inner(res);
404                }
405            }
406
407            cuTexObjectDestroy(self.handle);
408        }
409    }
410}
411
/// The raw integer type of a texture handle, as returned by [`Texture::handle`].
pub type TextureHandle = c_ulonglong;
413
414impl Texture {
415    /// The opaque handle to this texture on the gpu. This is used for passing to a kernel.
416    pub fn handle(&self) -> TextureHandle {
417        self.handle
418    }
419
420    pub fn new(
421        resource_desc: ResourceDescriptor,
422        texture_desc: TextureDescriptor,
423        resource_view_desc: Option<ResourceViewDescriptor>,
424    ) -> CudaResult<Self> {
425        let handle = unsafe {
426            let mut uninit = MaybeUninit::<CUtexObject>::uninit();
427            let resource_view_desc =
428                if let Some(x) = resource_view_desc.map(|x| Box::new(x.to_raw())) {
429                    Box::into_raw(x)
430                } else {
431                    ptr::null_mut()
432                };
433
434            let resource_desc = &resource_desc.into_raw();
435            let texture_desc = &texture_desc.to_raw();
436
437            cuTexObjectCreate(
438                uninit.as_mut_ptr(),
439                resource_desc as *const _,
440                texture_desc as *const _,
441                resource_view_desc as *const _,
442            )
443            .to_result()?;
444            if !resource_view_desc.is_null() {
445                let _ = Box::from_raw(resource_view_desc);
446            }
447            uninit.assume_init()
448        };
449        Ok(Self {
450            handle,
451            _destroy_array_on_destruct: true,
452        })
453    }
454
455    pub fn from_array(array: ArrayObject) -> CudaResult<Self> {
456        let resource_desc = ResourceDescriptor {
457            flags: ResourceDescriptorFlags::empty(),
458            ty: ResourceType::Array { array },
459        };
460        Self::new(resource_desc, Default::default(), None)
461    }
462
463    pub fn into_array(mut self) -> CudaResult<Option<ArrayObject>> {
464        let desc = unsafe { ManuallyDrop::take(&mut self.resource_desc()?) };
465        self._destroy_array_on_destruct = false;
466        Ok(match desc.ty {
467            ResourceType::Array { array } => Some(array),
468        })
469    }
470
471    // pub fn array(&mut self) -> CudaResult<Option<&ArrayObject>> {
472    //     let desc = self.resource_desc()?;
473    //     Ok(match desc.ty {
474    //         ResourceType::Array { array } => Some(array),
475    //     })
476    // }
477
478    // this function returns a ManuallyDrop because dropping the descriptor will cause the underlying
479    // array to be dropped, which will cause UB or undesired consequences.
480    unsafe fn resource_desc(&mut self) -> CudaResult<ManuallyDrop<ResourceDescriptor>> {
481        let raw = {
482            let mut uninit = MaybeUninit::<CUDA_RESOURCE_DESC>::uninit();
483            cuTexObjectGetResourceDesc(uninit.as_mut_ptr(), self.handle).to_result()?;
484            uninit.assume_init()
485        };
486        Ok(ManuallyDrop::new(ResourceDescriptor::from_raw(raw)))
487    }
488
489    // pub fn resource_view_desc(&self) -> CudaResult<ResourceViewDescriptor> {
490    //     let raw = unsafe {
491    //         let ptr = ptr::null_mut();
492    //         cuTexObjectGetResourceViewDesc(ptr, self.handle).to_result()?;
493    //         *ptr
494    //     };
495    //     Ok(ResourceViewDescriptor::)
496    // }
497}