1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
use crate::error::CudaResult;
use crate::error::ToResult;
use crate::memory::array::ArrayDescriptor;
use crate::memory::array::ArrayFormat;
use crate::memory::array::ArrayObject;
use crate::sys::cuTexObjectCreate;
use crate::sys::cuTexObjectGetResourceDesc;
use crate::sys::{
    self as cuda, cuTexObjectDestroy, CUDA_RESOURCE_DESC_st__bindgen_ty_1,
    CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, CUresourcetype, CUtexObject,
    CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC, CUDA_TEXTURE_DESC,
};
use std::mem::transmute;
use std::mem::ManuallyDrop;
use std::mem::MaybeUninit;
use std::os::raw::c_ulonglong;
use std::os::raw::{c_float, c_uint};
use std::ptr;

/// How a texture should behave if it's addressed with out-of-bounds indices.
///
/// The enum is `#[repr(u32)]` with explicit discriminants because
/// `TextureDescriptor::to_raw` transmutes values of this type straight into the
/// `addressMode` field of the raw CUDA texture descriptor.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextureAdressingMode {
    /// Wraps around for addresses that are out of bounds.
    Wrap = 0,
    /// Clamps to the edges of the texture for addresses that are out of bounds.
    Clamp = 1,
    /// Mirrors the texture for addresses that are out of bounds.
    Mirror = 2,
    /// Uses the border color for addresses that are out of bounds.
    Border = 3,
}

/// The filtering mode to be used when fetching from the texture.
///
/// The enum is `#[repr(u32)]` with explicit discriminants because
/// `TextureDescriptor::to_raw` transmutes values of this type into the raw
/// descriptor's `filterMode` and `mipmapFilterMode` fields.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum TextureFilterMode {
    /// Point filtering (discriminant `0`).
    // NOTE(review): presumably corresponds to the driver's point/nearest filter mode - confirm
    // against the bindings.
    Point = 0,
    /// Linear filtering (discriminant `1`).
    // NOTE(review): presumably corresponds to the driver's linear filter mode - confirm against
    // the bindings.
    Linear = 1,
}

bitflags::bitflags! {
    /// Flags which modify the behavior of CUDA texture creation.
    ///
    /// The bit values are passed through unchanged as the `flags` field of the raw texture
    /// descriptor (see `TextureDescriptor::to_raw`).
    #[derive(Default)]
    pub struct TextureDescriptorFlags: c_uint {
        /// Suppresses the default behavior of having the texture promote data to floating point data in the range
        /// of [0, 1]. This flag does nothing if the texture is a texture of `u32`s.
        const READ_AS_INTEGER = cuda::CU_TRSF_READ_AS_INTEGER;
        /// Suppresses the default behavior of having the texture coordinates range from [0, Dim], where Dim is the
        /// width or height of the CUDA array. Instead, the texture coordinates [0, 1] reference the entire array.
        /// This flag must be set if a mipmapped array is being used.
        const NORMALIZED_COORDINATES = cuda::CU_TRSF_NORMALIZED_COORDINATES;
        /// Disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance
        /// by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
        const DISABLE_TRILINEAR_OPTIMIZATION = 0x20; // Hard-coded: the sys bindings don't expose a constant for this flag.
    }
}

/// Describes how a texture object samples its underlying resource.
///
/// This is the safe counterpart of the raw `CUDA_TEXTURE_DESC`; `TextureDescriptor::to_raw`
/// performs the field-by-field conversion. The `Default` implementation yields clamped
/// addressing with point filtering and no flags.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct TextureDescriptor {
    /// The addressing mode for each dimension of the texture data.
    pub adress_modes: [TextureAdressingMode; 3],
    /// The filtering mode to be used when fetching from the texture.
    pub filter_mode: TextureFilterMode,
    /// Any flags to modify the texture creation.
    pub flags: TextureDescriptorFlags,
    /// The maximum anisotropy ratio for anisotropic filtering. This will be clamped to `[1, 16]`.
    // NOTE(review): nothing in this module clamps the value; the clamping is presumably done by
    // the CUDA driver - confirm.
    pub max_anisotropy: c_uint,
    /// The filter mode used when the calculated mipmap level lies between two defined mipmap levels.
    pub mipmap_filter_mode: TextureFilterMode,
    /// The offset to be applied to the calculated mipmap level.
    pub mipmap_level_bias: c_float,
    /// The lower end of the mipmap level range to clamp access to.
    pub min_mipmap_level_clamp: c_float,
    /// The upper end of the mipmap level range to clamp access to.
    pub max_mipmap_level_clamp: c_float,
    /// The border color of the texture.
    pub border_color: [c_float; 4],
}

impl Default for TextureDescriptor {
    fn default() -> Self {
        Self {
            adress_modes: [TextureAdressingMode::Clamp; 3],
            filter_mode: TextureFilterMode::Point,
            flags: TextureDescriptorFlags::empty(),
            max_anisotropy: 1,
            mipmap_filter_mode: TextureFilterMode::Point,
            mipmap_level_bias: 0.0,
            min_mipmap_level_clamp: 0.0,
            max_mipmap_level_clamp: 0.0,
            border_color: [0.0, 0.0, 0.0, 1.0],
        }
    }
}

impl TextureDescriptor {
    /// Converts this descriptor into the raw `CUDA_TEXTURE_DESC` expected by the driver API.
    ///
    /// Numeric fields and the border color are copied verbatim, the flags are lowered to their
    /// bit pattern, and the reserved words are zeroed. The addressing/filter enums are
    /// reinterpreted as their raw counterparts.
    pub fn to_raw(self) -> CUDA_TEXTURE_DESC {
        CUDA_TEXTURE_DESC {
            // SAFETY (all three transmutes below): the source enums are `#[repr(u32)]` with
            // explicit discriminants; this relies on the raw binding types having the same size
            // and accepting those discriminant values.
            addressMode: unsafe { transmute(self.adress_modes) },
            filterMode: unsafe { transmute(self.filter_mode) },
            flags: self.flags.bits(),
            maxAnisotropy: self.max_anisotropy,
            mipmapFilterMode: unsafe { transmute(self.mipmap_filter_mode) },
            mipmapLevelBias: self.mipmap_level_bias,
            minMipmapLevelClamp: self.min_mipmap_level_clamp,
            maxMipmapLevelClamp: self.max_mipmap_level_clamp,
            borderColor: self.border_color,
            reserved: [0; 12],
        }
    }
}

/// Specifies how the data in the CUDA array/mipmapped array should be interpreted for the texture. This could incur a change in the
/// size of the texture data.
///
/// If the format is a block compressed format, then the underlying array must have a base of format [`ArrayFormat::U32`] with 2 or 4 channels depending
/// on the compressed format. ex. BC1 and BC4 require the CUDA array to have a format of [`ArrayFormat::U32`] with 2 channels. The other BC formats require
/// the resource to have the same format but with 4 channels.
///
/// The enum is `#[repr(u32)]` with explicit discriminants because `ResourceViewDescriptor::to_raw`
/// transmutes values of this type into the raw view descriptor's `format` field.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ResourceViewFormat {
    /// No resource view format (use underlying resource format)
    None = 0,
    /// 1 channel unsigned 8-bit integers
    U8x1 = 1,
    /// 2 channel unsigned 8-bit integers
    U8x2 = 2,
    /// 4 channel unsigned 8-bit integers
    U8x4 = 3,
    /// 1 channel signed 8-bit integers
    I8x1 = 4,
    /// 2 channel signed 8-bit integers
    I8x2 = 5,
    /// 4 channel signed 8-bit integers
    I8x4 = 6,
    /// 1 channel unsigned 16-bit integers
    U16x1 = 7,
    /// 2 channel unsigned 16-bit integers
    U16x2 = 8,
    /// 4 channel unsigned 16-bit integers
    U16x4 = 9,
    /// 1 channel signed 16-bit integers
    I16x1 = 10,
    /// 2 channel signed 16-bit integers
    I16x2 = 11,
    /// 4 channel signed 16-bit integers
    I16x4 = 12,
    /// 1 channel unsigned 32-bit integers
    U32x1 = 13,
    /// 2 channel unsigned 32-bit integers
    U32x2 = 14,
    /// 4 channel unsigned 32-bit integers
    U32x4 = 15,
    /// 1 channel signed 32-bit integers
    I32x1 = 16,
    /// 2 channel signed 32-bit integers
    I32x2 = 17,
    /// 4 channel signed 32-bit integers
    I32x4 = 18,
    /// 1 channel 16-bit floating point
    F16x1 = 19,
    /// 2 channel 16-bit floating point
    F16x2 = 20,
    /// 4 channel 16-bit floating point
    F16x4 = 21,
    /// 1 channel 32-bit floating point
    F32x1 = 22,
    /// 2 channel 32-bit floating point
    F32x2 = 23,
    /// 4 channel 32-bit floating point
    F32x4 = 24,
    /// Block compressed 1
    BC1 = 25,
    /// Block compressed 2
    BC2 = 26,
    /// Block compressed 3
    BC3 = 27,
    /// Block compressed 4 unsigned
    BC4U = 28,
    /// Block compressed 4 signed
    BC4S = 29,
    /// Block compressed 5 unsigned
    BC5U = 30,
    /// Block compressed 5 signed
    BC5S = 31,
    /// Block compressed 6 unsigned half-float
    BC6HU = 32,
    /// Block compressed 6 signed half-float
    BC6HS = 33,
    /// Block compressed 7
    BC7 = 34,
}

impl ResourceViewFormat {
    pub fn from_array_format(format: ArrayFormat, num_channels: c_uint) -> Self {
        // i spent more time on this macro than it would have taken me to just write the matches out
        // but thats kind of the essence of automation
        macro_rules! format_impl {
            ($num_channels:ident, $original:ident, $($res:ident),*) => {{
                if format == ArrayFormat::$original {
                    let res = [$(ResourceViewFormat::$res),*];
                    return match $num_channels {
                        1 => res[0],
                        2 => res[1],
                        4 => res[2],
                        _ => unreachable!("num_channels must be 1, 2, or 4")
                    };
                }
            }}
        }

        format_impl!(num_channels, U8, U8x1, U8x2, U8x4);
        format_impl!(num_channels, U16, U16x1, U16x2, U16x4);
        format_impl!(num_channels, U32, U32x1, U32x2, U32x4);
        format_impl!(num_channels, I8, I8x1, I8x2, I8x4);
        format_impl!(num_channels, I16, I16x1, I16x2, I16x4);
        format_impl!(num_channels, I32, I32x1, I32x2, I32x4);
        format_impl!(num_channels, F32, F32x1, F32x2, F32x4);
        assert_ne!(
            format,
            ArrayFormat::F64,
            "CUDA Does not have 64 bit float textures, you can instead use int textures with 2 channels then cast the ints to a double in the kernel"
        );
        unreachable!()
    }
}

/// Describes an alternative interpretation (a "view") of the resource backing a texture.
///
/// This is the safe counterpart of the raw `CUDA_RESOURCE_VIEW_DESC`;
/// `ResourceViewDescriptor::to_raw` performs the conversion and
/// `ResourceViewDescriptor::from_array_desc` derives a view from an array descriptor.
#[repr(C)]
#[derive(Debug, Clone, Copy)]
pub struct ResourceViewDescriptor {
    /// The format of the resource view.
    pub format: ResourceViewFormat,
    /// The new width of the texture data. If this is a compressed format this must be 4x the original width.
    /// Otherwise, it must be equal to that of the original resource.
    pub width: usize,
    /// The new height of the texture data. If this is a compressed format this must be 4x the original height.
    /// Otherwise, it must be equal to that of the original resource.
    pub height: usize,
    /// The new depth of the texture data. If this is a compressed format this must be 4x the original depth.
    /// Otherwise, it must be equal to that of the original resource.
    pub depth: usize,
    /// The most detailed mipmap level. This will be the new level zero. For non-mipmapped resources this must be `0`.
    /// This value will be relative to [`TextureDescriptor::min_mipmap_level_clamp`] and [`TextureDescriptor::max_mipmap_level_clamp`]. Ex.
    /// if the first mipmap level is `2` and the min level clamp is `1.2`, then the actual min mipmap level clamp will be `3.2`.
    pub first_mipmap_level: c_uint,
    /// The least detailed mipmap level. This must be `0` for non-mipmapped resources.
    pub last_mipmap_level: c_uint,
    /// The first layer index for layered textures. This must be `0` for non-layered resources.
    pub first_layer: c_uint,
    /// The last layer index for layered textures. This must be `0` for non-layered resources.
    pub last_layer: c_uint,
}

impl ResourceViewDescriptor {
    /// Derives a view descriptor from an array descriptor.
    ///
    /// The view keeps the array's width, height and depth, maps its format and channel count
    /// through [`ResourceViewFormat::from_array_format`], and sets every mipmap-level and
    /// layer bound to `0`.
    ///
    /// # Panics
    ///
    /// Panics whenever [`ResourceViewFormat::from_array_format`] does for the array's format
    /// and channel count.
    pub fn from_array_desc(desc: &ArrayDescriptor) -> Self {
        let format = ResourceViewFormat::from_array_format(desc.format(), desc.num_channels());
        let (width, height, depth) = (desc.width(), desc.height(), desc.depth());

        ResourceViewDescriptor {
            format,
            width,
            height,
            depth,
            first_mipmap_level: 0,
            last_mipmap_level: 0,
            first_layer: 0,
            last_layer: 0,
        }
    }

    /// Converts this descriptor into the raw `CUDA_RESOURCE_VIEW_DESC` expected by the driver
    /// API, copying every field verbatim and zeroing the reserved words.
    pub fn to_raw(self) -> CUDA_RESOURCE_VIEW_DESC {
        CUDA_RESOURCE_VIEW_DESC {
            // SAFETY: `ResourceViewFormat` is `#[repr(u32)]` with explicit discriminants; this
            // relies on the raw binding type having the same size and accepting those values.
            format: unsafe { transmute(self.format) },
            width: self.width,
            height: self.height,
            depth: self.depth,
            firstMipmapLevel: self.first_mipmap_level,
            lastMipmapLevel: self.last_mipmap_level,
            firstLayer: self.first_layer,
            lastLayer: self.last_layer,
            reserved: [0; 16],
        }
    }
}

bitflags::bitflags! {
    /// Flags for a resource descriptor. Currently empty.
    ///
    /// The only defined constant has the value `0`, so `from_bits` rejects any non-zero bit
    /// pattern (see `ResourceDescriptor::from_raw`, which panics in that case).
    #[derive(Default)]
    pub struct ResourceDescriptorFlags: c_uint {
        // Placeholder constant with no bits set; hidden from the generated documentation.
        #[doc(hidden)]
        const _ZERO = 0;
    }
}

/// The kind of resource a texture reads from.
///
/// Only CUDA arrays are supported at the moment; the enum is `#[non_exhaustive]` so further
/// kinds can be added later without breaking downstream matches.
#[non_exhaustive]
#[derive(Debug)]
pub enum ResourceType {
    /// The texture is backed by a CUDA array, which this variant owns.
    Array { array: ArrayObject },
    // TODO: validate the soundness of linear and pitch2, they require some pointer to memory, but
    // it might be possible to cause unsoundness by allocating some type then allocating a texture, and reading back
    // the texture to host memory. Causing GPU UB is probably fine, but using that to cause host UB is not acceptable.

    // Linear {
    //     format: ArrayFormat,
    //     num_channels: u32,
    //     size: usize,
    // },
    // Pitch2d {
    //     format: ArrayFormat,
    //     num_channels: u32,
    //     width: usize,
    //     height: usize,
    //     pitch_in_bytes: usize,
    // },
}

/// Describes the resource a texture object is created from.
///
/// This is the safe counterpart of the raw `CUDA_RESOURCE_DESC`; it owns the resource it
/// describes (see `ResourceDescriptor::into_raw` for how ownership is handed to the raw form).
#[derive(Debug)]
pub struct ResourceDescriptor {
    /// Flags for the resource; no flag bits are currently defined.
    pub flags: ResourceDescriptorFlags,
    /// The resource itself, together with its kind.
    pub ty: ResourceType,
}

impl ResourceDescriptor {
    pub fn into_raw(self) -> CUDA_RESOURCE_DESC {
        let ty = match self.ty {
            ResourceType::Array { .. } => CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
            // ResourceType::Linear { .. } => CUresourcetype::CU_RESOURCE_TYPE_LINEAR,
            // ResourceType::Pitch2d { .. } => CUresourcetype::CU_RESOURCE_TYPE_PITCH2D,
        };

        // we can't just use `array.handle`, this will cause the array object to call `Drop` and destroy the
        // array prematurely, which will yield a status access violation when we try to create the texture object
        // so we need to essentially leak the array into just a handle.
        let res = match self.ty {
            ResourceType::Array { array } => CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
                array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
                    hArray: array.into_raw(),
                },
            },
            // ResourceType::Linear { format, num_channels, size }
        };

        CUDA_RESOURCE_DESC {
            resType: ty,
            flags: self.flags.bits(),
            res,
        }
    }

    // TODO: evaluate if its possible to cause UB by making a raw descriptor with an invalid array handle.
    pub(crate) fn from_raw(raw: CUDA_RESOURCE_DESC) -> Self {
        match raw.resType {
            cuda::CUresourcetype_enum::CU_RESOURCE_TYPE_ARRAY => Self {
                flags: ResourceDescriptorFlags::from_bits(raw.flags)
                    .expect("invalid resource descriptor flags"),
                ty: ResourceType::Array {
                    array: ArrayObject {
                        handle: unsafe { raw.res.array.hArray },
                    },
                },
            },
            _ => panic!("Unsupported resource descriptor"),
        }
    }
}

/// An owned CUDA texture object.
///
/// Created through `Texture::new` or `Texture::from_array`; the underlying texture object is
/// destroyed when the value is dropped.
#[derive(Debug)]
pub struct Texture {
    // Records whether this texture still owns the array it was created from: set to `true` by
    // `new` and cleared by `into_array` once the array has been handed back to the caller.
    // TODO: figure out a good way to deal with array ownership issues.
    _destroy_array_on_destruct: bool,
    // The raw texture object handle returned by the driver.
    handle: CUtexObject,
}

impl Drop for Texture {
    /// Destroys the texture object and, unless the backing array was handed out through
    /// `into_array`, releases that array as well.
    ///
    /// Errors from the driver are ignored, since a destructor has no way to report them.
    fn drop(&mut self) {
        // SAFETY: `self.handle` came from a successful `cuTexObjectCreate` and is destroyed
        // exactly once, here. The array recovered from the resource descriptor is the one whose
        // ownership was leaked into the texture on creation, and `_destroy_array_on_destruct`
        // is cleared by `into_array` whenever that ownership has been returned to the caller,
        // so the array is never released twice.
        unsafe {
            // The descriptor can only be queried while the texture object is still alive, so
            // fetch it first. If the query fails there is nothing we can release.
            let owned_resource = if self._destroy_array_on_destruct {
                self.resource_desc().ok().map(ManuallyDrop::into_inner)
            } else {
                None
            };

            cuTexObjectDestroy(self.handle);

            // Release the array only after the texture object that referenced it is gone.
            drop(owned_resource);
        }
    }
}

/// The integer type of the opaque texture handle returned by `Texture::handle`.
pub type TextureHandle = c_ulonglong;

impl Texture {
    /// The opaque handle to this texture on the gpu. This is used for passing to a kernel.
    pub fn handle(&self) -> TextureHandle {
        self.handle
    }

    pub fn new(
        resource_desc: ResourceDescriptor,
        texture_desc: TextureDescriptor,
        resource_view_desc: Option<ResourceViewDescriptor>,
    ) -> CudaResult<Self> {
        let handle = unsafe {
            let mut uninit = MaybeUninit::<CUtexObject>::uninit();
            let resource_view_desc =
                if let Some(x) = resource_view_desc.map(|x| Box::new(x.to_raw())) {
                    Box::into_raw(x)
                } else {
                    ptr::null_mut()
                };

            let resource_desc = &resource_desc.into_raw();
            let texture_desc = &texture_desc.to_raw();

            cuTexObjectCreate(
                uninit.as_mut_ptr(),
                resource_desc as *const _,
                texture_desc as *const _,
                resource_view_desc as *const _,
            )
            .to_result()?;
            if !resource_view_desc.is_null() {
                let _ = Box::from_raw(resource_view_desc);
            }
            uninit.assume_init()
        };
        Ok(Self {
            handle,
            _destroy_array_on_destruct: true,
        })
    }

    pub fn from_array(array: ArrayObject) -> CudaResult<Self> {
        let resource_desc = ResourceDescriptor {
            flags: ResourceDescriptorFlags::empty(),
            ty: ResourceType::Array { array },
        };
        Self::new(resource_desc, Default::default(), None)
    }

    pub fn into_array(mut self) -> CudaResult<Option<ArrayObject>> {
        let desc = unsafe { ManuallyDrop::take(&mut self.resource_desc()?) };
        self._destroy_array_on_destruct = false;
        Ok(match desc.ty {
            ResourceType::Array { array } => Some(array),
        })
    }

    // pub fn array(&mut self) -> CudaResult<Option<&ArrayObject>> {
    //     let desc = self.resource_desc()?;
    //     Ok(match desc.ty {
    //         ResourceType::Array { array } => Some(array),
    //     })
    // }

    // this function returns a ManuallyDrop because dropping the descriptor will cause the underlying
    // array to be dropped, which will cause UB or undesired consequences.
    unsafe fn resource_desc(&mut self) -> CudaResult<ManuallyDrop<ResourceDescriptor>> {
        let raw = {
            let mut uninit = MaybeUninit::<CUDA_RESOURCE_DESC>::uninit();
            cuTexObjectGetResourceDesc(uninit.as_mut_ptr(), self.handle).to_result()?;
            uninit.assume_init()
        };
        Ok(ManuallyDrop::new(ResourceDescriptor::from_raw(raw)))
    }

    // pub fn resource_view_desc(&self) -> CudaResult<ResourceViewDescriptor> {
    //     let raw = unsafe {
    //         let ptr = ptr::null_mut();
    //         cuTexObjectGetResourceViewDesc(ptr, self.handle).to_result()?;
    //         *ptr
    //     };
    //     Ok(ResourceViewDescriptor::)
    // }
}