cust/texture.rs
1use crate::error::CudaResult;
2use crate::error::ToResult;
3use crate::memory::array::ArrayDescriptor;
4use crate::memory::array::ArrayFormat;
5use crate::memory::array::ArrayObject;
6use crate::sys::cuTexObjectCreate;
7use crate::sys::cuTexObjectGetResourceDesc;
8use crate::sys::{
9 self as cuda, cuTexObjectDestroy, CUDA_RESOURCE_DESC_st__bindgen_ty_1,
10 CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1, CUresourcetype, CUtexObject,
11 CUDA_RESOURCE_DESC, CUDA_RESOURCE_VIEW_DESC, CUDA_TEXTURE_DESC,
12};
13use std::mem::transmute;
14use std::mem::ManuallyDrop;
15use std::mem::MaybeUninit;
16use std::os::raw::c_ulonglong;
17use std::os::raw::{c_float, c_uint};
18use std::ptr;
19
/// Selects what a texture fetch does when it is addressed with out-of-bounds indices.
///
/// The discriminants are fixed (`#[repr(u32)]`) because the value is reinterpreted as the
/// driver's addressing-mode type when a descriptor is converted to its raw form.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextureAdressingMode {
    /// Out-of-bounds addresses wrap around the texture.
    Wrap = 0,
    /// Out-of-bounds addresses are clamped to the edges of the texture.
    Clamp = 1,
    /// Out-of-bounds addresses mirror the texture.
    Mirror = 2,
    /// Out-of-bounds addresses produce the border color.
    Border = 3,
}
33
/// Filtering applied when a value is fetched from a texture.
///
/// The discriminants are fixed (`#[repr(u32)]`) because the value is reinterpreted as the
/// driver's filter-mode type when a descriptor is converted to its raw form.
#[repr(u32)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TextureFilterMode {
    /// Point filtering.
    Point = 0,
    /// Linear filtering.
    Linear = 1,
}
41
42bitflags::bitflags! {
43 /// Flags which modify the behavior of CUDA texture creation.
44 #[derive(Default)]
45 pub struct TextureDescriptorFlags: c_uint {
46 /// Suppresses the default behavior of having the texture promote data to floating point data in the range
47 /// of [0, 1]. This flag does nothing if the texture is a texture of `u32`s.
48 const READ_AS_INTEGER = cuda::CU_TRSF_READ_AS_INTEGER;
49 /// Suppresses the default behavior of having the texture coordinates range from [0, Dim], where Dim is the
50 /// width or height of the CUDA array. Instead, the texture coordinates [0, 1] reference the entire array.
51 /// This flag must be set if a mipmapped array is being used.
52 const NORMALIZED_COORDINATES = cuda::CU_TRSF_NORMALIZED_COORDINATES;
53 /// Disables any trilinear filtering optimizations. Trilinear optimizations improve texture filtering performance
54 /// by allowing bilinear filtering on textures in scenarios where it can closely approximate the expected results.
55 const DISABLE_TRILINEAR_OPTIMIZATION = 0x20; // cuda-sys doesnt have this for some reason?
56 }
57}
58
59#[repr(C)]
60#[derive(Debug, Clone, Copy)]
61pub struct TextureDescriptor {
62 /// The adressing mode for each dimension of the texture data.
63 pub adress_modes: [TextureAdressingMode; 3],
64 /// The filtering mode to be used when fetching from the texture.
65 pub filter_mode: TextureFilterMode,
66 /// Any flags to modify the texture creation.
67 pub flags: TextureDescriptorFlags,
68 /// The maximum anisotropy ratio for anisotropic filtering. This will be clamped to `[1.0, 16.0]`.
69 pub max_anisotropy: c_uint,
70 /// The filter mode used when the calculated mipmap level lies between two defined mipmap levels.
71 pub mipmap_filter_mode: TextureFilterMode,
72 /// The offset to be applied to the calculated mipmap level.
73 pub mipmap_level_bias: c_float,
74 /// The lower end of the mipmap level range to clamp access to.
75 pub min_mipmap_level_clamp: c_float,
76 /// The upper end of the mipmap level range to clamp access to.
77 pub max_mipmap_level_clamp: c_float,
78 /// The border color of the texture.
79 pub border_color: [c_float; 4],
80}
81
82impl Default for TextureDescriptor {
83 fn default() -> Self {
84 Self {
85 adress_modes: [TextureAdressingMode::Clamp; 3],
86 filter_mode: TextureFilterMode::Point,
87 flags: TextureDescriptorFlags::empty(),
88 max_anisotropy: 1,
89 mipmap_filter_mode: TextureFilterMode::Point,
90 mipmap_level_bias: 0.0,
91 min_mipmap_level_clamp: 0.0,
92 max_mipmap_level_clamp: 0.0,
93 border_color: [0.0, 0.0, 0.0, 1.0],
94 }
95 }
96}
97
98impl TextureDescriptor {
99 pub fn to_raw(self) -> CUDA_TEXTURE_DESC {
100 let TextureDescriptor {
101 adress_modes,
102 filter_mode,
103 flags,
104 max_anisotropy,
105 mipmap_filter_mode,
106 mipmap_level_bias,
107 min_mipmap_level_clamp,
108 max_mipmap_level_clamp,
109 border_color,
110 } = self;
111 CUDA_TEXTURE_DESC {
112 addressMode: unsafe { transmute(adress_modes) },
113 filterMode: unsafe { transmute(filter_mode) },
114 flags: flags.bits(),
115 maxAnisotropy: max_anisotropy,
116 mipmapFilterMode: unsafe { transmute(mipmap_filter_mode) },
117 mipmapLevelBias: mipmap_level_bias,
118 minMipmapLevelClamp: min_mipmap_level_clamp,
119 maxMipmapLevelClamp: max_mipmap_level_clamp,
120 borderColor: border_color,
121 reserved: [0; 12],
122 }
123 }
124}
125
/// How the data of a CUDA array/mipmapped array is interpreted by a texture. Choosing a view
/// format may change the size of the texture data.
///
/// For block compressed formats the underlying array must use [`ArrayFormat::U32`] as its base
/// format, with 2 or 4 channels depending on the compressed format: BC1 and BC4 need
/// [`ArrayFormat::U32`] with 2 channels, every other BC format needs the same format with
/// 4 channels.
#[repr(u32)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum ResourceViewFormat {
    /// No view format; the format of the underlying resource is used.
    None = 0,
    /// Unsigned 8-bit integers, 1 channel.
    U8x1 = 1,
    /// Unsigned 8-bit integers, 2 channels.
    U8x2 = 2,
    /// Unsigned 8-bit integers, 4 channels.
    U8x4 = 3,
    /// Signed 8-bit integers, 1 channel.
    I8x1 = 4,
    /// Signed 8-bit integers, 2 channels.
    I8x2 = 5,
    /// Signed 8-bit integers, 4 channels.
    I8x4 = 6,
    /// Unsigned 16-bit integers, 1 channel.
    U16x1 = 7,
    /// Unsigned 16-bit integers, 2 channels.
    U16x2 = 8,
    /// Unsigned 16-bit integers, 4 channels.
    U16x4 = 9,
    /// Signed 16-bit integers, 1 channel.
    I16x1 = 10,
    /// Signed 16-bit integers, 2 channels.
    I16x2 = 11,
    /// Signed 16-bit integers, 4 channels.
    I16x4 = 12,
    /// Unsigned 32-bit integers, 1 channel.
    U32x1 = 13,
    /// Unsigned 32-bit integers, 2 channels.
    U32x2 = 14,
    /// Unsigned 32-bit integers, 4 channels.
    U32x4 = 15,
    /// Signed 32-bit integers, 1 channel.
    I32x1 = 16,
    /// Signed 32-bit integers, 2 channels.
    I32x2 = 17,
    /// Signed 32-bit integers, 4 channels.
    I32x4 = 18,
    /// 16-bit floating point, 1 channel.
    F16x1 = 19,
    /// 16-bit floating point, 2 channels.
    F16x2 = 20,
    /// 16-bit floating point, 4 channels.
    F16x4 = 21,
    /// 32-bit floating point, 1 channel.
    F32x1 = 22,
    /// 32-bit floating point, 2 channels.
    F32x2 = 23,
    /// 32-bit floating point, 4 channels.
    F32x4 = 24,
    /// Block compressed 1.
    BC1 = 25,
    /// Block compressed 2.
    BC2 = 26,
    /// Block compressed 3.
    BC3 = 27,
    /// Block compressed 4, unsigned.
    BC4U = 28,
    /// Block compressed 4, signed.
    BC4S = 29,
    /// Block compressed 5, unsigned.
    BC5U = 30,
    /// Block compressed 5, signed.
    BC5S = 31,
    /// Block compressed 6, unsigned half-float.
    BC6HU = 32,
    /// Block compressed 6, signed half-float.
    BC6HS = 33,
    /// Block compressed 7.
    BC7 = 34,
}
206
207impl ResourceViewFormat {
208 pub fn from_array_format(format: ArrayFormat, num_channels: c_uint) -> Self {
209 // i spent more time on this macro than it would have taken me to just write the matches out
210 // but thats kind of the essence of automation
211 macro_rules! format_impl {
212 ($num_channels:ident, $original:ident, $($res:ident),*) => {{
213 if format == ArrayFormat::$original {
214 let res = [$(ResourceViewFormat::$res),*];
215 return match $num_channels {
216 1 => res[0],
217 2 => res[1],
218 4 => res[2],
219 _ => unreachable!("num_channels must be 1, 2, or 4")
220 };
221 }
222 }}
223 }
224
225 format_impl!(num_channels, U8, U8x1, U8x2, U8x4);
226 format_impl!(num_channels, U16, U16x1, U16x2, U16x4);
227 format_impl!(num_channels, U32, U32x1, U32x2, U32x4);
228 format_impl!(num_channels, I8, I8x1, I8x2, I8x4);
229 format_impl!(num_channels, I16, I16x1, I16x2, I16x4);
230 format_impl!(num_channels, I32, I32x1, I32x2, I32x4);
231 format_impl!(num_channels, F32, F32x1, F32x2, F32x4);
232 assert_ne!(
233 format,
234 ArrayFormat::F64,
235 "CUDA Does not have 64 bit float textures, you can instead use int textures with 2 channels then cast the ints to a double in the kernel"
236 );
237 unreachable!()
238 }
239}
240
241#[repr(C)]
242#[derive(Debug, Clone, Copy)]
243pub struct ResourceViewDescriptor {
244 /// The format of the resource view.
245 pub format: ResourceViewFormat,
246 /// The new width of the texture data. If this is a compressed format this must be 4x the original width.
247 /// Otherwise, it must be equal to that of the original resource.
248 pub width: usize,
249 /// The new height of the texture data. If this is a compressed format this must be 4x the original height.
250 /// Otherwise, it must be equal to that of the original resource.
251 pub height: usize,
252 /// The new depth of the texture data. If this is a compressed format this must be 4x the original depth.
253 /// Otherwise, it must be equal to that of the original resource.
254 pub depth: usize,
255 /// The most detailed mipmap level. This will be the new level zero. For non-mipmapped resources this must be `0`.
256 /// This value will be relative to [`TextureDescriptor::min_mipmap_level_clamp`] and [`TextureDescriptor::max_mipmap_level_clamp`]. Ex.
257 /// if the first mipmap level is `2` and the min level clamp is `1.2`, then the actual min mipmap level clamp will be `3.2`.
258 pub first_mipmap_level: c_uint,
259 /// The least detailed mipmap level. This must be `0` for non-mipmapped resources.
260 pub last_mipmap_level: c_uint,
261 /// The first layer index for layered textures. This must be `0` for non-layered resources.
262 pub first_layer: c_uint,
263 /// The last layer index for layered textures. This must be `0` for non-layered resources.
264 pub last_layer: c_uint,
265}
266
267impl ResourceViewDescriptor {
268 pub fn from_array_desc(desc: &ArrayDescriptor) -> Self {
269 Self {
270 format: ResourceViewFormat::from_array_format(desc.format(), desc.num_channels()),
271 width: desc.width(),
272 height: desc.height(),
273 depth: desc.depth(),
274 first_mipmap_level: 0,
275 last_mipmap_level: 0,
276 first_layer: 0,
277 last_layer: 0,
278 }
279 }
280
281 pub fn to_raw(self) -> CUDA_RESOURCE_VIEW_DESC {
282 let ResourceViewDescriptor {
283 format,
284 width,
285 height,
286 depth,
287 first_mipmap_level,
288 last_mipmap_level,
289 first_layer,
290 last_layer,
291 } = self;
292
293 CUDA_RESOURCE_VIEW_DESC {
294 format: unsafe { transmute(format) },
295 width,
296 height,
297 depth,
298 firstMipmapLevel: first_mipmap_level,
299 lastMipmapLevel: last_mipmap_level,
300 firstLayer: first_layer,
301 lastLayer: last_layer,
302 reserved: [0; 16],
303 }
304 }
305}
306
307bitflags::bitflags! {
308 /// Flags for a resource descriptor. Currently empty.
309 #[derive(Default)]
310 pub struct ResourceDescriptorFlags: c_uint {
311 #[doc(hidden)]
312 const _ZERO = 0;
313 }
314}
315
316#[non_exhaustive]
317#[derive(Debug)]
318pub enum ResourceType {
319 Array { array: ArrayObject },
320 // TODO: validate the soundness of linear and pitch2, they require some pointer to memory, but
321 // it might be possible to cause unsoundness by allocating some type then allocating a texture, and reading back
322 // the texture to host memory. Causing GPU UB is probably fine, but using that to cause host UB is not acceptable.
323
324 // Linear {
325 // format: ArrayFormat,
326 // num_channels: u32,
327 // size: usize,
328 // },
329 // Pitch2d {
330 // format: ArrayFormat,
331 // num_channels: u32,
332 // width: usize,
333 // height: usize,
334 // pitch_in_bytes: usize,
335 // },
336}
337
338#[derive(Debug)]
339pub struct ResourceDescriptor {
340 pub flags: ResourceDescriptorFlags,
341 pub ty: ResourceType,
342}
343
344impl ResourceDescriptor {
345 pub fn into_raw(self) -> CUDA_RESOURCE_DESC {
346 let ty = match self.ty {
347 ResourceType::Array { .. } => CUresourcetype::CU_RESOURCE_TYPE_ARRAY,
348 // ResourceType::Linear { .. } => CUresourcetype::CU_RESOURCE_TYPE_LINEAR,
349 // ResourceType::Pitch2d { .. } => CUresourcetype::CU_RESOURCE_TYPE_PITCH2D,
350 };
351
352 // we can't just use `array.handle`, this will cause the array object to call `Drop` and destroy the
353 // array prematurely, which will yield a status access violation when we try to create the texture object
354 // so we need to essentially leak the array into just a handle.
355 let res = match self.ty {
356 ResourceType::Array { array } => CUDA_RESOURCE_DESC_st__bindgen_ty_1 {
357 array: CUDA_RESOURCE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
358 hArray: array.into_raw(),
359 },
360 },
361 // ResourceType::Linear { format, num_channels, size }
362 };
363
364 CUDA_RESOURCE_DESC {
365 resType: ty,
366 flags: self.flags.bits(),
367 res,
368 }
369 }
370
371 // TODO: evaluate if its possible to cause UB by making a raw descriptor with an invalid array handle.
372 pub(crate) fn from_raw(raw: CUDA_RESOURCE_DESC) -> Self {
373 match raw.resType {
374 cuda::CUresourcetype_enum::CU_RESOURCE_TYPE_ARRAY => Self {
375 flags: ResourceDescriptorFlags::from_bits(raw.flags)
376 .expect("invalid resource descriptor flags"),
377 ty: ResourceType::Array {
378 array: ArrayObject {
379 handle: unsafe { raw.res.array.hArray },
380 },
381 },
382 },
383 _ => panic!("Unsupported resource descriptor"),
384 }
385 }
386}
387
388#[derive(Debug)]
389pub struct Texture {
390 // needed to tell the destructor if it should drop the array if we havent
391 // used into_array. TODO: figure out a good way to deal with array ownership issues.
392 _destroy_array_on_destruct: bool,
393 handle: CUtexObject,
394}
395
396impl Drop for Texture {
397 fn drop(&mut self) {
398 unsafe {
399 // drop the descriptor, which causes the array inside it to be dropped too
400 if false {
401 let res = self.resource_desc();
402 if let Ok(res) = res {
403 let _ = ManuallyDrop::into_inner(res);
404 }
405 }
406
407 cuTexObjectDestroy(self.handle);
408 }
409 }
410}
411
/// Raw, opaque texture handle value, as returned by `Texture::handle`.
pub type TextureHandle = c_ulonglong;
413
414impl Texture {
415 /// The opaque handle to this texture on the gpu. This is used for passing to a kernel.
416 pub fn handle(&self) -> TextureHandle {
417 self.handle
418 }
419
420 pub fn new(
421 resource_desc: ResourceDescriptor,
422 texture_desc: TextureDescriptor,
423 resource_view_desc: Option<ResourceViewDescriptor>,
424 ) -> CudaResult<Self> {
425 let handle = unsafe {
426 let mut uninit = MaybeUninit::<CUtexObject>::uninit();
427 let resource_view_desc =
428 if let Some(x) = resource_view_desc.map(|x| Box::new(x.to_raw())) {
429 Box::into_raw(x)
430 } else {
431 ptr::null_mut()
432 };
433
434 let resource_desc = &resource_desc.into_raw();
435 let texture_desc = &texture_desc.to_raw();
436
437 cuTexObjectCreate(
438 uninit.as_mut_ptr(),
439 resource_desc as *const _,
440 texture_desc as *const _,
441 resource_view_desc as *const _,
442 )
443 .to_result()?;
444 if !resource_view_desc.is_null() {
445 let _ = Box::from_raw(resource_view_desc);
446 }
447 uninit.assume_init()
448 };
449 Ok(Self {
450 handle,
451 _destroy_array_on_destruct: true,
452 })
453 }
454
455 pub fn from_array(array: ArrayObject) -> CudaResult<Self> {
456 let resource_desc = ResourceDescriptor {
457 flags: ResourceDescriptorFlags::empty(),
458 ty: ResourceType::Array { array },
459 };
460 Self::new(resource_desc, Default::default(), None)
461 }
462
463 pub fn into_array(mut self) -> CudaResult<Option<ArrayObject>> {
464 let desc = unsafe { ManuallyDrop::take(&mut self.resource_desc()?) };
465 self._destroy_array_on_destruct = false;
466 Ok(match desc.ty {
467 ResourceType::Array { array } => Some(array),
468 })
469 }
470
471 // pub fn array(&mut self) -> CudaResult<Option<&ArrayObject>> {
472 // let desc = self.resource_desc()?;
473 // Ok(match desc.ty {
474 // ResourceType::Array { array } => Some(array),
475 // })
476 // }
477
478 // this function returns a ManuallyDrop because dropping the descriptor will cause the underlying
479 // array to be dropped, which will cause UB or undesired consequences.
480 unsafe fn resource_desc(&mut self) -> CudaResult<ManuallyDrop<ResourceDescriptor>> {
481 let raw = {
482 let mut uninit = MaybeUninit::<CUDA_RESOURCE_DESC>::uninit();
483 cuTexObjectGetResourceDesc(uninit.as_mut_ptr(), self.handle).to_result()?;
484 uninit.assume_init()
485 };
486 Ok(ManuallyDrop::new(ResourceDescriptor::from_raw(raw)))
487 }
488
489 // pub fn resource_view_desc(&self) -> CudaResult<ResourceViewDescriptor> {
490 // let raw = unsafe {
491 // let ptr = ptr::null_mut();
492 // cuTexObjectGetResourceViewDesc(ptr, self.handle).to_result()?;
493 // *ptr
494 // };
495 // Ok(ResourceViewDescriptor::)
496 // }
497}