cust/memory/
array.rs

1//! Routines for allocating and using CUDA Array Objects.
2//!
3//! Detailed documentation about allocating CUDA Arrays can be found in the
4//! [CUDA Driver API](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gc2322c70b38c2984536c90ed118bb1d7)
5
6use crate::context::CurrentContext;
7use crate::device::DeviceAttribute;
8use crate::error::*;
9use crate::sys::cuMemcpy2D_v2;
10use crate::sys::cuMemcpyAtoH_v2;
11use crate::sys::cuMemcpyHtoA_v2;
12use crate::sys::CUDA_MEMCPY2D;
13use crate::sys::{self as cuda, CUarray, CUarray_format, CUarray_format_enum};
14use std::ffi::c_void;
15use std::mem;
16use std::mem::zeroed;
17use std::mem::ManuallyDrop;
18use std::mem::MaybeUninit;
19use std::os::raw::c_uint;
20use std::ptr::null;
21use std::ptr::null_mut;
22
/// Element formats that a CUDA Array can be created with.
///
/// Each variant is translated to the driver's `CUarray_format` by `ArrayFormat::to_raw`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArrayFormat {
    /// Unsigned 8-bit integer
    U8,
    /// Unsigned 16-bit integer
    U16,
    /// Unsigned 32-bit integer
    U32,
    /// Signed 8-bit integer
    I8,
    /// Signed 16-bit integer
    I16,
    /// Signed 32-bit integer
    I32,
    /// Floating point format that `mem_size` reports as 4 bytes wide.
    ///
    /// NOTE(review): `to_raw` maps this variant to `CU_AD_FORMAT_HALF`, which does not agree
    /// with the variant name or the 4-byte size reported below - confirm which is intended.
    F32,
    /// Floating point format that `mem_size` reports as 8 bytes wide.
    ///
    /// NOTE(review): `to_raw` maps this variant to `CU_AD_FORMAT_FLOAT`, which does not agree
    /// with the variant name or the 8-byte size reported below - confirm which is intended.
    F64,
}

impl ArrayFormat {
    /// Returns the size in bytes of a single channel value stored in this format.
    ///
    /// The sizes are those of the Rust primitive each variant is named after.
    pub fn mem_size(&self) -> usize {
        match *self {
            Self::U8 => mem::size_of::<u8>(),
            Self::I8 => mem::size_of::<i8>(),
            Self::U16 => mem::size_of::<u16>(),
            Self::I16 => mem::size_of::<i16>(),
            Self::U32 => mem::size_of::<u32>(),
            Self::I32 => mem::size_of::<i32>(),
            Self::F32 => mem::size_of::<f32>(),
            Self::F64 => mem::size_of::<f64>(),
        }
    }
}
57
58mod private {
59    pub trait Sealed {}
60}
61
62pub trait ArrayPrimitive: private::Sealed + Copy + Default {
63    fn array_format() -> ArrayFormat;
64}
65
66impl private::Sealed for u8 {}
67impl private::Sealed for u16 {}
68impl private::Sealed for u32 {}
69impl private::Sealed for i8 {}
70impl private::Sealed for i16 {}
71impl private::Sealed for i32 {}
72impl private::Sealed for f32 {}
73impl private::Sealed for f64 {}
74
75impl ArrayPrimitive for u8 {
76    fn array_format() -> ArrayFormat {
77        ArrayFormat::U8
78    }
79}
80
81impl ArrayPrimitive for u16 {
82    fn array_format() -> ArrayFormat {
83        ArrayFormat::U16
84    }
85}
86
87impl ArrayPrimitive for u32 {
88    fn array_format() -> ArrayFormat {
89        ArrayFormat::U32
90    }
91}
92
93impl ArrayPrimitive for i8 {
94    fn array_format() -> ArrayFormat {
95        ArrayFormat::I8
96    }
97}
98
99impl ArrayPrimitive for i16 {
100    fn array_format() -> ArrayFormat {
101        ArrayFormat::I16
102    }
103}
104
105impl ArrayPrimitive for i32 {
106    fn array_format() -> ArrayFormat {
107        ArrayFormat::I32
108    }
109}
110
111impl ArrayPrimitive for f32 {
112    fn array_format() -> ArrayFormat {
113        ArrayFormat::F32
114    }
115}
116
117impl ArrayPrimitive for f64 {
118    fn array_format() -> ArrayFormat {
119        ArrayFormat::F64
120    }
121}
122
123impl ArrayFormat {
124    /// Creates ArrayFormat from the CUDA Driver API enum
125    pub fn from_raw(raw: CUarray_format) -> Self {
126        match raw {
127            CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT8 => ArrayFormat::U8,
128            CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT16 => ArrayFormat::U16,
129            CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT32 => ArrayFormat::U32,
130            CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT8 => ArrayFormat::I8,
131            CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT16 => ArrayFormat::I16,
132            CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT32 => ArrayFormat::I32,
133            CUarray_format_enum::CU_AD_FORMAT_HALF => ArrayFormat::F32,
134            CUarray_format_enum::CU_AD_FORMAT_FLOAT => ArrayFormat::F64,
135            // there are literally no docs on what nv12 is???
136            // it seems to be something with multiplanar arrays, needs some investigation
137            CUarray_format_enum::CU_AD_FORMAT_NV12 => panic!("nv12 is not supported yet"),
138        }
139    }
140
141    /// Converts ArrayFormat to the CUDA Driver API enum
142    pub fn to_raw(self) -> CUarray_format {
143        match self {
144            ArrayFormat::U8 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT8,
145            ArrayFormat::U16 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT16,
146            ArrayFormat::U32 => CUarray_format_enum::CU_AD_FORMAT_UNSIGNED_INT32,
147            ArrayFormat::I8 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT8,
148            ArrayFormat::I16 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT16,
149            ArrayFormat::I32 => CUarray_format_enum::CU_AD_FORMAT_SIGNED_INT32,
150            ArrayFormat::F32 => CUarray_format_enum::CU_AD_FORMAT_HALF,
151            ArrayFormat::F64 => CUarray_format_enum::CU_AD_FORMAT_FLOAT,
152        }
153    }
154}
155
bitflags::bitflags! {
    /// Flags which modify the behavior of CUDA array creation.
    ///
    /// The bits are stored in the `Flags` field of the array descriptor. The `Default` value has
    /// no flags set.
    #[derive(Default)]
    pub struct ArrayObjectFlags: c_uint {
        /// Enables creation of layered CUDA arrays. When this flag is set, depth specifies the
        /// number of layers, not the depth of a 3D array.
        const LAYERED = cuda::CUDA_ARRAY3D_LAYERED;

        /// Enables surface references to be bound to the CUDA array.
        const SURFACE_LDST = cuda::CUDA_ARRAY3D_SURFACE_LDST;

        /// Enables creation of cubemaps. If this flag is set, Width must be equal to Height, and
        /// Depth must be six. If the `LAYERED` flag is also set, then Depth must be a multiple of
        /// six.
        const CUBEMAP = cuda::CUDA_ARRAY3D_CUBEMAP;

        /// Indicates that the CUDA array will be used for texture gather. Texture gather can only
        /// be performed on 2D CUDA arrays.
        const TEXTURE_GATHER = cuda::CUDA_ARRAY3D_TEXTURE_GATHER;
    }
}
177
178impl ArrayObjectFlags {
179    /// Creates a default flags object with no flags set.
180    pub fn new() -> Self {
181        Self::default()
182    }
183}
184
185/// Describes a CUDA Array
186#[derive(Clone, Copy, Debug)]
187pub struct ArrayDescriptor {
188    desc: cuda::CUDA_ARRAY3D_DESCRIPTOR,
189}
190
191impl ArrayDescriptor {
192    /// Constructs an ArrayDescriptor from a CUDA Driver API Array Descriptor.
193    pub fn from_raw(desc: cuda::CUDA_ARRAY3D_DESCRIPTOR) -> Self {
194        Self { desc }
195    }
196
197    /// Constructs an ArrayDescriptor from dimensions, format, num_channels, and flags.
198    pub fn new(
199        dims: [usize; 3],
200        format: ArrayFormat,
201        num_channels: c_uint,
202        flags: ArrayObjectFlags,
203    ) -> Self {
204        Self {
205            desc: cuda::CUDA_ARRAY3D_DESCRIPTOR {
206                Width: dims[0],
207                Height: dims[1],
208                Depth: dims[2],
209                Format: format.to_raw(),
210                NumChannels: num_channels,
211                Flags: flags.bits(),
212            },
213        }
214    }
215
216    /// Creates a new ArrayDescriptor from a set of dimensions and format.
217    pub fn from_dims_format(dims: [usize; 3], format: ArrayFormat) -> Self {
218        Self {
219            desc: cuda::CUDA_ARRAY3D_DESCRIPTOR {
220                Width: dims[0],
221                Height: dims[1],
222                Depth: dims[2],
223                Format: format.to_raw(),
224                NumChannels: 1,
225                Flags: ArrayObjectFlags::default().bits(),
226            },
227        }
228    }
229
230    /// Returns the dimensions of the ArrayDescriptor
231    pub fn dims(&self) -> [usize; 3] {
232        [self.desc.Width, self.desc.Height, self.desc.Depth]
233    }
234
235    /// Sets the dimensions of the ArrayDescriptor
236    pub fn set_dims(&mut self, dims: [usize; 3]) {
237        self.desc.Width = dims[0];
238        self.desc.Height = dims[1];
239        self.desc.Depth = dims[2];
240    }
241
242    /// Returns the width of the ArrayDescripor
243    pub fn width(&self) -> usize {
244        self.desc.Width
245    }
246
247    /// Sets the width of the ArrayDescriptor
248    pub fn set_width(&mut self, width: usize) {
249        self.desc.Width = width;
250    }
251
252    /// Returns the height of the ArrayDescripor
253    pub fn height(&self) -> usize {
254        self.desc.Height
255    }
256
257    /// Sets the height of the ArrayDescriptor
258    pub fn set_height(&mut self, height: usize) {
259        self.desc.Height = height;
260    }
261
262    /// Returns the depth of the ArrayDescripor
263    pub fn depth(&self) -> usize {
264        self.desc.Depth
265    }
266
267    /// Sets the depth of the ArrayDescriptor
268    pub fn set_depth(&mut self, depth: usize) {
269        self.desc.Depth = depth;
270    }
271
272    /// Returns the format of the ArrayDescripor
273    pub fn format(&self) -> ArrayFormat {
274        ArrayFormat::from_raw(self.desc.Format)
275    }
276
277    /// Sets the format of the ArrayDescriptor
278    pub fn set_format(&mut self, format: ArrayFormat) {
279        self.desc.Format = format.to_raw();
280    }
281
282    /// Returns the number of channels in the ArrayDescriptor
283    pub fn num_channels(&self) -> c_uint {
284        self.desc.NumChannels
285    }
286
287    /// Sets the number of channels in the ArrayDescriptor
288    pub fn set_num_channels(&mut self, num_channels: c_uint) {
289        self.desc.NumChannels = num_channels;
290    }
291
292    /// Returns the flags of the ArrayDescriptor
293    pub fn flags(&self) -> ArrayObjectFlags {
294        ArrayObjectFlags::from_bits_truncate(self.desc.Flags)
295    }
296
297    /// Sets the flags of the ArrayDescriptor.
298    pub fn set_flags(&mut self, flags: ArrayObjectFlags) {
299        self.desc.Flags = flags.bits();
300    }
301}
302
/// A CUDA Array. Can be bound to a texture or surface.
///
/// The value owns its driver handle: the `Drop` impl destroys the array with `cuArrayDestroy`.
pub struct ArrayObject {
    // Raw driver handle of the array.
    pub(crate) handle: CUarray,
}

// NOTE(review): `Send` and `Sync` are asserted manually here; presumably the driver allows a
// `CUarray` handle to be used from any thread - confirm.
unsafe impl Send for ArrayObject {}
unsafe impl Sync for ArrayObject {}
310
311impl ArrayObject {
312    pub(crate) fn into_raw(self) -> CUarray {
313        ManuallyDrop::new(self).handle
314    }
315
316    /// Constructs a generic ArrayObject from an `ArrayDescriptor`.
317    pub fn from_descriptor(descriptor: &ArrayDescriptor) -> CudaResult<Self> {
318        // We validate the descriptor up front in debug mode. This provides a good error message to
319        // the user when they get something wrong, but doesn't re-validate in release mode.
320        if cfg!(debug_assertions) {
321            assert_ne!(
322                0,
323                descriptor.width(),
324                "Cannot allocate an array with 0 Width"
325            );
326
327            if !descriptor.flags().contains(ArrayObjectFlags::LAYERED) && descriptor.depth() > 0 {
328                assert_ne!(
329                    0,
330                    descriptor.height(),
331                    "If Depth is non-zero and the descriptor is not LAYERED, then Height must also \
332                    be non-zero."
333                );
334            }
335
336            if descriptor.flags().contains(ArrayObjectFlags::CUBEMAP) {
337                assert_eq!(
338                    descriptor.height(),
339                    descriptor.width(),
340                    "Height and Width must be equal for CUBEMAP arrays."
341                );
342
343                if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
344                    assert_eq!(
345                        0,
346                        descriptor.depth() % 6,
347                        "Depth must be a multiple of 6 when the array descriptor is for a LAYERED \
348                         CUBEMAP."
349                    );
350                } else {
351                    assert_eq!(
352                        6,
353                        descriptor.depth(),
354                        "Depth must be equal to 6 when the array descriptor is for a CUBEMAP."
355                    );
356                }
357            }
358
359            assert!(
360                descriptor.num_channels() == 1
361                    || descriptor.num_channels() == 2
362                    || descriptor.num_channels() == 4,
363                "NumChannels was set to {}. It must be 1, 2, or 4.",
364                descriptor.num_channels()
365            );
366
367            // Exhaustively check bounds of arrays
368            let device = CurrentContext::get_device()?;
369
370            let attr = |attr| Ok(1..=(device.get_attribute(attr)? as usize));
371
372            let (description, bounds) = if descriptor.flags().contains(ArrayObjectFlags::CUBEMAP) {
373                if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
374                    (
375                        "Layered Cubemap",
376                        vec![[
377                            attr(DeviceAttribute::MaximumTextureCubemapLayeredWidth)?,
378                            attr(DeviceAttribute::MaximumTextureCubemapLayeredWidth)?,
379                            attr(DeviceAttribute::MaximumTextureCubemapLayeredLayers)?,
380                        ]],
381                    )
382                } else {
383                    (
384                        "Cubemap",
385                        vec![[
386                            attr(DeviceAttribute::MaximumTextureCubemapWidth)?,
387                            attr(DeviceAttribute::MaximumTextureCubemapWidth)?,
388                            6..=6,
389                        ]],
390                    )
391                }
392            } else if descriptor.flags().contains(ArrayObjectFlags::LAYERED) {
393                if descriptor.height() > 0 {
394                    (
395                        "2D Layered",
396                        vec![[
397                            attr(DeviceAttribute::MaximumTexture2DLayeredWidth)?,
398                            attr(DeviceAttribute::MaximumTexture2DLayeredHeight)?,
399                            attr(DeviceAttribute::MaximumTexture2DLayeredLayers)?,
400                        ]],
401                    )
402                } else {
403                    (
404                        "1D Layered",
405                        vec![[
406                            attr(DeviceAttribute::MaximumTexture1DLayeredWidth)?,
407                            0..=0,
408                            attr(DeviceAttribute::MaximumTexture1DLayeredLayers)?,
409                        ]],
410                    )
411                }
412            } else if descriptor.depth() > 0 {
413                (
414                    "3D",
415                    vec![
416                        [
417                            attr(DeviceAttribute::MaximumTexture3DWidth)?,
418                            attr(DeviceAttribute::MaximumTexture3DHeight)?,
419                            attr(DeviceAttribute::MaximumTexture3DDepth)?,
420                        ],
421                        [
422                            attr(DeviceAttribute::MaximumTexture3DWidthAlternate)?,
423                            attr(DeviceAttribute::MaximumTexture3DHeightAlternate)?,
424                            attr(DeviceAttribute::MaximumTexture3DDepthAlternate)?,
425                        ],
426                    ],
427                )
428            } else if descriptor.height() > 0 {
429                if descriptor
430                    .flags()
431                    .contains(ArrayObjectFlags::TEXTURE_GATHER)
432                {
433                    (
434                        "2D Texture Gather",
435                        vec![[
436                            attr(DeviceAttribute::MaximumTexture2DGatherWidth)?,
437                            attr(DeviceAttribute::MaximumTexture2DGatherHeight)?,
438                            0..=0,
439                        ]],
440                    )
441                } else {
442                    (
443                        "2D",
444                        vec![[
445                            attr(DeviceAttribute::MaximumTexture2DWidth)?,
446                            attr(DeviceAttribute::MaximumTexture2DHeight)?,
447                            0..=0,
448                        ]],
449                    )
450                }
451            } else {
452                assert!(descriptor.width() > 0);
453                (
454                    "1D",
455                    vec![[attr(DeviceAttribute::MaximumTexture1DWidth)?, 0..=0, 0..=0]],
456                )
457            };
458
459            let bounds_invalid = |x: &[::std::ops::RangeInclusive<usize>; 3]| {
460                (descriptor.width() >= *x[0].start() && descriptor.width() <= *x[0].end())
461                    && (descriptor.height() >= *x[1].start() && descriptor.height() <= *x[1].end())
462                    && (descriptor.depth() >= *x[2].start() && descriptor.depth() <= *x[2].end())
463            };
464
465            assert!(
466                bounds.iter().any(bounds_invalid),
467                "The dimensions of the {} ArrayObject did not fall within the valid bounds for \
468                the array. descriptor = {:?}, dims = {:?}, valid bounds = {:?}",
469                description,
470                descriptor,
471                [descriptor.width(), descriptor.height(), descriptor.depth()],
472                bounds
473            );
474        }
475
476        let mut handle = MaybeUninit::uninit();
477        unsafe { cuda::cuArray3DCreate_v2(handle.as_mut_ptr(), &descriptor.desc) }.to_result()?;
478        Ok(Self {
479            handle: unsafe { handle.assume_init() },
480        })
481    }
482
483    /// Allocates a new CUDA Array that is up to 3-dimensions.
484    ///
485    /// `dims` contains the extents of the array. `dims[0]` must be non-zero. `dims[1]` must be
486    /// non-zero if `dims[2]` is non-zero. The rank of the array is equal to the number of non-zero
487    /// `dims`.
488    ///
489    /// `format` determines the data-type of the array.
490    ///
491    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
492    ///
493    /// ```
494    /// # use cust::*;
495    /// # use std::error::Error;
496    /// # fn main() -> Result<(), Box<dyn Error>> {
497    /// # let _ctx = quick_init()?;
498    /// use cust::memory::array::{ArrayObject, ArrayFormat};
499    ///
500    /// let one_dim_array = ArrayObject::new([10, 0, 0], ArrayFormat::F32, 1)?;
501    /// let two_dim_array = ArrayObject::new([10, 12, 0], ArrayFormat::F32, 1)?;
502    /// let three_dim_array = ArrayObject::new([10, 12, 14], ArrayFormat::F32, 1)?;
503    /// # Ok(())
504    /// # }
505    /// ```
506    pub fn new(dims: [usize; 3], format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
507        Self::from_descriptor(&ArrayDescriptor::new(
508            dims,
509            format,
510            num_channels,
511            Default::default(),
512        ))
513    }
514
515    /// Allocates a new 1D CUDA Array.
516    ///
517    /// `width` must be non-zero.
518    ///
519    /// `format` determines the data-type of the array.
520    ///
521    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
522    ///
523    /// ```
524    /// # use cust::*;
525    /// # use std::error::Error;
526    /// # fn main() -> Result<(), Box<dyn Error>> {
527    /// # let _ctx = quick_init()?;
528    /// use cust::memory::array::{ArrayObject, ArrayFormat};
529    ///
530    /// // Allocates a 1D array of 10 single-precision, single-channel floating point values.
531    /// let one_dim_array = ArrayObject::new_1d(10, ArrayFormat::F32, 1)?;
532    /// # Ok(())
533    /// # }
534    /// ```
535    pub fn new_1d(width: usize, format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
536        Self::from_descriptor(&ArrayDescriptor::new(
537            [width, 0, 0],
538            format,
539            num_channels,
540            Default::default(),
541        ))
542    }
543
544    /// Allocates a new CUDA Array that is up to 2-dimensions.
545    ///
546    /// `dims` contains the extents of the array. `dims[0]` must be non-zero. The rank of the array
547    /// is equal to the number of non-zero `dims`.
548    ///
549    /// `format` determines the data-type of the array.
550    ///
551    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
552    ///
553    /// ```
554    /// # use cust::*;
555    /// # use std::error::Error;
556    /// # fn main() -> Result<(), Box<dyn Error>> {
557    /// # let _ctx = quick_init()?;
558    /// use cust::memory::array::{ArrayObject, ArrayFormat};
559    ///
560    /// // Allocates an 8x24 array of single-precision, single-channel floating point values.
561    /// let one_dim_array = ArrayObject::new_2d([8, 24], ArrayFormat::F32, 1)?;
562    /// # Ok(())
563    /// # }
564    /// ```
565    pub fn new_2d(dims: [usize; 2], format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
566        Self::from_descriptor(&ArrayDescriptor::new(
567            [dims[0], dims[1], 0],
568            format,
569            num_channels,
570            Default::default(),
571        ))
572    }
573
574    /// Creates a new Layered 1D or 2D CUDA Array.
575    ///
576    /// `dims` contains the extents of the array. `dims[0]` must be non-zero. The rank of the array
577    /// is equivalent to the number of non-zero dimensions.
578    ///
579    /// `num_layers` determines the number of layers in the array.
580    ///
581    /// `format` determines the data-type of the array.
582    ///
583    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
584    ///
585    /// ```
586    /// # use cust::*;
587    /// # use std::error::Error;
588    /// # fn main() -> Result<(), Box<dyn Error>> {
589    /// # let _ctx = quick_init()?;
590    /// use cust::memory::array::{ArrayObject, ArrayFormat};
591    ///
592    /// // Allocates a 7x8 array with 10 layers of single-precision, single-channel floating
593    /// // point values.
594    /// let layered_array = ArrayObject::new_layered([7, 8], 10, ArrayFormat::F32, 1)?;
595    /// # Ok(())
596    /// # }
597    /// ```
598    pub fn new_layered(
599        dims: [usize; 2],
600        num_layers: usize,
601        format: ArrayFormat,
602        num_channels: c_uint,
603    ) -> CudaResult<Self> {
604        Self::from_descriptor(&ArrayDescriptor::new(
605            [dims[0], dims[1], num_layers],
606            format,
607            num_channels,
608            ArrayObjectFlags::LAYERED,
609        ))
610    }
611
612    /// Creates a new Layered 1D CUDA Array.
613    ///
614    /// `width` must be non-zero.
615    ///
616    /// `num_layers` determines the number of layers in the array.
617    ///
618    /// `format` determines the data-type of the array.
619    ///
620    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
621    ///
622    /// ```
623    /// # use cust::*;
624    /// # use std::error::Error;
625    /// # fn main() -> Result<(), Box<dyn Error>> {
626    /// # let _ctx = quick_init()?;
627    /// use cust::memory::array::{ArrayObject, ArrayFormat};
628    ///
629    /// // Allocates a 5-element array with 10 layers of single-precision, single-channel floating
630    /// // point values.
631    /// let layered_array = ArrayObject::new_layered_1d(5, 10, ArrayFormat::F32, 1)?;
632    /// # Ok(())
633    /// # }
634    /// ```
635    pub fn new_layered_1d(
636        width: usize,
637        num_layers: usize,
638        format: ArrayFormat,
639        num_channels: c_uint,
640    ) -> CudaResult<Self> {
641        Self::from_descriptor(&ArrayDescriptor::new(
642            [width, 0, num_layers],
643            format,
644            num_channels,
645            ArrayObjectFlags::LAYERED,
646        ))
647    }
648
649    /// Creates a new Cubemap CUDA Array. The array is represented as 6 side x side 2D arrays.
650    ///
651    /// `side` is the length of an edge of the cube.
652    ///
653    /// `format` determines the data-type of the array.
654    ///
655    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
656    ///
657    /// ```
658    /// # use cust::*;
659    /// # use std::error::Error;
660    /// # fn main() -> Result<(), Box<dyn Error>> {
661    /// # let _ctx = quick_init()?;
662    /// use cust::memory::array::{ArrayObject, ArrayFormat};
663    ///
664    /// // Allocates an 8x8 Cubemap array of single-precision, single-channel floating point
665    /// // numbers.
666    /// let layered_array = ArrayObject::new_cubemap(8, ArrayFormat::F32, 1)?;
667    ///
668    /// // All non-layered cubemap arrays have a depth of 6.
669    /// assert_eq!(6, layered_array.descriptor()?.depth());
670    /// # Ok(())
671    /// # }
672    /// ```
673    pub fn new_cubemap(side: usize, format: ArrayFormat, num_channels: c_uint) -> CudaResult<Self> {
674        Self::from_descriptor(&ArrayDescriptor::new(
675            [side, side, 6],
676            format,
677            num_channels,
678            ArrayObjectFlags::CUBEMAP,
679        ))
680    }
681
682    /// Creates a new Layered Cubemap CUDA Array. The array is represented as multiple 6 side x side
683    /// 2D arrays.
684    ///
685    /// `side` is the length of an edge of the cube.
686    ///
687    /// `num_layers` is the number of cubemaps in the array. The actual "depth" of the array is
688    /// `num_layers * 6`.
689    ///
690    /// `format` determines the data-type of the array.
691    ///
692    /// `num_channels` determines the number of channels per array element (1, 2, or 4).
693    ///
694    /// ```
695    /// # use cust::*;
696    /// # use std::error::Error;
697    /// # fn main() -> Result<(), Box<dyn Error>> {
698    /// # let _ctx = quick_init()?;
699    /// use cust::memory::array::{ArrayObject, ArrayFormat};
700    ///
701    /// // Allocates an 8x8 Layered Cubemap array of single-precision, single-channel floating point
702    /// // values with 5 layers.
703    /// let layered_array = ArrayObject::new_layered_cubemap(8, 5, ArrayFormat::F32, 1)?;
704    ///
705    /// // The depth of a layered cubemap array is equal to the number of layers * 6.
706    /// assert_eq!(30, layered_array.descriptor()?.depth());
707    /// # Ok(())
708    /// # }
709    /// ```
710    pub fn new_layered_cubemap(
711        side: usize,
712        num_layers: usize,
713        format: ArrayFormat,
714        num_channels: c_uint,
715    ) -> CudaResult<Self> {
716        Self::from_descriptor(&ArrayDescriptor::new(
717            [side, side, num_layers * 6],
718            format,
719            num_channels,
720            ArrayObjectFlags::CUBEMAP | ArrayObjectFlags::LAYERED,
721        ))
722    }
723
724    /// Gets the descriptor associated with this array.
725    pub fn descriptor(&self) -> CudaResult<ArrayDescriptor> {
726        // Use "zeroed" incase CUDA_ARRAY3D_DESCRIPTOR has uninitialized padding
727        let mut raw_descriptor = MaybeUninit::zeroed();
728        unsafe { cuda::cuArray3DGetDescriptor_v2(raw_descriptor.as_mut_ptr(), self.handle) }
729            .to_result()?;
730
731        Ok(ArrayDescriptor::from_raw(unsafe {
732            raw_descriptor.assume_init()
733        }))
734    }
735
736    /// Try to destroy an `ArrayObject`. Can fail - if it does, returns the CUDA error and the
737    /// un-destroyed array object
738    pub fn drop(array: ArrayObject) -> DropResult<ArrayObject> {
739        match unsafe { cuda::cuArrayDestroy(array.handle) }.to_result() {
740            Ok(()) => Ok(()),
741            Err(e) => Err((e, array)),
742        }
743    }
744
745    /// Copy data from the host to the array on the device. **This will not check if the formats match, it does
746    /// however check for memory size mismatch**.
747    ///
748    /// For example, you can copy a `[u32; 2]` value to a `[u8; 8]` array just fine, but not to a `[u8; 10]` array.
749    pub fn copy_from<T: ArrayPrimitive, U: AsRef<[T]>>(&mut self, val: &U) -> CudaResult<()> {
750        let val = val.as_ref();
751        let desc = self.descriptor()?;
752        let self_size = desc.width()
753            * desc.height().max(1)
754            * desc.depth().max(1)
755            * desc.num_channels() as usize
756            * desc.format().mem_size();
757        let other_size = mem::size_of_val(val);
758        assert_eq!(self_size, other_size, "Array and value sizes don't match");
759        unsafe {
760            if desc.height() == 0 && desc.depth() == 0 {
761                cuMemcpyHtoA_v2(self.handle, 0, val.as_ptr() as *const c_void, self_size)
762                    .to_result()
763            } else if desc.depth() == 0 {
764                let desc = CUDA_MEMCPY2D {
765                    Height: desc.height(),
766                    WidthInBytes: desc.width()
767                        * desc.num_channels() as usize
768                        * desc.format().mem_size(),
769                    dstArray: self.handle,
770                    dstDevice: 0,
771                    dstHost: null_mut(),
772                    dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY,
773                    dstPitch: 0,
774                    dstXInBytes: 0,
775                    dstY: 0,
776                    srcArray: null_mut(),
777                    srcDevice: 0,
778                    srcHost: val.as_ptr() as *const c_void,
779                    srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST,
780                    srcPitch: 0,
781                    srcXInBytes: 0,
782                    srcY: 0,
783                };
784                cuMemcpy2D_v2(&desc as *const _).to_result()
785            } else {
786                panic!();
787            }
788        }
789    }
790
791    /// Copy data from the array to the host. **This will not check if the formats match, it does
792    /// however check for memory size mismatch**.
793    ///
794    /// For example, you can copy a `[u32; 2]` value to a `[u8; 8]` array just fine, but not to a `[u8; 10]` array.
795    pub fn copy_to<T: ArrayPrimitive, U: AsMut<[T]>>(&self, val: &mut U) -> CudaResult<()> {
796        let val = val.as_mut();
797        let desc = self.descriptor()?;
798        let self_size = desc.width()
799            * desc.height().max(1)
800            * desc.depth().max(1)
801            * desc.num_channels() as usize
802            * desc.format().mem_size();
803        let other_size = mem::size_of_val(val);
804        assert_eq!(self_size, other_size, "Array and value sizes don't match");
805        unsafe {
806            if desc.height() == 0 && desc.depth() == 0 {
807                cuMemcpyAtoH_v2(val.as_mut_ptr() as *mut c_void, self.handle, 0, self_size)
808                    .to_result()
809            } else if desc.depth() == 0 {
810                let width = desc.width() * desc.num_channels() as usize * desc.format().mem_size();
811                let desc = CUDA_MEMCPY2D {
812                    Height: desc.height(),
813                    WidthInBytes: width,
814                    dstArray: null_mut(),
815                    dstDevice: 0,
816                    dstHost: val.as_mut_ptr() as *mut c_void,
817                    dstMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_HOST,
818                    dstPitch: 0,
819                    dstXInBytes: 0,
820                    dstY: 0,
821                    srcArray: self.handle,
822                    srcDevice: 0,
823                    srcHost: null(),
824                    srcMemoryType: cuda::CUmemorytype_enum::CU_MEMORYTYPE_ARRAY,
825                    srcPitch: 0,
826                    srcXInBytes: 0,
827                    srcY: 0,
828                };
829                cuMemcpy2D_v2(&desc as *const _).to_result()?;
830                Ok(())
831            } else {
832                panic!();
833            }
834        }
835    }
836
837    /// Copy data from the array into a vec on the host. **This will not check if the formats match, it does
838    /// however yield a correct vec**. Format mismatch and especially format size mismatch may yield incorrect (but not unsound!)
839    /// behavior
840    pub fn as_host_vec<T: ArrayPrimitive>(&self) -> CudaResult<Vec<T>> {
841        let desc = self.descriptor()?;
842        let self_size = desc.width()
843            * desc.height().max(1)
844            * desc.depth().max(1)
845            * desc.num_channels() as usize
846            * desc.format().mem_size();
847
848        let len = self_size / T::array_format().mem_size();
849        unsafe {
850            // SAFETY: anything ArrayPrimitive is a number and therefore zeroable.
851            let mut vec = vec![zeroed(); len];
852            self.copy_to(&mut vec)?;
853            Ok(vec)
854        }
855    }
856}
857
858impl std::fmt::Debug for ArrayObject {
859    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
860        self.descriptor().fmt(f)
861    }
862}
863
864impl Drop for ArrayObject {
865    fn drop(&mut self) {
866        unsafe { cuda::cuArrayDestroy(self.handle) };
867    }
868}
869
870// impl<I: AsRef<[T]> + AsMut<[T]>, T: ArrayPrimitive + DeviceCopy> CopyDestination<I>
871//     for ArrayObject
872// {
873//     fn copy_from(&mut self, val: &I) -> CudaResult<()> {
874//         let val = val.as_ref();
875//         assert!(
876//             self.len() == val.len(),
877//             "destination and source slices have different lengths"
878//         );
879//         let size = mem::size_of::<T>() * self.len();
880//         if size != 0 {
881//             unsafe {
882//                 cuda::cuMemcpyHtoD_v2(
883//                     self.0.as_mut_ptr() as u64,
884//                     val.as_ptr() as *const c_void,
885//                     size,
886//                 )
887//                 .to_result()?
888//             }
889//         }
890//         Ok(())
891//     }
892
893//     fn copy_to(&self, val: &mut I) -> CudaResult<()> {
894//         let val = val.as_mut();
895//         assert!(
896//             self.len() == val.len(),
897//             "destination and source slices have different lengths"
898//         );
899//         let size = mem::size_of::<T>() * self.len();
900//         if size != 0 {
901//             unsafe {
902//                 cuda::cuMemcpyDtoH_v2(val.as_mut_ptr() as *mut c_void, self.as_ptr() as u64, size)
903//                     .to_result()?
904//             }
905//         }
906//         Ok(())
907//     }
908// }
909
#[cfg(test)]
mod test {
    use super::*;

    /// The descriptor read back from an array reports the dimensions, format, channel count
    /// and flags the array was created with.
    #[test]
    fn descriptor_round_trip() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new([1, 2, 3], ArrayFormat::F64, 2).unwrap();
        let desc = array.descriptor().unwrap();

        assert_eq!([1, 2, 3], desc.dims());
        assert_eq!(ArrayFormat::F64, desc.format());
        assert_eq!(2, desc.num_channels());
        assert_eq!(ArrayObjectFlags::default(), desc.flags());
    }

    /// Height and depth of zero are accepted and reported back unchanged.
    #[test]
    fn allow_1d_arrays() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new([10, 0, 0], ArrayFormat::F64, 1).unwrap();

        assert_eq!([10, 0, 0], array.descriptor().unwrap().dims());
    }

    /// A depth of zero with non-zero width and height is accepted and reported back unchanged.
    #[test]
    fn allow_2d_arrays() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new([10, 20, 0], ArrayFormat::F64, 1).unwrap();

        assert_eq!([10, 20, 0], array.descriptor().unwrap().dims());
    }

    /// A layered array with zero height reports the layer count as its depth and carries the
    /// `LAYERED` flag.
    #[test]
    fn allow_1d_layered_arrays() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new_layered([10, 0], 20, ArrayFormat::F64, 1).unwrap();
        let desc = array.descriptor().unwrap();

        assert_eq!([10, 0, 20], desc.dims());
        assert_eq!(ArrayObjectFlags::LAYERED, desc.flags());
    }

    /// A cubemap of side 4 reports dimensions `[4, 4, 6]` and carries the `CUBEMAP` flag.
    #[test]
    fn allow_cubemaps() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new_cubemap(4, ArrayFormat::F64, 1).unwrap();
        let desc = array.descriptor().unwrap();

        assert_eq!([4, 4, 6], desc.dims());
        assert_eq!(ArrayObjectFlags::CUBEMAP, desc.flags());
    }

    /// A layered cubemap reports dimensions `[4, 4, 24]` and carries both the `CUBEMAP` and
    /// `LAYERED` flags.
    #[test]
    fn allow_layered_cubemaps() {
        let _context = crate::quick_init().unwrap();

        let array = ArrayObject::new_layered_cubemap(4, 4, ArrayFormat::F64, 1).unwrap();
        let desc = array.descriptor().unwrap();

        assert_eq!([4, 4, 24], desc.dims());
        let expected_flags = ArrayObjectFlags::CUBEMAP | ArrayObjectFlags::LAYERED;
        assert_eq!(expected_flags, desc.flags());
    }

    /// Creating a 1D array of width zero is expected to panic.
    #[test]
    #[should_panic]
    fn fail_on_zero_width_1d_array() {
        let _context = crate::quick_init().unwrap();

        let _ = ArrayObject::new_1d(0, ArrayFormat::F64, 1).unwrap();
    }

    /// Creating a 3D array whose width is zero is expected to panic.
    #[test]
    #[should_panic]
    fn fail_on_zero_size_widths() {
        let _context = crate::quick_init().unwrap();

        let _ = ArrayObject::new([0, 10, 20], ArrayFormat::F64, 1).unwrap();
    }

    /// Creating a cubemap whose width and height differ is expected to panic.
    #[test]
    #[should_panic]
    fn fail_cubemaps_with_unmatching_width_height() {
        let _context = crate::quick_init().unwrap();

        let mut cubemap_desc = ArrayDescriptor::from_dims_format([2, 3, 6], ArrayFormat::F64);
        cubemap_desc.set_flags(ArrayObjectFlags::CUBEMAP);

        let _ = ArrayObject::from_descriptor(&cubemap_desc).unwrap();
    }

    /// Creating a cubemap whose depth is not six is expected to panic.
    #[test]
    #[should_panic]
    fn fail_cubemaps_with_non_six_depth() {
        let _context = crate::quick_init().unwrap();

        let mut cubemap_desc = ArrayDescriptor::from_dims_format([4, 4, 5], ArrayFormat::F64);
        cubemap_desc.set_flags(ArrayObjectFlags::CUBEMAP);

        let _ = ArrayObject::from_descriptor(&cubemap_desc).unwrap();
    }

    /// Creating a layered cubemap whose depth is not a multiple of six is expected to panic.
    #[test]
    #[should_panic]
    fn fail_cubemaps_with_non_six_multiple_depth() {
        let _context = crate::quick_init().unwrap();

        let mut cubemap_desc = ArrayDescriptor::from_dims_format([4, 4, 10], ArrayFormat::F64);
        cubemap_desc.set_flags(ArrayObjectFlags::LAYERED | ArrayObjectFlags::CUBEMAP);

        let _ = ArrayObject::from_descriptor(&cubemap_desc).unwrap();
    }

    /// Creating a non-layered array with a depth but a zero height is expected to panic.
    #[test]
    #[should_panic]
    fn fail_with_depth_without_height() {
        let _context = crate::quick_init().unwrap();

        let _ = ArrayObject::new([10, 0, 20], ArrayFormat::F64, 1).unwrap();
    }

    /// Creating an array with three channels is expected to panic.
    #[test]
    #[should_panic]
    fn fails_on_invalid_num_channels() {
        let _context = crate::quick_init().unwrap();

        let _ = ArrayObject::new([1, 2, 3], ArrayFormat::F64, 3).unwrap();
    }
}