cudarc/cudnn/safe/
mod.rs

1//! Safe wrappers around cuDNN.
2//!
3//! # Convolutions
4//!
5//! 1. Allocate tensor descriptors with [`Cudnn::create_4d_tensor()`]
6//! 2. Allocate filter descriptors with [`Cudnn::create_4d_filter()`]
7//! 3. Allocate conv descriptors with [`Cudnn::create_conv2d()`]
8//! 4. Instantiate one of the following algorithms with the descriptors:
//!    a. [`ConvForward`]
//!    b. [`ConvBackwardData`] for computing gradient of image
//!    c. [`ConvBackwardFilter`] for computing gradient of filters
12//! 5. Call the `pick_algorithm` method of the struct. Specify the number of options to compare with a const generic.
13//! 6. Call the `get_workspace_size` method of the struct.
14//! 7. Re-allocate the workspace to the appropriate size.
15//! 8. Call the `launch` method of the struct.
16//!
//! # Reductions
//!
//! Create a [`ReductionDescriptor`] with one of the `Cudnn::create_reduction_*`
//! methods, then launch it through [`ReduceTensor`].
19mod activation;
20mod conv;
21mod core;
22mod pooling;
23mod reduce;
24mod softmax;
25
26#[allow(deprecated)]
27pub use self::conv::{
28    // Deprecated APIs
29    Conv2dBackwardData,
30    Conv2dBackwardFilter,
31    Conv2dDescriptor,
32    Conv2dForward,
33    // Current APIs
34    ConvBackwardData,
35    ConvBackwardFilter,
36    ConvBiasActivationForward,
37    ConvDescriptor,
38    ConvForward,
39    FilterDescriptor,
40};
41pub use self::core::{Cudnn, CudnnDataType, TensorDescriptor};
42pub use self::pooling::{PoolingDescriptor, PoolingForward};
43pub use self::reduce::{FlatIndices, NoIndices, ReduceTensor, ReductionDescriptor};
44pub use super::result::CudnnError;
45pub use activation::{ActivationDescriptor, ActivationForward};
46pub use softmax::{Softmax, SoftmaxForward};
47
#[cfg(test)]
mod tests {
    use super::*;
    // NOTE(review): `SoftmaxForward` is already brought in by `use super::*`
    // (it is re-exported from this module); this explicit import looks
    // redundant — confirm before removing.
    use crate::cudnn::safe::softmax::SoftmaxForward;
    use crate::{cudnn, driver::CudaContext};
    #[cfg(feature = "no-std")]
    use no_std_compat::vec;

    /// Smoke test: every descriptor constructor on [`Cudnn`] succeeds.
    /// Requires a CUDA device at ordinal 0 (as do all tests in this module).
    #[test]
    fn test_create_descriptors() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream)?;
        // 4-D tensor with explicit strides; [24, 12, 4, 1] is the contiguous
        // row-major stride set for dims [1, 2, 3, 4].
        let _ = cudnn.create_4d_tensor_ex::<f32>([1, 2, 3, 4], [24, 12, 4, 1])?;
        // N-dimensional (here 6-D) tensor, again with contiguous strides.
        let _ = cudnn.create_nd_tensor::<f64>(&[1, 2, 3, 4, 5, 6], &[720, 360, 120, 30, 6, 1])?;
        let _ = cudnn.create_4d_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [3, 3, 3, 3],
        )?;
        // Reduction descriptors, both with flat-index output and without.
        let _ = cudnn.create_reduction_flat_indices::<f32>(
            cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;
        let _ = cudnn.create_reduction_no_indices::<f32>(
            cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;
        Ok(())
    }

    /// Checks that `pick_algorithm` on the forward / backward-data /
    /// backward-filter conv ops returns the expected algorithm for a fixed
    /// problem size.
    ///
    /// NOTE(review): the asserted algorithm choices come from cuDNN's
    /// internal heuristics and may differ across cuDNN versions / GPUs —
    /// confirm these are stable on the supported configurations.
    #[test]
    fn test_conv2d_pick_algorithms() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream)?;

        // [0; 2], [1; 2], [1; 2]: assumed to be padding 0, stride 1,
        // dilation 1 per spatial dim — TODO confirm against the
        // `create_conv2d` signature.
        let conv = cudnn.create_conv2d::<f32>(
            [0; 2],
            [1; 2],
            [1; 2],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;
        // Input: N=100, C=128, H=W=224.
        let x = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 128, 224, 224],
        )?;
        // Filter: 256 output channels, 128 input channels, 3x3 kernel.
        let filter = cudnn.create_4d_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [256, 128, 3, 3],
        )?;
        // Output spatial size 222 = 224 - 3 + 1 (no padding, unit stride).
        let y = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 256, 222, 222],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x,
                w: &filter,
                y: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
            );
        }

        {
            // Backward-data reuses the same descriptors: `dx` has the shape
            // of the input, `dy` the shape of the output.
            let op = ConvBackwardData {
                conv: &conv,
                dx: &x,
                w: &filter,
                dy: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
            );
        }

        {
            // Backward-filter: `dw` has the filter's shape.
            let op = ConvBackwardFilter {
                conv: &conv,
                x: &x,
                dw: &filter,
                dy: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
            );
        }

        Ok(())
    }

    /// End-to-end 1-D convolution expressed as a 4-D problem: the unused
    /// trailing spatial dimension is set to 1 in every descriptor.
    /// With all-ones input and filter, each output element is the sum over
    /// in_channels * kernel_width = 128 * 3 contributions.
    #[test]
    fn test_conv1d() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // Two spatial dims (the second is the dummy unit dim); padding 0,
        // stride 1, dilation 1 — assumed order, TODO confirm.
        let conv = cudnn.create_convnd::<f32>(
            &[0; 2],
            &[1; 2],
            &[1; 2],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;
        // With less than 4 dimensions, 4D tensors should be used with 1 set for unused
        // dimensions

        // Create input, filter and output tensors
        // Input: N=100, C=128, L=32 (all ones).
        let x = stream.clone_htod(&vec![1.0f32; 100 * 128 * 32]).unwrap();
        let x_desc = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 128, 32, 1],
        )?;
        // Filter: 256 output channels, 128 input channels, width 3 (all ones).
        let filter = stream.clone_htod(&vec![1.0f32; 256 * 128 * 3]).unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[256, 128, 3, 1],
        )?;
        // Output length 30 = 32 - 3 + 1.
        let mut y = stream.alloc_zeros::<f32>(100 * 256 * 30).unwrap();
        let y_desc = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 256, 30, 1],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
            };

            // Pick algorithm
            // Note that the number of dimensions in the filter and input
            // must match. Hence the similarity with Conv2D operation.
            let algo = op.pick_algorithm()?;

            // Get workspace size
            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // Launch conv operation
            // (alpha, beta) = (1.0, 0.0): y = 1.0 * conv(x, w) + 0.0 * y.
            unsafe {
                op.launch(algo, Some(&mut workspace), (1.0, 0.0), &x, &filter, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 100 * 256 * 30);
            // All-ones data: each output = sum of 128 channels * 3 taps.
            assert_eq!(y_host[0], 128.0 * 3.0);
        }

        Ok(())
    }

    /// End-to-end 3-D convolution using N-dimensional descriptors.
    /// With all-ones input and filter, each output element is the sum over
    /// in_channels * kernel volume = 3 * 4 * 4 * 4 contributions.
    #[test]
    fn test_conv3d() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // Three spatial dims; padding 0, stride 1, dilation 1 — assumed
        // order, TODO confirm.
        let conv = cudnn.create_convnd::<f32>(
            &[0; 3],
            &[1; 3],
            &[1; 3],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;

        // Create input, filter and output tensors
        // Input: N=32, C=3, D=H=W=64 (all ones), contiguous strides.
        let x = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 64 * 64 * 64])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 3, 64, 64, 64],
            &[3 * 64 * 64 * 64, 64 * 64 * 64, 64 * 64, 64, 1],
        )?;
        // Filter: 32 output channels, 3 input channels, 4x4x4 kernel.
        let filter = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 4 * 4 * 4])
            .unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[32, 3, 4, 4, 4],
        )?;
        // Output spatial size 61 = 64 - 4 + 1 in each dimension.
        let mut y = stream.alloc_zeros::<f32>(32 * 32 * 61 * 61 * 61).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
            };

            // Pick algorithm
            let algo = op.pick_algorithm()?;

            // Get workspace size
            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // Launch conv operation
            unsafe {
                op.launch(algo, Some(&mut workspace), (1.0, 0.0), &x, &filter, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 32 * 61 * 61 * 61);
            // All-ones data: each output = 3 channels * 4*4*4 kernel taps.
            assert_eq!(y_host[0], 3.0 * 4.0 * 4.0 * 4.0);
        }

        Ok(())
    }

    /// Reduces a 2x3 tensor with ADD down to a single scalar:
    /// 1 + 2 + 3 + 4 + 5 + 6 = 21.
    #[test]
    fn test_reduction() {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone()).unwrap();

        let a = stream
            .clone_htod(&std::vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0])
            .unwrap();
        let mut c = stream.alloc_zeros::<f32>(1).unwrap();

        let reduce = cudnn
            .create_reduction_no_indices::<f32>(
                cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
                cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
            )
            .unwrap();
        // Input viewed as [1, 1, 2, 3]; output [1, 1, 1, 1] — every input
        // dimension is reduced away.
        // NOTE(review): leading strides of 0 here (vs. contiguous strides
        // elsewhere in this file) — presumably allowed for size-1 dims;
        // confirm against cudnnSetTensorNdDescriptor requirements.
        let a_desc = cudnn
            .create_nd_tensor::<f32>(&[1, 1, 2, 3], &[0, 6, 3, 1])
            .unwrap();
        let c_desc = cudnn
            .create_nd_tensor::<f32>(&[1, 1, 1, 1], &[0, 0, 0, 1])
            .unwrap();
        let op = ReduceTensor {
            reduce: &reduce,
            a: &a_desc,
            c: &c_desc,
        };

        let workspace_size = op.get_workspace_size().unwrap();
        let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

        unsafe { op.launch(&mut workspace, (1.0, 0.0), &a, &mut c) }.unwrap();

        let c_host = stream.clone_dtoh(&c).unwrap();
        assert_eq!(c_host.len(), 1);
        assert_eq!(c_host[0], 21.0);
    }

    /// Fused conv + bias + ReLU: same 3-D problem as `test_conv3d`, plus a
    /// per-output-channel bias of 1.0 and a ReLU that leaves the positive
    /// result unchanged. Expected: 3 * 4 * 4 * 4 + 1.
    #[test]
    fn test_conv_bias_activation() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        let conv = cudnn.create_convnd::<f32>(
            &[0; 3],
            &[1; 3],
            &[1; 3],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;

        // Create input, filter and output tensors
        let x = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 64 * 64 * 64])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 3, 64, 64, 64],
            &[3 * 64 * 64 * 64, 64 * 64 * 64, 64 * 64, 64, 1],
        )?;
        let filter = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 4 * 4 * 4])
            .unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[32, 3, 4, 4, 4],
        )?;
        // Bias: one value per output channel, broadcast over N/D/H/W.
        let bias = stream.clone_htod(&[1.0f32; 32]).unwrap();
        let bias_desc = cudnn.create_nd_tensor::<f32>(&[1, 32, 1, 1, 1], &[32, 1, 1, 1, 1])?;
        // ReLU with effectively no clipping ceiling (coef = f64::MAX).
        let activation_desc = cudnn.create_activation::<f32>(
            cudnn::sys::cudnnActivationMode_t::CUDNN_ACTIVATION_RELU,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN,
            f64::MAX,
        )?;
        // `z` is the residual/skip input of the fused op; all zeros here so
        // it contributes nothing to the result.
        let z = stream
            .clone_htod(&vec![0.0f32; 32 * 32 * 61 * 61 * 61])
            .unwrap();
        let z_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;
        let mut y = stream.alloc_zeros::<f32>(32 * 32 * 61 * 61 * 61).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;

        {
            let op = ConvBiasActivationForward {
                conv: &conv,
                act: &activation_desc,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
                z: &z_desc,
                bias: &bias_desc,
            };

            // Pick algorithm
            let algo = op.pick_algorithm()?;

            // Get workspace size
            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // Launch conv operation
            unsafe {
                op.launch(
                    algo,
                    Some(&mut workspace),
                    (1.0, 0.0),
                    &x,
                    &filter,
                    &z,
                    &bias,
                    &mut y,
                )?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 32 * 61 * 61 * 61);
            // conv result (192) + bias (1.0), unchanged by ReLU.
            assert_eq!(y_host[0], 3.0 * 4.0 * 4.0 * 4.0 + 1.0);
        }

        Ok(())
    }

    /// 2x2 max pooling with stride 2 and no padding. The first pooling
    /// window of the uploaded data is {1, 1, 5, 6}, whose max is 6.
    #[test]
    fn test_pooling() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // window [2, 2], padding [0, 0], stride [2, 2].
        let pooling = cudnn.create_poolingnd::<f32>(
            &[2, 2],
            &[0, 0],
            &[2, 2],
            cudnn::sys::cudnnPoolingMode_t::CUDNN_POOLING_MAX,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;

        // Create input, filter and output tensors
        // NOTE(review): only 16 floats are uploaded, but `x_desc` declares
        // [32, 3, 4, 4] (and the dim-0 stride 32*3*4 doesn't match the
        // contiguous value 3*4*4) — everything past the first plane reads
        // out of the uploaded buffer's bounds. Only y_host[0] is asserted,
        // but this looks unintentional; confirm and shrink the descriptor
        // or enlarge the buffer.
        let x = stream
            .clone_htod(&[
                1.0, 1.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0,
            ])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[32, 3, 4, 4], &[32 * 3 * 4, 3 * 4, 4, 1])?;
        let mut y = stream.alloc_zeros::<f32>(32 * 3 * 2 * 2).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[32, 3, 2, 2], &[3 * 2 * 2, 2 * 2, 2, 1])?;

        {
            let op = PoolingForward {
                pooling: &pooling,
                x: &x_desc,
                y: &y_desc,
            };

            // Launch conv operation
            unsafe {
                op.launch((1.0, 0.0), &x, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 3 * 2 * 2);
            // max of the top-left window {1, 1, 5, 6}.
            assert_eq!(y_host[0], 6.0);
        }

        Ok(())
    }

    /// ReLU activation: negatives are clamped to 0, positives pass through.
    #[test]
    fn test_activation() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        let act = cudnn.create_activation::<f32>(
            cudnn::sys::cudnnActivationMode_t::CUDNN_ACTIVATION_RELU,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN,
            f64::MAX,
        )?;

        // Create input, filter and output tensors
        let x = stream.clone_htod(&[-1.0, 2.0, -3.0, 100.0]).unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;
        let mut y = stream.alloc_zeros::<f32>(4).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;

        {
            let op = ActivationForward {
                act: &act,
                x: &x_desc,
                y: &y_desc,
            };

            // Launch conv operation
            unsafe {
                op.launch((1.0, 0.0), &x, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 2 * 2);
            assert_eq!(y_host[0], 0.0);
            assert_eq!(y_host[1], 2.0);
            assert_eq!(y_host[2], 0.0);
            assert_eq!(y_host[3], 100.0);
        }

        Ok(())
    }

    /// Instance-mode softmax over the 4 input values 1..=4.
    ///
    /// NOTE(review): these asserts use exact `f32` equality against
    /// hard-coded softmax outputs; any change in cuDNN's internal math
    /// (or the FAST vs ACCURATE algorithm) would break them. Consider an
    /// epsilon comparison.
    #[test]
    fn test_softmax() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        let softmax = cudnn
            .create_softmax::<f32>(cudnn::sys::cudnnSoftmaxMode_t::CUDNN_SOFTMAX_MODE_INSTANCE)?;

        // Create input, filter and output tensors.
        let x = stream.clone_htod(&[1.0, 2.0, 3.0, 4.0]).unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;
        let mut y = stream.alloc_zeros::<f32>(4).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;

        {
            let op = SoftmaxForward {
                softmax: &softmax,
                x: &x_desc,
                y: &y_desc,
            };

            unsafe {
                op.launch(
                    (1.0, 0.0),
                    cudnn::sys::cudnnSoftmaxAlgorithm_t::CUDNN_SOFTMAX_FAST,
                    &x,
                    &mut y,
                )?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 2 * 2);
            // softmax([1, 2, 3, 4]) — values sum to 1.0.
            assert_eq!(y_host[0], 0.0320586);
            assert_eq!(y_host[1], 0.08714432);
            assert_eq!(y_host[2], 0.23688282);
            assert_eq!(y_host[3], 0.6439142);
        }

        Ok(())
    }
}