1mod activation;
20mod conv;
21mod core;
22mod pooling;
23mod reduce;
24mod softmax;
25
26#[allow(deprecated)]
27pub use self::conv::{
28 Conv2dBackwardData,
30 Conv2dBackwardFilter,
31 Conv2dDescriptor,
32 Conv2dForward,
33 ConvBackwardData,
35 ConvBackwardFilter,
36 ConvBiasActivationForward,
37 ConvDescriptor,
38 ConvForward,
39 FilterDescriptor,
40};
41pub use self::core::{Cudnn, CudnnDataType, TensorDescriptor};
42pub use self::pooling::{PoolingDescriptor, PoolingForward};
43pub use self::reduce::{FlatIndices, NoIndices, ReduceTensor, ReductionDescriptor};
44pub use super::result::CudnnError;
45pub use activation::{ActivationDescriptor, ActivationForward};
46pub use softmax::{Softmax, SoftmaxForward};
47
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cudnn::safe::softmax::SoftmaxForward;
    use crate::{cudnn, driver::CudaContext};
    #[cfg(feature = "no-std")]
    use no_std_compat::vec;

    /// Smoke test: every descriptor constructor on [`Cudnn`] succeeds with
    /// plausible shapes/strides. No kernels are launched and no device
    /// buffers are allocated beyond handle creation.
    #[test]
    fn test_create_descriptors() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream)?;
        // 4-d tensor with explicit, fully-contiguous NCHW strides ([24,12,4,1] for [1,2,3,4]).
        let _ = cudnn.create_4d_tensor_ex::<f32>([1, 2, 3, 4], [24, 12, 4, 1])?;
        // N-d (6 dim) tensor, also contiguous, using the f64 data type path.
        let _ = cudnn.create_nd_tensor::<f64>(&[1, 2, 3, 4, 5, 6], &[720, 360, 120, 30, 6, 1])?;
        // 4-d filter descriptor in NCHW layout.
        let _ = cudnn.create_4d_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [3, 3, 3, 3],
        )?;
        // Reduction descriptors: one variant that reports flattened indices,
        // one that reports no indices. Both use ADD with NaN propagation.
        let _ = cudnn.create_reduction_flat_indices::<f32>(
            cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;
        let _ = cudnn.create_reduction_no_indices::<f32>(
            cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;
        Ok(())
    }

    /// Checks that cuDNN's algorithm heuristics pick the expected algorithm
    /// for a large 3x3 cross-correlation (forward, backward-data, and
    /// backward-filter). Only descriptors are created; no data is allocated
    /// and nothing is launched.
    ///
    /// NOTE(review): the asserted algorithm enums are whatever the heuristic
    /// ranks first on the machine this was written on — presumably stable for
    /// this problem size, but may differ across cuDNN versions / GPUs; confirm
    /// before treating a failure here as a regression.
    #[test]
    fn test_conv2d_pick_algorithms() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream)?;

        // padding=0, stride=1, dilation=1, cross-correlation mode.
        let conv = cudnn.create_conv2d::<f32>(
            [0; 2],
            [1; 2],
            [1; 2],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;
        let x = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 128, 224, 224],
        )?;
        let filter = cudnn.create_4d_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [256, 128, 3, 3],
        )?;
        // Output spatial size: 224 - 3 + 1 = 222 (valid convolution).
        let y = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 256, 222, 222],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x,
                w: &filter,
                y: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionFwdAlgo_t::CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
            );
        }

        {
            // Backward-data reuses the same descriptors: x plays dx, y plays dy.
            let op = ConvBackwardData {
                conv: &conv,
                dx: &x,
                w: &filter,
                dy: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionBwdDataAlgo_t::CUDNN_CONVOLUTION_BWD_DATA_ALGO_1
            );
        }

        {
            let op = ConvBackwardFilter {
                conv: &conv,
                x: &x,
                dw: &filter,
                dy: &y,
            };
            let algo = op.pick_algorithm()?;
            assert_eq!(
                algo,
                cudnn::sys::cudnnConvolutionBwdFilterAlgo_t::CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1
            );
        }

        Ok(())
    }

    /// Runs a "1-d" convolution end-to-end by expressing it as a 2-d
    /// convolution with a trailing unit dimension (W = 1). Input is all ones,
    /// filter is all ones, so every output element is the window size:
    /// 128 channels * kernel 3 = 384.
    #[test]
    fn test_conv1d() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // N-d convolution with 2 spatial dims: padding=0, stride=1, dilation=1.
        let conv = cudnn.create_convnd::<f32>(
            &[0; 2],
            &[1; 2],
            &[1; 2],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;
        // x: [N=100, C=128, H=32, W=1], all ones.
        let x = stream.clone_htod(&vec![1.0f32; 100 * 128 * 32]).unwrap();
        let x_desc = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 128, 32, 1],
        )?;
        // filter: [O=256, I=128, Kh=3, Kw=1], all ones.
        let filter = stream.clone_htod(&vec![1.0f32; 256 * 128 * 3]).unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[256, 128, 3, 1],
        )?;
        // Output length: 32 - 3 + 1 = 30.
        let mut y = stream.alloc_zeros::<f32>(100 * 256 * 30).unwrap();
        let y_desc = cudnn.create_4d_tensor::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            [100, 256, 30, 1],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
            };

            // Let cuDNN choose the algorithm, then size the workspace for it.
            let algo = op.pick_algorithm()?;

            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // SAFETY: descriptors match the device buffers allocated above.
            // (alpha, beta) = (1.0, 0.0): y is overwritten, not accumulated.
            unsafe {
                op.launch(algo, Some(&mut workspace), (1.0, 0.0), &x, &filter, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 100 * 256 * 30);
            // ones * ones summed over 128 channels x 3 taps.
            assert_eq!(y_host[0], 128.0 * 3.0);
        }

        Ok(())
    }

    /// Runs a full 3-d convolution with all-ones input and filter; each
    /// output element equals the receptive-field size 3 * 4 * 4 * 4.
    #[test]
    fn test_conv3d() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // 3 spatial dims: padding=0, stride=1, dilation=1.
        let conv = cudnn.create_convnd::<f32>(
            &[0; 3],
            &[1; 3],
            &[1; 3],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;

        // x: [N=32, C=3, D=H=W=64], contiguous NCDHW strides.
        let x = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 64 * 64 * 64])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 3, 64, 64, 64],
            &[3 * 64 * 64 * 64, 64 * 64 * 64, 64 * 64, 64, 1],
        )?;
        // filter: [O=32, I=3, 4x4x4], all ones.
        let filter = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 4 * 4 * 4])
            .unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[32, 3, 4, 4, 4],
        )?;
        // Output spatial size: 64 - 4 + 1 = 61 in each dimension.
        let mut y = stream.alloc_zeros::<f32>(32 * 32 * 61 * 61 * 61).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;

        {
            let op = ConvForward {
                conv: &conv,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
            };

            let algo = op.pick_algorithm()?;

            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // SAFETY: descriptors match the device buffers allocated above.
            unsafe {
                op.launch(algo, Some(&mut workspace), (1.0, 0.0), &x, &filter, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 32 * 61 * 61 * 61);
            // Sum over 3 input channels x 4x4x4 kernel of (1 * 1).
            assert_eq!(y_host[0], 3.0 * 4.0 * 4.0 * 4.0);
        }

        Ok(())
    }

    /// Reduces a 2x3 tensor of [1..6] to a scalar with ADD; expects 21.
    #[test]
    fn test_reduction() {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone()).unwrap();

        let a = stream
            .clone_htod(&std::vec![1.0f32, 2.0, 3.0, 4.0, 5.0, 6.0])
            .unwrap();
        // Single-element output: all dimensions reduced away.
        let mut c = stream.alloc_zeros::<f32>(1).unwrap();

        let reduce = cudnn
            .create_reduction_no_indices::<f32>(
                cudnn::sys::cudnnReduceTensorOp_t::CUDNN_REDUCE_TENSOR_ADD,
                cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
            )
            .unwrap();
        // a: [1,1,2,3]; c: [1,1,1,1] — c's unit dims trigger the reduction.
        // NOTE(review): leading strides of 0 on size-1 dims appear deliberate
        // (broadcast-style), and cuDNN accepts them here — confirm against the
        // wrapper's stride conventions if this descriptor is ever reused.
        let a_desc = cudnn
            .create_nd_tensor::<f32>(&[1, 1, 2, 3], &[0, 6, 3, 1])
            .unwrap();
        let c_desc = cudnn
            .create_nd_tensor::<f32>(&[1, 1, 1, 1], &[0, 0, 0, 1])
            .unwrap();
        let op = ReduceTensor {
            reduce: &reduce,
            a: &a_desc,
            c: &c_desc,
        };

        let workspace_size = op.get_workspace_size().unwrap();
        let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

        // SAFETY: descriptors match the device buffers allocated above.
        unsafe { op.launch(&mut workspace, (1.0, 0.0), &a, &mut c) }.unwrap();

        let c_host = stream.clone_dtoh(&c).unwrap();
        assert_eq!(c_host.len(), 1);
        // 1 + 2 + 3 + 4 + 5 + 6 = 21.
        assert_eq!(c_host[0], 21.0);
    }

    /// Fused conv + bias + activation: the 3-d convolution from
    /// [`test_conv3d`] (all-ones input/filter => 192 per element), plus a
    /// zero z tensor, a bias of 1.0 per output channel, and ReLU.
    /// Expected output: relu(192 + 0 + 1) = 193.
    #[test]
    fn test_conv_bias_activation() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        let conv = cudnn.create_convnd::<f32>(
            &[0; 3],
            &[1; 3],
            &[1; 3],
            cudnn::sys::cudnnConvolutionMode_t::CUDNN_CROSS_CORRELATION,
        )?;

        let x = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 64 * 64 * 64])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 3, 64, 64, 64],
            &[3 * 64 * 64 * 64, 64 * 64 * 64, 64 * 64, 64, 1],
        )?;
        let filter = stream
            .clone_htod(&vec![1.0f32; 32 * 3 * 4 * 4 * 4])
            .unwrap();
        let filter_desc = cudnn.create_nd_filter::<f32>(
            cudnn::sys::cudnnTensorFormat_t::CUDNN_TENSOR_NCHW,
            &[32, 3, 4, 4, 4],
        )?;
        // One bias value per output channel, broadcast over [N, D, H, W].
        let bias = stream.clone_htod(&[1.0f32; 32]).unwrap();
        let bias_desc = cudnn.create_nd_tensor::<f32>(&[1, 32, 1, 1, 1], &[32, 1, 1, 1, 1])?;
        // ReLU with no upper clipping (coefficient f64::MAX).
        let activation_desc = cudnn.create_activation::<f32>(
            cudnn::sys::cudnnActivationMode_t::CUDNN_ACTIVATION_RELU,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN,
            f64::MAX,
        )?;
        // z is all zeros, so it contributes nothing to the fused result.
        let z = stream
            .clone_htod(&vec![0.0f32; 32 * 32 * 61 * 61 * 61])
            .unwrap();
        let z_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;
        let mut y = stream.alloc_zeros::<f32>(32 * 32 * 61 * 61 * 61).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(
            &[32, 32, 61, 61, 61],
            &[32 * 61 * 61 * 61, 61 * 61 * 61, 61 * 61, 61, 1],
        )?;

        {
            let op = ConvBiasActivationForward {
                conv: &conv,
                act: &activation_desc,
                x: &x_desc,
                w: &filter_desc,
                y: &y_desc,
                z: &z_desc,
                bias: &bias_desc,
            };

            let algo = op.pick_algorithm()?;

            let workspace_size = op.get_workspace_size(algo)?;
            let mut workspace = stream.alloc_zeros::<u8>(workspace_size).unwrap();

            // SAFETY: descriptors match the device buffers allocated above.
            unsafe {
                op.launch(
                    algo,
                    Some(&mut workspace),
                    (1.0, 0.0),
                    &x,
                    &filter,
                    &z,
                    &bias,
                    &mut y,
                )?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 32 * 61 * 61 * 61);
            // conv (3 * 4 * 4 * 4 = 192) + bias (1.0), passed through ReLU.
            assert_eq!(y_host[0], 3.0 * 4.0 * 4.0 * 4.0 + 1.0);
        }

        Ok(())
    }

    /// 2x2 max-pooling with stride 2 over a 4x4 plane. The first window is
    /// {1, 1, 5, 6}, so the first output element must be 6.
    ///
    /// NOTE(review): x_desc declares [32, 3, 4, 4] with an N-stride of
    /// `32 * 3 * 4` — a contiguous layout would be `3 * 4 * 4` — and only 16
    /// host values are uploaded for a descriptor covering 32*3*4*4 elements.
    /// Only y_host[0] is asserted, which lives inside the valid region, so the
    /// test presumably passes regardless; confirm the intended shape/strides.
    #[test]
    fn test_pooling() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // window=2x2, padding=0, stride=2x2, MAX mode.
        let pooling = cudnn.create_poolingnd::<f32>(
            &[2, 2],
            &[0, 0],
            &[2, 2],
            cudnn::sys::cudnnPoolingMode_t::CUDNN_POOLING_MAX,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN,
        )?;

        // One 4x4 plane of input data (row-major).
        let x = stream
            .clone_htod(&[
                1.0, 1.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 3.0, 2.0, 1.0, 0.0, 1.0, 2.0, 3.0, 4.0,
            ])
            .unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[32, 3, 4, 4], &[32 * 3 * 4, 3 * 4, 4, 1])?;
        // Pooled output: each 4x4 plane shrinks to 2x2.
        let mut y = stream.alloc_zeros::<f32>(32 * 3 * 2 * 2).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[32, 3, 2, 2], &[3 * 2 * 2, 2 * 2, 2, 1])?;

        {
            let op = PoolingForward {
                pooling: &pooling,
                x: &x_desc,
                y: &y_desc,
            };

            // SAFETY: descriptors correspond to the device buffers above.
            unsafe {
                op.launch((1.0, 0.0), &x, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 32 * 3 * 2 * 2);
            // max(1, 1, 5, 6) over the top-left 2x2 window.
            assert_eq!(y_host[0], 6.0);
        }

        Ok(())
    }

    /// ReLU forward pass: negative inputs clamp to 0, non-negative inputs
    /// pass through unchanged.
    #[test]
    fn test_activation() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        // ReLU, no NaN propagation, no upper clip (f64::MAX coefficient).
        let act = cudnn.create_activation::<f32>(
            cudnn::sys::cudnnActivationMode_t::CUDNN_ACTIVATION_RELU,
            cudnn::sys::cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN,
            f64::MAX,
        )?;

        // Four values laid out as a [1, 1, 2, 2] tensor.
        let x = stream.clone_htod(&[-1.0, 2.0, -3.0, 100.0]).unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;
        let mut y = stream.alloc_zeros::<f32>(4).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;

        {
            let op = ActivationForward {
                act: &act,
                x: &x_desc,
                y: &y_desc,
            };

            // SAFETY: descriptors correspond to the device buffers above.
            unsafe {
                op.launch((1.0, 0.0), &x, &mut y)?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 2 * 2);
            // relu(-1) = 0, relu(2) = 2, relu(-3) = 0, relu(100) = 100.
            assert_eq!(y_host[0], 0.0);
            assert_eq!(y_host[1], 2.0);
            assert_eq!(y_host[2], 0.0);
            assert_eq!(y_host[3], 100.0);
        }

        Ok(())
    }

    /// Softmax over [1, 2, 3, 4] in INSTANCE mode (softmax across the whole
    /// C*H*W extent of each sample).
    ///
    /// NOTE(review): the expected values are bitwise f32 comparisons against
    /// precomputed softmax outputs; presumably stable for CUDNN_SOFTMAX_FAST
    /// on the tested hardware, but exact float equality could break across
    /// cuDNN versions — confirm if this test starts flaking.
    #[test]
    fn test_softmax() -> Result<(), CudnnError> {
        let ctx = CudaContext::new(0).unwrap();
        let stream = ctx.default_stream();
        let cudnn = Cudnn::new(stream.clone())?;

        let softmax = cudnn
            .create_softmax::<f32>(cudnn::sys::cudnnSoftmaxMode_t::CUDNN_SOFTMAX_MODE_INSTANCE)?;

        let x = stream.clone_htod(&[1.0, 2.0, 3.0, 4.0]).unwrap();
        let x_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;
        let mut y = stream.alloc_zeros::<f32>(4).unwrap();
        let y_desc = cudnn.create_nd_tensor::<f32>(&[1, 1, 2, 2], &[2 * 2, 2 * 2, 2, 1])?;

        {
            let op = SoftmaxForward {
                softmax: &softmax,
                x: &x_desc,
                y: &y_desc,
            };

            // SAFETY: descriptors correspond to the device buffers above.
            unsafe {
                op.launch(
                    (1.0, 0.0),
                    cudnn::sys::cudnnSoftmaxAlgorithm_t::CUDNN_SOFTMAX_FAST,
                    &x,
                    &mut y,
                )?;
            }

            let y_host = stream.clone_dtoh(&y).unwrap();
            assert_eq!(y_host.len(), 2 * 2);
            // softmax([1, 2, 3, 4]) = exp(x_i) / sum(exp(x)).
            assert_eq!(y_host[0], 0.0320586);
            assert_eq!(y_host[1], 0.08714432);
            assert_eq!(y_host[2], 0.23688282);
            assert_eq!(y_host[3], 0.6439142);
        }

        Ok(())
    }
}