#![allow(missing_docs)]
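//! cuDNN-backed implementations of the coaster-nn operation traits
//! (activations, convolution, pooling, LRN, softmax, dropout, RNN)
//! for `Backend<Cuda>`.
//!
//! A minimal usage sketch (not compiled here; it assumes a CUDA-capable
//! device and that the `coaster` constructor names below match the version
//! in use):
//!
//! ```rust,ignore
//! use coaster::prelude::*;
//! use coaster_nn::*;
//!
//! let backend = Backend::<Cuda>::default().unwrap();
//! let x = SharedTensor::<f32>::new(&(4, 3));
//! let mut result = SharedTensor::<f32>::new(&(4, 3));
//! // Reads `x` and writes the elementwise sigmoid into `result`,
//! // both resident on the CUDA device.
//! backend.sigmoid(&x, &mut result).unwrap();
//! ```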

pub use crate::cudnn::utils::{DataType, DataTypeInfo};
use crate::cudnn::*;
use crate::plugin::*;
use co::plugin::numeric_helpers::Float;
use co::plugin::Error as PluginError;
use co::prelude::*;
use co::Error;
use coaster as co;

#[macro_use]
pub mod helper;

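/// Builds the per-timestep input/output tensor descriptors plus the
/// hidden- and cell-state descriptors that the cuDNN RNN API expects.
///
/// `x_desc`/`y_desc` (and their gradient counterparts) hold one descriptor
/// per element of the sequence; the hidden/cell-state descriptors are shaped
/// `[num_layers * directions, batch_size, hidden_size]`.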
pub(crate) fn rnn_sequence_descriptors(
    sequence_length: i32,
    input_size: i32,
    hidden_size: i32,
    batch_size: i32,
    num_layers: i32,
    direction_mode: DirectionMode,
    data_type: DataType,
) -> Result<RnnSequenceDescriptors, Error> {
    let bidirectional = if direction_mode == DirectionMode::UniDirectional {
        1
    } else {
        2
    };

    let dim_input = vec![num_layers, batch_size, input_size];
    let dim_output = vec![num_layers, batch_size, hidden_size];
    let dim_hidden_cell = vec![num_layers * bidirectional, batch_size, hidden_size];
    let _stride_input = vec![dim_input[2] * dim_input[1], dim_input[2], 1];
    let _stride_output = vec![dim_output[2] * dim_output[1], dim_output[2], 1];
    let stride_hidden_cell = vec![
        dim_hidden_cell[2] * dim_hidden_cell[1],
        dim_hidden_cell[2],
        1,
    ];

    let mut x_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize);
    let mut y_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize);
    let mut dx_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize);
    let mut dy_desc: Vec<TensorDescriptor> = Vec::with_capacity(sequence_length as usize);

    {
        // One descriptor per time step, shaped [batch, features, 1] with
        // fully-packed strides.
        let dim_x = vec![batch_size, input_size, 1];
        let stride_x = vec![dim_x[2] * dim_x[1], dim_x[2], 1];
        let dim_y = vec![batch_size, hidden_size * bidirectional, 1];
        let stride_y = vec![dim_y[2] * dim_y[1], dim_y[2], 1];
        for _ in 0..sequence_length {
            x_desc.push(TensorDescriptor::new(&dim_x, &stride_x, data_type).unwrap());
            dx_desc.push(TensorDescriptor::new(&dim_x, &stride_x, data_type).unwrap());
            y_desc.push(TensorDescriptor::new(&dim_y, &stride_y, data_type).unwrap());
            dy_desc.push(TensorDescriptor::new(&dim_y, &stride_y, data_type).unwrap());
        }
    }

    Ok(RnnSequenceDescriptors {
        x_desc,
        y_desc,
        dx_desc,
        dy_desc,
        hx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        hy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        cx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        cy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        dhx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        dhy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        dcx_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
        dcy_desc: TensorDescriptor::new(&dim_hidden_cell, &stride_hidden_cell, data_type).unwrap(),
    })
}

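/// Convenience constructors that derive cuDNN descriptors from a
/// `SharedTensor`'s shape and element type.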
pub trait ICudnnDesc<T> {
    /// Creates a cuDNN tensor descriptor from this tensor's dimensions,
    /// using the default fully-packed strides.
    fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError>;

    /// Like `cudnn_tensor_desc`, but reshapes 1D-3D tensors into the 4D
    /// layout the cuDNN softmax routines expect.
    fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError>;

    /// Like `cudnn_tensor_desc`, but pads rank-1 and rank-2 tensors out to
    /// three dimensions by prepending unit axes.
    fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError>;

    /// Creates a cuDNN filter descriptor from this tensor's dimensions.
    fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError>;

    fn cudnn_convolution_desc(
        &self,
        filter: &SharedTensor<T>,
    ) -> Result<ConvolutionDescriptor, PluginError>;

    fn cudnn_rnn_desc(
        &self,
        cudnn_framework: &Cudnn,
        hidden_size: i32,
        num_layers: i32,
        dropout_desc: utils::DropoutConfig,
        input_mode: cudnnRNNInputMode_t,
        direction: cudnnDirectionMode_t,
        mode: cudnnRNNMode_t,
        algorithm: cudnnRNNAlgo_t,
        padding_mode: cudnnRNNPaddingMode_t,
    ) -> Result<RnnDescriptor, PluginError>;
}

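// Conversions between the plugin's convolution-algorithm enums and the raw
// cuDNN enums. For each family, `find_cudnn_algo` resolves `Auto` by asking
// cuDNN for suitable algorithms and taking the top-ranked candidate.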
impl ConvForwardAlgo {
    fn as_cudnn(&self) -> Result<cudnnConvolutionFwdAlgo_t, Error> {
        use crate::cudnn::cudnnConvolutionFwdAlgo_t::*;
        use crate::ConvForwardAlgo::*;
        Ok(match *self {
            Auto => {
                return Err(Error::Plugin(PluginError::Plugin(
                    "Can't create cuDNN convolution forward algorithm from \
                     ConvForwardAlgo::Auto. Use `find_cudnn_algo` to find an algorithm.",
                )))
            }
            GEMM => CUDNN_CONVOLUTION_FWD_ALGO_GEMM,
            ImplicitGEMM => CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM,
            ImplicitPrecompiledGEMM => CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM,
            FFT => CUDNN_CONVOLUTION_FWD_ALGO_FFT,
            FFTTiling => CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING,
            Direct => CUDNN_CONVOLUTION_FWD_ALGO_DIRECT,
            Winograd => CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD,
            WinogradNonFused => CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED,
        })
    }

    fn from_cudnn(algo: &cudnnConvolutionFwdAlgo_t) -> ConvForwardAlgo {
        use crate::cudnn::cudnnConvolutionFwdAlgo_t::*;
        use crate::ConvForwardAlgo::*;
        match *algo {
            CUDNN_CONVOLUTION_FWD_ALGO_GEMM => GEMM,
            CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM => ImplicitGEMM,
            CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM => ImplicitPrecompiledGEMM,
            CUDNN_CONVOLUTION_FWD_ALGO_FFT => FFT,
            CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING => FFTTiling,
            CUDNN_CONVOLUTION_FWD_ALGO_DIRECT => Direct,
            CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD => Winograd,
            CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED => WinogradNonFused,
            _ => unreachable!(),
        }
    }

    fn find_cudnn_algo(
        &self,
        cudnn_framework: &Cudnn,
        filter_desc: &FilterDescriptor,
        conv_desc: &ConvolutionDescriptor,
        src_desc: &TensorDescriptor,
        dest_desc: &TensorDescriptor,
    ) -> Result<ConvForwardAlgo, Error> {
        if !self.is_auto() {
            return Ok(*self);
        }
        let algos = API::find_convolution_forward_algorithm(
            *cudnn_framework.id_c(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();
        let algo = match algos.len() {
            0 => {
                return Err(Error::Plugin(PluginError::Operation(
                    "Unable to find CUDA cuDNN convolution forward algorithm.",
                )))
            }
            _ => algos[0].algo,
        };
        Ok(ConvForwardAlgo::from_cudnn(&algo))
    }
}

impl ConvBackwardFilterAlgo {
    fn as_cudnn(&self) -> Result<cudnnConvolutionBwdFilterAlgo_t, Error> {
        use crate::cudnn::cudnnConvolutionBwdFilterAlgo_t::*;
        use crate::ConvBackwardFilterAlgo::*;
        Ok(match *self {
            Auto => {
                return Err(Error::Plugin(PluginError::Plugin(
                    "Can't create cuDNN convolution backward filter algorithm from \
                     ConvBackwardFilterAlgo::Auto. Use `find_cudnn_algo` to find an \
                     algorithm.",
                )))
            }
            ImplicitGEMM => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1,
            ImplicitGEMMSum => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0,
            ImplicitPrecompiledGEMMSum => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3,
            FFT => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT,
            WinogradNonFused => CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED,
        })
    }

    fn from_cudnn(algo: &cudnnConvolutionBwdFilterAlgo_t) -> ConvBackwardFilterAlgo {
        use crate::cudnn::cudnnConvolutionBwdFilterAlgo_t::*;
        use crate::ConvBackwardFilterAlgo::*;
        match *algo {
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 => ImplicitGEMMSum,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 => ImplicitGEMM,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT => FFT,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 => ImplicitPrecompiledGEMMSum,
            CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED => WinogradNonFused,
            _ => unimplemented!(),
        }
    }

    fn find_cudnn_algo(
        &self,
        cudnn_framework: &Cudnn,
        filter_desc: &FilterDescriptor,
        conv_desc: &ConvolutionDescriptor,
        src_desc: &TensorDescriptor,
        dest_desc: &TensorDescriptor,
    ) -> Result<ConvBackwardFilterAlgo, Error> {
        if !self.is_auto() {
            return Ok(*self);
        }
        let algos = API::find_convolution_backward_filter_algorithm(
            *cudnn_framework.id_c(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();
        let algo = match algos.len() {
            0 => {
                return Err(Error::Plugin(PluginError::Operation(
                    "Unable to find CUDA cuDNN convolution backward filter algorithm.",
                )))
            }
            _ => algos[0].algo,
        };
        Ok(ConvBackwardFilterAlgo::from_cudnn(&algo))
    }
}

impl ConvBackwardDataAlgo {
    fn as_cudnn(&self) -> Result<cudnnConvolutionBwdDataAlgo_t, Error> {
        use crate::cudnn::cudnnConvolutionBwdDataAlgo_t::*;
        use crate::ConvBackwardDataAlgo::*;
        Ok(match *self {
            Auto => {
                return Err(Error::Plugin(PluginError::Plugin(
                    "Can't create cuDNN convolution backward data algorithm from \
                     ConvBackwardDataAlgo::Auto. Use `find_cudnn_algo` to find \
                     an algorithm.",
                )))
            }
            ImplicitGEMM => CUDNN_CONVOLUTION_BWD_DATA_ALGO_1,
            ImplicitGEMMSum => CUDNN_CONVOLUTION_BWD_DATA_ALGO_0,
            FFT => CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT,
            FFTTiling => CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING,
            Winograd => CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD,
            WinogradNonFused => CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED,
        })
    }

    fn from_cudnn(algo: &cudnnConvolutionBwdDataAlgo_t) -> ConvBackwardDataAlgo {
        use crate::cudnn::cudnnConvolutionBwdDataAlgo_t::*;
        use crate::ConvBackwardDataAlgo::*;
        match *algo {
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 => ImplicitGEMMSum,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 => ImplicitGEMM,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT => FFT,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING => FFTTiling,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD => Winograd,
            CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED => WinogradNonFused,
            _ => unimplemented!(),
        }
    }

    fn find_cudnn_algo(
        &self,
        cudnn_framework: &Cudnn,
        filter_desc: &FilterDescriptor,
        conv_desc: &ConvolutionDescriptor,
        src_desc: &TensorDescriptor,
        dest_desc: &TensorDescriptor,
    ) -> Result<ConvBackwardDataAlgo, Error> {
        if !self.is_auto() {
            return Ok(*self);
        }
        let algos = API::find_convolution_backward_data_algorithm(
            *cudnn_framework.id_c(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();

        let algo = match algos.len() {
            0 => {
                return Err(Error::Plugin(PluginError::Operation(
                    "Unable to find CUDA cuDNN convolution backward data algorithm.",
                )))
            }
            _ => algos[0].algo,
        };
        Ok(ConvBackwardDataAlgo::from_cudnn(&algo))
    }
}

impl<T> ICudnnDesc<T> for SharedTensor<T>
where
    T: Float + DataTypeInfo,
{
    fn cudnn_tensor_desc(&self) -> Result<TensorDescriptor, PluginError> {
        exec!(TensorDescriptor::new(
            &self.desc().dims_i32().clone(),
            &self.desc().default_stride_i32().clone(),
            <T as DataTypeInfo>::cudnn_data_type(),
        ) => "Unable to create CuDNN TensorDescriptor.")
    }

    fn cudnn_tensor_desc_softmax(&self) -> Result<TensorDescriptor, PluginError> {
        let actual_desc = self.desc().clone();
        let override_desc = match actual_desc.len() {
            // unbatched, one dimension: [x] -> [1, x, 1, 1]
            1 => vec![1, actual_desc[0], 1, 1],
            // batched, one dimension: [batch, x] -> [batch, x, 1, 1]
            2 => vec![actual_desc[0], actual_desc[1], 1, 1],
            // three dimensions: [x, y, z] -> [1, x, y, z]
            3 => vec![1, actual_desc[0], actual_desc[1], actual_desc[2]],
            _ => actual_desc,
        };
        exec!(TensorDescriptor::new(
            &override_desc.dims_i32().clone(),
            &override_desc.default_stride_i32().clone(),
            <T as DataTypeInfo>::cudnn_data_type(),
        ) => "Unable to create CuDNN TensorDescriptor.")
    }

    fn cudnn_tensor_desc_flat(&self) -> Result<TensorDescriptor, PluginError> {
        let actual_desc = self.desc().clone();
        // Pad rank-1 and rank-2 tensors out to three dimensions by
        // prepending unit axes.
        let mut override_desc = match actual_desc.len() {
            1 => vec![1, 1],
            2 => vec![1],
            _ => vec![],
        };
        for dim in actual_desc {
            override_desc.push(dim);
        }
        exec!(TensorDescriptor::new(
            &override_desc.dims_i32().clone(),
            &override_desc.default_stride_i32().clone(),
            <T as DataTypeInfo>::cudnn_data_type(),
        ) => "Unable to create CuDNN TensorDescriptor.")
    }

    fn cudnn_filter_desc(&self) -> Result<FilterDescriptor, PluginError> {
        exec!(FilterDescriptor::new(
            &self.desc().dims_i32().clone(),
            <T as DataTypeInfo>::cudnn_data_type(),
        ) => "Unable to create CuDNN FilterDescriptor.")
    }

    fn cudnn_convolution_desc(
        &self,
        filter: &SharedTensor<T>,
    ) -> Result<ConvolutionDescriptor, PluginError> {
        exec!(ConvolutionDescriptor::new(
            &self.desc().dims_i32().clone(),
            &filter.desc().default_stride_i32().clone(),
            <T as DataTypeInfo>::cudnn_data_type(),
        ) => "Unable to create CuDNN ConvolutionDescriptor.")
    }

    fn cudnn_rnn_desc(
        &self,
        cudnn_framework: &Cudnn,
        hidden_size: i32,
        num_layers: i32,
        dropout_desc: utils::DropoutConfig,
        input_mode: cudnnRNNInputMode_t,
        direction: cudnnDirectionMode_t,
        mode: cudnnRNNMode_t,
        algorithm: cudnnRNNAlgo_t,
        padding_mode: cudnnRNNPaddingMode_t,
    ) -> Result<RnnDescriptor, PluginError> {
        exec!(RnnDescriptor::new(
            &cudnn_framework,
            hidden_size,
            num_layers,
            dropout_desc,
            input_mode,
            direction,
            mode,
            algorithm,
            <T as DataTypeInfo>::cudnn_data_type(),
            padding_mode,
        ) => "Unable to create CuDNN RNNDescriptor")
    }
}

impl<T> NN<T> for Backend<Cuda>
where
    T: Float + DataTypeInfo,
{
    type CC = utils::ConvolutionConfig;
    type CLRN = utils::NormalizationConfig;
    type CPOOL = utils::PoolingConfig;
    type CDROP = utils::DropoutConfig;
    type CRNN = utils::RnnConfig;

    fn init_nn() {}
}

impl<T> NNOperationConfig<T> for utils::ConvolutionConfig where T: Float + DataTypeInfo {}
impl<T> NNOperationConfig<T> for utils::RnnConfig where T: Float + DataTypeInfo {}
impl<T> NNOperationConfig<T> for utils::NormalizationConfig where T: Float + DataTypeInfo {}
impl<T> NNOperationConfig<T> for utils::PoolingConfig where T: Float + DataTypeInfo {}
impl<T> NNOperationConfig<T> for utils::DropoutConfig where T: Float + DataTypeInfo {}

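// The activation implementations below all follow the same pattern: build
// flat tensor descriptors, lock the tensors on the CUDA device through the
// read!/read_write!/write_only! helper macros, then hand the raw pointers
// to the corresponding cuDNN call wrapped in exec2!.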
impl<T> Sigmoid<T> for Backend<Cuda>
where
    T: Float + DataTypeInfo + Default,
{
    fn sigmoid(&self, x: &SharedTensor<T>, result: &mut SharedTensor<T>) -> Result<(), Error> {
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let cudnn_framework = self.framework().cudnn();
        let r_desc = result.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);

        exec2!(cudnn_framework.sigmoid_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation Sigmoid Forward.")
    }

    fn sigmoid_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.sigmoid_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc_flat()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc_flat()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation Sigmoid Backward.")
    }
}

impl<T> ConvolutionConfig<T> for crate::cudnn::utils::ConvolutionConfig
where
    T: Float + DataTypeInfo,
{
    fn workspace_size(&self) -> usize {
        self.largest_workspace_size()
    }
}

impl<T> Convolution<T> for Backend<Cuda>
where
    T: Float + DataTypeInfo,
{
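    /// Resolves any `Auto` algorithm choices, queries cuDNN for the
    /// workspace size each selected algorithm needs, and bundles the
    /// results into a reusable convolution configuration.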
    fn new_convolution_config(
        &self,
        src: &SharedTensor<T>,
        dest: &SharedTensor<T>,
        filter: &SharedTensor<T>,
        algo_fwd: ConvForwardAlgo,
        algo_bwd_filter: ConvBackwardFilterAlgo,
        algo_bwd_data: ConvBackwardDataAlgo,
        stride: &[i32],
        zero_padding: &[i32],
    ) -> Result<Self::CC, Error> {
        let cudnn_framework = self.framework().cudnn();
        let src_desc = src.cudnn_tensor_desc()?;
        let dest_desc = dest.cudnn_tensor_desc()?;
        let filter_desc = filter.cudnn_filter_desc()?;
        let conv_desc = crate::cudnn::ConvolutionDescriptor::new(
            zero_padding,
            stride,
            <T as DataTypeInfo>::cudnn_data_type(),
        )
        .unwrap();

        let useable_algo_fwd = algo_fwd.find_cudnn_algo(
            cudnn_framework,
            &filter_desc,
            &conv_desc,
            &src_desc,
            &dest_desc,
        )?;
        let useable_algo_bwd_filter = algo_bwd_filter.find_cudnn_algo(
            cudnn_framework,
            &filter_desc,
            &conv_desc,
            &src_desc,
            &dest_desc,
        )?;
        let useable_algo_bwd_data = algo_bwd_data.find_cudnn_algo(
            cudnn_framework,
            &filter_desc,
            &conv_desc,
            &src_desc,
            &dest_desc,
        )?;

        let mut workspace_size_fwd = API::get_convolution_forward_workspace_size(
            *cudnn_framework.id_c(),
            useable_algo_fwd.as_cudnn().unwrap(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();
        let mut workspace_size_bwd_filter = API::get_convolution_backward_filter_workspace_size(
            *cudnn_framework.id_c(),
            useable_algo_bwd_filter.as_cudnn().unwrap(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();
        let mut workspace_size_bwd_data = API::get_convolution_backward_data_workspace_size(
            *cudnn_framework.id_c(),
            useable_algo_bwd_data.as_cudnn().unwrap(),
            *filter_desc.id_c(),
            *conv_desc.id_c(),
            *src_desc.id_c(),
            *dest_desc.id_c(),
        )
        .unwrap();

        // cuDNN may legitimately report a zero-byte workspace; allocate a
        // token 8 bytes instead so the shared workspace tensor is never empty.
        if workspace_size_fwd == 0 {
            workspace_size_fwd = 8;
        }
        if workspace_size_bwd_filter == 0 {
            workspace_size_bwd_filter = 8;
        }
        if workspace_size_bwd_data == 0 {
            workspace_size_bwd_data = 8;
        }

        Ok(crate::cudnn::utils::ConvolutionConfig::new(
            useable_algo_fwd.as_cudnn().unwrap(),
            workspace_size_fwd,
            useable_algo_bwd_filter.as_cudnn().unwrap(),
            workspace_size_bwd_filter,
            useable_algo_bwd_data.as_cudnn().unwrap(),
            workspace_size_bwd_data,
            conv_desc,
            filter_desc,
        ))
    }

    fn convolution(
        &self,
        filter: &SharedTensor<T>,
        x: &SharedTensor<T>,
        result: &mut SharedTensor<T>,
        workspace: &mut SharedTensor<u8>,
        config: &Self::CC,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();

        let r_desc = result.cudnn_tensor_desc()?;
        let f_mem = read!(filter, self);
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        let w_mem = write_only!(workspace, self);

        exec2!(cudnn_framework.convolution_forward(
            config,
            trans_mut!(w_mem),
            trans!(f_mem),
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Convolution Forward.")
    }

    fn convolution_grad_filter(
        &self,
        src_data: &SharedTensor<T>,
        dest_diff: &SharedTensor<T>,
        filter_diff: &mut SharedTensor<T>,
        workspace: &mut SharedTensor<u8>,
        config: &Self::CC,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let s_mem = read!(src_data, self);
        let dd_mem = read!(dest_diff, self);
        let df_mem = write_only!(filter_diff, self);
        let w_mem = write_only!(workspace, self);
        exec2!(cudnn_framework.convolution_backward_filter(
            config,
            trans_mut!(w_mem),
            &src_data.cudnn_tensor_desc()?,
            trans!(s_mem),
            &dest_diff.cudnn_tensor_desc()?,
            trans!(dd_mem),
            trans_mut!(df_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Convolution Backward Filter.")
    }

    fn convolution_grad_data(
        &self,
        filter: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
        workspace: &mut SharedTensor<u8>,
        config: &Self::CC,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();

        let dr_desc = result_diff.cudnn_tensor_desc()?;
        let f_mem = read!(filter, self);
        let dx_mem = read!(x_diff, self);
        let dr_mem = write_only!(result_diff, self);
        let w_mem = write_only!(workspace, self);
        exec2!(cudnn_framework.convolution_backward_data(
            config,
            trans_mut!(w_mem),
            trans!(f_mem),
            &x_diff.cudnn_tensor_desc()?,
            trans!(dx_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Convolution Backward Data.")
    }
}

impl<T> RnnConfig<T> for crate::cudnn::utils::RnnConfig
where
    T: Float + DataTypeInfo,
{
    fn workspace_size(&self) -> usize {
        self.largest_workspace_size()
    }
}

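// Mappings between the plugin's RNN configuration enums and their cuDNN
// counterparts.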
impl RnnInputMode {
    fn as_cudnn(&self) -> Result<cudnnRNNInputMode_t, Error> {
        Ok(match self {
            RnnInputMode::LinearInput => cudnnRNNInputMode_t::CUDNN_LINEAR_INPUT,
            RnnInputMode::SkipInput => cudnnRNNInputMode_t::CUDNN_SKIP_INPUT,
        })
    }

    fn from_cudnn(input: cudnnRNNInputMode_t) -> Self {
        match input {
            cudnnRNNInputMode_t::CUDNN_LINEAR_INPUT => RnnInputMode::LinearInput,
            cudnnRNNInputMode_t::CUDNN_SKIP_INPUT => RnnInputMode::SkipInput,
            _ => unreachable!(),
        }
    }
}

impl DirectionMode {
    fn as_cudnn(&self) -> Result<cudnnDirectionMode_t, Error> {
        Ok(match self {
            DirectionMode::BiDirectional => cudnnDirectionMode_t::CUDNN_BIDIRECTIONAL,
            DirectionMode::UniDirectional => cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL,
        })
    }

    fn from_cudnn(direction: cudnnDirectionMode_t) -> Self {
        match direction {
            cudnnDirectionMode_t::CUDNN_BIDIRECTIONAL => DirectionMode::BiDirectional,
            cudnnDirectionMode_t::CUDNN_UNIDIRECTIONAL => DirectionMode::UniDirectional,
            _ => unreachable!(),
        }
    }
}

impl RnnNetworkMode {
    fn as_cudnn(&self) -> Result<cudnnRNNMode_t, Error> {
        Ok(match self {
            RnnNetworkMode::ReLU => cudnnRNNMode_t::CUDNN_RNN_RELU,
            RnnNetworkMode::Tanh => cudnnRNNMode_t::CUDNN_RNN_TANH,
            RnnNetworkMode::LSTM => cudnnRNNMode_t::CUDNN_LSTM,
            RnnNetworkMode::GRU => cudnnRNNMode_t::CUDNN_GRU,
        })
    }

    fn from_cudnn(network_mode: cudnnRNNMode_t) -> Self {
        match network_mode {
            cudnnRNNMode_t::CUDNN_RNN_RELU => RnnNetworkMode::ReLU,
            cudnnRNNMode_t::CUDNN_RNN_TANH => RnnNetworkMode::Tanh,
            cudnnRNNMode_t::CUDNN_LSTM => RnnNetworkMode::LSTM,
            cudnnRNNMode_t::CUDNN_GRU => RnnNetworkMode::GRU,
            _ => unreachable!(),
        }
    }
}

impl RnnAlgorithm {
    fn as_cudnn(&self) -> Result<cudnnRNNAlgo_t, Error> {
        Ok(match self {
            RnnAlgorithm::PersistDynamic => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_DYNAMIC,
            RnnAlgorithm::PersistStatic => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_STATIC,
            RnnAlgorithm::Standard => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_STANDARD,
            RnnAlgorithm::Count => cudnnRNNAlgo_t::CUDNN_RNN_ALGO_COUNT,
        })
    }

    fn from_cudnn(algorithm: cudnnRNNAlgo_t) -> Self {
        match algorithm {
            cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_DYNAMIC => RnnAlgorithm::PersistDynamic,
            cudnnRNNAlgo_t::CUDNN_RNN_ALGO_PERSIST_STATIC => RnnAlgorithm::PersistStatic,
            cudnnRNNAlgo_t::CUDNN_RNN_ALGO_STANDARD => RnnAlgorithm::Standard,
            cudnnRNNAlgo_t::CUDNN_RNN_ALGO_COUNT => RnnAlgorithm::Count,
            _ => unreachable!(),
        }
    }
}

impl MathType {
    fn as_cudnn(&self) -> Result<cudnnMathType_t, Error> {
        match self {
            MathType::Default => Ok(cudnnMathType_t::CUDNN_DEFAULT_MATH),
            MathType::TensorOPMath => Ok(cudnnMathType_t::CUDNN_TENSOR_OP_MATH),
            MathType::TensorOPMathAllowConversion => {
                Ok(cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION)
            }
        }
    }

    fn from_cudnn(math_type: cudnnMathType_t) -> MathType {
        match math_type {
            cudnnMathType_t::CUDNN_DEFAULT_MATH => MathType::Default,
            cudnnMathType_t::CUDNN_TENSOR_OP_MATH => MathType::TensorOPMath,
            cudnnMathType_t::CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION => {
                MathType::TensorOPMathAllowConversion
            }
            _ => unreachable!(),
        }
    }
}

impl RnnPaddingMode {
    fn as_cudnn(&self) -> Result<cudnnRNNPaddingMode_t, Error> {
        match self {
            RnnPaddingMode::Enabled => Ok(CUDNN_RNN_PADDED_IO_ENABLED),
            RnnPaddingMode::Disabled => Ok(CUDNN_RNN_PADDED_IO_DISABLED),
        }
    }

    fn from_cudnn(padding_type: cudnnRNNPaddingMode_t) -> RnnPaddingMode {
        match padding_type {
            CUDNN_RNN_PADDED_IO_ENABLED => RnnPaddingMode::Enabled,
            CUDNN_RNN_PADDED_IO_DISABLED => RnnPaddingMode::Disabled,
            _ => unreachable!(),
        }
    }
}

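/// The full set of tensor descriptors for a single RNN invocation:
/// per-timestep input/output descriptors plus hidden- and cell-state
/// descriptors, each with a matching gradient (`d*`) counterpart.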
#[derive(Debug)]
pub struct RnnSequenceDescriptors {
    /// Per-timestep input descriptors.
    pub x_desc: Vec<TensorDescriptor>,
    /// Per-timestep output descriptors.
    pub y_desc: Vec<TensorDescriptor>,
    /// Per-timestep input-gradient descriptors.
    pub dx_desc: Vec<TensorDescriptor>,
    /// Per-timestep output-gradient descriptors.
    pub dy_desc: Vec<TensorDescriptor>,
    /// Initial hidden state.
    pub hx_desc: TensorDescriptor,
    /// Initial cell state (used by LSTM).
    pub cx_desc: TensorDescriptor,
    /// Final hidden state.
    pub hy_desc: TensorDescriptor,
    /// Final cell state (used by LSTM).
    pub cy_desc: TensorDescriptor,
    /// Gradient of the initial hidden state.
    pub dhx_desc: TensorDescriptor,
    /// Gradient of the initial cell state.
    pub dcx_desc: TensorDescriptor,
    /// Gradient of the final hidden state.
    pub dhy_desc: TensorDescriptor,
    /// Gradient of the final cell state.
    pub dcy_desc: TensorDescriptor,
}

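// Note: the forward and backward passes below always build their sequence
// descriptors with `DirectionMode::UniDirectional`; only `new_rnn_config`
// honours the caller's direction mode.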
impl<T> Rnn<T> for Backend<Cuda>
where
    T: Float + DataTypeInfo,
{
    fn generate_rnn_weight_description(
        &self,
        rnn_config: &Self::CRNN,
        batch_size: i32,
        input_size: i32,
    ) -> Result<Vec<usize>, Error> {
        let cudnn_framework = self.framework().cudnn();
        let data_type = <T as DataTypeInfo>::cudnn_data_type();

        let dim_x = vec![batch_size, input_size, 1];
        let stride_x = vec![dim_x[2] * dim_x[1], dim_x[2], 1];

        let x_desc = TensorDescriptor::new(&dim_x, &stride_x, data_type).unwrap();

        let weight_size: usize = exec2!(API::get_rnn_params_size(
            *cudnn_framework.id_c(),
            *rnn_config.rnn_desc().id_c(),
            *x_desc.id_c(),
            data_type,
        ) => "Unable to get CuDNN Rnn Params Size.")?;

        // cuDNN reports the weight size in bytes; convert to an element count.
        let dim_w: Vec<usize> = vec![weight_size / <T as DataTypeInfo>::size(), 1, 1];
        Ok(dim_w)
    }

    fn new_rnn_config(
        &self,
        src: &SharedTensor<T>,
        dropout_probability: Option<f32>,
        dropout_seed: Option<u64>,
        sequence_length: i32,
        network_mode: RnnNetworkMode,
        input_mode: RnnInputMode,
        direction_mode: DirectionMode,
        algorithm: RnnAlgorithm,
        hidden_size: i32,
        num_layers: i32,
        batch_size: i32,
    ) -> Result<Self::CRNN, Error> {
        let cudnn_framework = self.framework().cudnn();
        let input_mode = input_mode.as_cudnn()?;
        let network_mode = network_mode.as_cudnn()?;
        let algorithm = algorithm.as_cudnn()?;

        let src_description = src.desc();
        let data_type = <T as DataTypeInfo>::cudnn_data_type();

        let drop_desc = exec2!(cudnn_framework.init_dropout(
            dropout_probability.unwrap_or(0.5),
            dropout_seed.unwrap_or(0),
        ) => "Unable to create Dropout Layer")?;

        let dropout_memory: cudnnDropoutDescriptor_t = *drop_desc.dropout_desc().id_c();

        let x_desc = rnn_sequence_descriptors(
            sequence_length,
            src_description[1] as i32,
            hidden_size,
            batch_size,
            num_layers,
            direction_mode,
            data_type,
        )?
        .x_desc;

        let direction_mode = direction_mode.as_cudnn()?;

        let rnn_desc = exec2!(RnnDescriptor::new(
            &cudnn_framework,
            hidden_size,
            num_layers,
            drop_desc,
            input_mode,
            direction_mode,
            network_mode,
            algorithm,
            data_type,
            (RnnPaddingMode::Disabled).as_cudnn().unwrap(),
        ) => "Failed to create RNN descriptor")?;

        exec2!(cudnn_framework.init_rnn(
            &x_desc,
            rnn_desc,
            hidden_size,
            num_layers,
            sequence_length,
            dropout_memory,
            input_mode,
            direction_mode,
            network_mode,
            algorithm,
            <T as DataTypeInfo>::cudnn_data_type(),
            MathType::TensorOPMathAllowConversion.as_cudnn()?,
        ) => "Unable to perform RNN Initialization")
    }

    fn rnn_forward(
        &self,
        src: &SharedTensor<T>,
        output: &mut SharedTensor<T>,
        rnn_config: &Self::CRNN,
        weight: &SharedTensor<T>,
        workspace: &mut SharedTensor<u8>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();

        log::trace!("rnn_forward: src[dims] = {:?}", src.desc());
        log::trace!("rnn_forward: output[dims] = {:?}", output.desc());
        log::trace!("rnn_forward: weight[dims] = {:?}", weight.desc());

        let src_dimensions = src.desc();
        let sequence_descriptors = rnn_sequence_descriptors(
            *rnn_config.sequence_length(),
            src_dimensions[1] as i32,
            rnn_config.hidden_size,
            src_dimensions[0] as i32,
            rnn_config.num_layers,
            DirectionMode::UniDirectional,
            <T as DataTypeInfo>::cudnn_data_type(),
        )?;

        let weight_desc = weight.cudnn_filter_desc()?;
        let reserve = rnn_config.training_reserve();

        let src_mem = read!(src, self);
        let weight_mem = weight.read(self.device()).unwrap();
        let output_mem = output.write_only(self.device()).unwrap();
        let workspace_mem = workspace.write_only(self.device()).unwrap();

        exec2!(cudnn_framework.rnn_forward::<f32>(
            rnn_config,
            sequence_descriptors.x_desc,
            trans!(src_mem),
            sequence_descriptors.y_desc,
            trans_mut!(output_mem),
            &sequence_descriptors.hx_desc,
            std::ptr::null(),
            &sequence_descriptors.cx_desc,
            std::ptr::null(),
            &weight_desc,
            trans!(weight_mem),
            &sequence_descriptors.hy_desc,
            std::ptr::null_mut(),
            &sequence_descriptors.cy_desc,
            std::ptr::null_mut(),
            trans_mut!(workspace_mem),
            *reserve.id_c(),
        ) => "Unable to perform RNN Forward")
    }

    fn rnn_backward_data(
        &self,
        src: &SharedTensor<T>,
        src_gradient: &mut SharedTensor<T>,
        output: &SharedTensor<T>,
        output_gradient: &SharedTensor<T>,
        rnn_config: &Self::CRNN,
        weight: &SharedTensor<T>,
        workspace: &mut SharedTensor<u8>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let src_dimensions = src.desc().clone();
        let sequence_descriptors = rnn_sequence_descriptors(
            *rnn_config.sequence_length(),
            src_dimensions[1] as i32,
            rnn_config.hidden_size,
            src_dimensions[0] as i32,
            rnn_config.num_layers,
            DirectionMode::UniDirectional,
            <T as DataTypeInfo>::cudnn_data_type(),
        )?;
        let weight_desc = weight.cudnn_filter_desc()?;

        let _src_mem = read!(src, self);
        let src_gradient_mem = write_only!(src_gradient, self);
        let weight_mem = read!(weight, self);
        let output_mem = read!(output, self);
        let output_gradient_mem = read!(output_gradient, self);
        let workspace_mem = write_only!(workspace, self);
        let reserve_space = rnn_config.training_reserve();
        exec2!(cudnn_framework.rnn_backward_data::<f32>(
            rnn_config,
            sequence_descriptors.y_desc,
            trans!(output_mem),
            sequence_descriptors.dy_desc,
            trans!(output_gradient_mem),
            &sequence_descriptors.dhy_desc,
            std::ptr::null_mut(),
            &sequence_descriptors.dcy_desc,
            std::ptr::null_mut(),
            &weight_desc,
            trans!(weight_mem),
            &sequence_descriptors.hx_desc,
            std::ptr::null(),
            &sequence_descriptors.cx_desc,
            std::ptr::null(),
            sequence_descriptors.dx_desc,
            trans_mut!(src_gradient_mem),
            &sequence_descriptors.dhx_desc,
            std::ptr::null_mut(),
            &sequence_descriptors.dcx_desc,
            std::ptr::null_mut(),
            trans_mut!(workspace_mem),
            *reserve_space.id_c(),
        ) => "Unable to execute CUDA cuDNN RNN Backward Data")
    }

    fn rnn_backward_weights(
        &self,
        src: &SharedTensor<T>,
        output: &SharedTensor<T>,
        filter: &mut SharedTensor<T>,
        rnn_config: &Self::CRNN,
        workspace: &mut SharedTensor<u8>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let src_dimensions = src.desc().clone();
        let sequence_descriptors = rnn_sequence_descriptors(
            *rnn_config.sequence_length(),
            src_dimensions[1] as i32,
            rnn_config.hidden_size,
            src_dimensions[0] as i32,
            rnn_config.num_layers,
            DirectionMode::UniDirectional,
            <T as DataTypeInfo>::cudnn_data_type(),
        )?;
        let filter_desc = filter.cudnn_filter_desc()?;
        let src_mem = read!(src, self);
        let output_mem = read!(output, self);
        let workspace_mem = write_only!(workspace, self);
        let filter_mem = write_only!(filter, self);
        let reserve_space = rnn_config.training_reserve();
        exec2!(cudnn_framework.rnn_backward_weights::<f32>(
            rnn_config,
            sequence_descriptors.x_desc,
            trans!(src_mem),
            &sequence_descriptors.hx_desc,
            std::ptr::null_mut(),
            sequence_descriptors.y_desc,
            trans!(output_mem),
            filter_desc,
            trans_mut!(filter_mem),
            trans_mut!(workspace_mem),
            *reserve_space.id_c(),
        ) => "Unable to execute CUDA cuDNN RNN Backward Weights")
    }
}

impl<T> SigmoidPointwise<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn sigmoid_pointwise(&self, x: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let x_mem = read_write!(x, self);

        exec2!(cudnn_framework.sigmoid_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &x_desc,
            trans_mut!(x_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Sigmoid Pointwise forward.")
    }

    fn sigmoid_pointwise_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let dx_desc = x_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read_write!(x_diff, self);
        exec2!(cudnn_framework.sigmoid_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans!(dx_mem),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans_mut!(dx_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Sigmoid Pointwise backward.")
    }
}

impl<T> Relu<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn relu(&self, x: &SharedTensor<T>, result: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.relu_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation relu Forward.")
    }

    fn relu_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);

        exec2!(cudnn_framework.relu_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc_flat()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc_flat()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation relu Backward.")
    }
}

impl<T> ReluPointwise<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn relu_pointwise(&self, x: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let x_mem = read_write!(x, self);

        exec2!(cudnn_framework.relu_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &x_desc,
            trans_mut!(x_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN ReLU Pointwise forward.")
    }

    fn relu_pointwise_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let dx_desc = x_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read_write!(x_diff, self);

        exec2!(cudnn_framework.relu_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans!(dx_mem),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans_mut!(dx_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN ReLU Pointwise backward.")
    }
}

impl<T> Tanh<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn tanh(&self, x: &SharedTensor<T>, result: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.tanh_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation tanh Forward.")
    }

    fn tanh_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.tanh_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x.cudnn_tensor_desc_flat()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc_flat()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc_flat()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Activation tanh Backward.")
    }
}

impl<T> TanhPointwise<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn tanh_pointwise(&self, x: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let x_mem = read_write!(x, self);
        exec2!(cudnn_framework.tanh_forward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &x_desc,
            trans_mut!(x_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Tanh Pointwise forward.")
    }

    fn tanh_pointwise_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let x_desc = x.cudnn_tensor_desc_flat()?;
        let dx_desc = x_diff.cudnn_tensor_desc_flat()?;
        let x_mem = read!(x, self);
        let dx_mem = read_write!(x_diff, self);
        exec2!(cudnn_framework.tanh_backward(
            &cudnn_framework.init_activation().unwrap(),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans!(dx_mem),
            &x_desc,
            trans!(x_mem),
            &dx_desc,
            trans_mut!(dx_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN Tanh Pointwise backward.")
    }
}

impl<T> Softmax<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn softmax(&self, x: &SharedTensor<T>, result: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc_softmax()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.softmax_forward(
            &x.cudnn_tensor_desc_softmax()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN softmax Forward.")
    }

    fn softmax_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc_softmax()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.softmax_backward(
            &x.cudnn_tensor_desc_softmax()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc_softmax()?,
            trans!(dx_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN softmax Backward.")
    }
}

impl<T> LogSoftmax<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn log_softmax(&self, x: &SharedTensor<T>, result: &mut SharedTensor<T>) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc_softmax()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.log_softmax_forward(
            &x.cudnn_tensor_desc_softmax()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN log softmax Forward.")
    }

    fn log_softmax_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc_softmax()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.log_softmax_backward(
            &x.cudnn_tensor_desc_softmax()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc_softmax()?,
            trans!(dx_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN log softmax Backward.")
    }
}

impl<T> LRN<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn new_lrn_config(&self, n: u32, alpha: f64, beta: f64, k: f64) -> Result<Self::CLRN, Error> {
        let cudnn_framework = self.framework().cudnn();
        Ok(cudnn_framework
            .init_normalization(n, alpha, beta, k)
            .unwrap())
    }

    fn lrn(
        &self,
        x: &SharedTensor<T>,
        result: &mut SharedTensor<T>,
        config: &Self::CLRN,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.lrn_forward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN LRN Forward.")
    }

    #[allow(unused_variables)]
    fn lrn_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
        config: &Self::CLRN,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.lrn_backward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN LRN Backward.")
    }
}

impl<T> Pooling<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn new_pooling_config(
        &self,
        window: &[i32],
        stride: &[i32],
        padding: &[i32],
    ) -> Result<Self::CPOOL, Error> {
        let pooling_avg = crate::cudnn::PoolingDescriptor::new(
            crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING,
            window,
            padding,
            stride,
        )
        .unwrap();
        let pooling_max = crate::cudnn::PoolingDescriptor::new(
            crate::cudnn::cudnnPoolingMode_t::CUDNN_POOLING_MAX,
            window,
            padding,
            stride,
        )
        .unwrap();
        Ok(crate::cudnn::utils::PoolingConfig::new(
            pooling_avg,
            pooling_max,
        ))
    }

    fn pooling_max(
        &self,
        x: &SharedTensor<T>,
        result: &mut SharedTensor<T>,
        config: &Self::CPOOL,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();

        let r_desc = result.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.pooling_max_forward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN max pooling Forward.")
    }

    #[allow(unused_variables)]
    fn pooling_max_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
        config: &Self::CPOOL,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.pooling_max_backward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN max pooling Backward.")
    }

    fn pooling_avg(
        &self,
        x: &SharedTensor<T>,
        result: &mut SharedTensor<T>,
        config: &Self::CPOOL,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let r_desc = result.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);
        exec2!(cudnn_framework.pooling_avg_forward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN avg pooling Forward.")
    }

    #[allow(unused_variables)]
    fn pooling_avg_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
        config: &Self::CPOOL,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let scal_params: crate::cudnn::utils::ScalParams<T> =
            crate::cudnn::utils::ScalParams::default();
        let dr_desc = result_diff.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let dx_mem = read!(x_diff, self);
        let r_mem = read!(result, self);
        let dr_mem = write_only!(result_diff, self);
        exec2!(cudnn_framework.pooling_avg_backward(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &x_diff.cudnn_tensor_desc()?,
            trans!(dx_mem),
            &result.cudnn_tensor_desc()?,
            trans!(r_mem),
            &dr_desc,
            trans_mut!(dr_mem),
            scal_params,
        ) => "Unable to execute CUDA cuDNN avg pooling Backward.")
    }
}

impl<T> Dropout<T> for Backend<Cuda>
where
    T: Float + Default + DataTypeInfo,
{
    fn new_dropout_config(&self, probability: f32, seed: u64) -> Result<Self::CDROP, Error> {
        let cudnn_framework = self.framework().cudnn();
        Ok(cudnn_framework.init_dropout(probability, seed).unwrap())
    }

    fn dropout(
        &self,
        x: &SharedTensor<T>,
        result: &mut SharedTensor<T>,
        config: &Self::CDROP,
    ) -> Result<(), Error> {
        let cudnn_framework = self.framework().cudnn();
        let r_desc = result.cudnn_tensor_desc()?;
        let x_mem = read!(x, self);
        let r_mem = write_only!(result, self);

        exec2!(cudnn_framework.dropout_forward::<f32>(
            config,
            &x.cudnn_tensor_desc()?,
            trans!(x_mem),
            &r_desc,
            trans_mut!(r_mem),
        ) => "Unable to execute CUDA cuDNN Dropout Forward.")
    }

    #[allow(unused_variables)]
    fn dropout_grad(
        &self,
        x: &SharedTensor<T>,
        x_diff: &SharedTensor<T>,
        result: &SharedTensor<T>,
        result_diff: &mut SharedTensor<T>,
        config: &Self::CDROP,
    ) -> Result<(), Error> {
        // No-op: the dropout backward pass is currently not implemented for
        // this backend; the gradient tensors are left untouched.
        Ok(())
    }
}

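/// Error wrapper bridging raw `rcudnn` errors into the plugin's error types.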
#[derive(Debug, thiserror::Error)]
pub enum WrappingError {
    #[error("{0}")]
    Misc(&'static str),

    #[error(transparent)]
    Inner(#[from] rcudnn::Error),
}

impl From<WrappingError> for PluginError {
    fn from(e: WrappingError) -> Self {
        PluginError::PluginInner(Box::new(e))
    }
}

impl From<WrappingError> for co::Error {
    fn from(e: WrappingError) -> Self {
        co::Error::Plugin(co::plugin::Error::PluginInner(e.into()))
    }
}