use std::rc::Rc;
use std::sync::{Arc, RwLock};

use conn::{DirectionMode, RnnAlgorithm, RnnInputMode, RnnNetworkMode};

use crate::capnp_util::*;
use crate::co::prelude::*;
use crate::conn;
use crate::conn::RnnConfig as connRnnConfig;
use crate::juice_capnp::rnn_config as capnp_config;
use crate::layer::*;
use crate::util::ArcLock;
use crate::weight::FillerType;

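/// Recurrent network layer backed by any backend `B` that implements `conn::Rnn<f32>`
/// (plain RNN, GRU, or LSTM depending on the configured `rnn_type`).
///
/// The backend-specific RNN descriptor (`rnn_config`) and the scratch `workspace`
/// are only created once `reshape` and `resize_shared_workspace` have run.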
#[derive(Debug, Clone)]
pub struct Rnn<B: conn::Rnn<f32>> {
    hidden_size: usize,
    num_layers: usize,
    dropout_probability: f32,
    dropout_seed: u64,
    rnn_type: RnnNetworkMode,
    input_mode: RnnInputMode,
    direction_mode: DirectionMode,
    workspace: Option<ArcLock<SharedTensor<u8>>>,
    rnn_config: Option<Rc<B::CRNN>>,
}

impl<B: conn::Rnn<f32>> Rnn<B> {
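    /// Creates an `Rnn` layer from a [`RnnConfig`]; the backend descriptor and
    /// workspace are left unset until `reshape` runs.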
    pub fn from_config(config: &RnnConfig) -> Rnn<B> {
        Rnn {
            hidden_size: config.hidden_size,
            num_layers: config.num_layers,
            dropout_probability: config.dropout_probability,
            dropout_seed: config.dropout_seed,
            rnn_type: config.rnn_type,
            input_mode: config.input_mode,
            direction_mode: config.direction_mode,
            workspace: None,
            rnn_config: None,
        }
    }
}

impl<B: IBackend + conn::Rnn<f32>> ILayer<B> for Rnn<B> {
    impl_ilayer_common!();

    fn auto_weight_blobs(&self) -> bool {
        true
    }

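    // Shape negotiation: the input is interpreted as
    // [batch_size, input_size, sequence_length], the output blobs are resized to
    // [batch_size, hidden_size, num_layers], the backend RNN descriptor is created,
    // and the weight and bias blobs are resized and filled.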
    fn reshape(
        &mut self,
        backend: Rc<B>,
        input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
        weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
        output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
        output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
    ) {
        let input = input_data[0].read().unwrap();
        let mut output_data = output_data[0].write().unwrap();
        let mut output_gradient = output_gradient[0].write().unwrap();

        let input_shape = input.desc();
        let batch_size = input_shape[0];
        let input_size = input_shape[1];
        let sequence_length = input_shape[2];

        let hidden_size = self.hidden_size;

        let output_shape = &[batch_size, hidden_size, self.num_layers];
        input_gradient[0].write().unwrap().resize(input_shape).unwrap();
        output_data.resize(output_shape).unwrap();
        output_gradient.resize(output_shape).unwrap();

        let config = backend
            .new_rnn_config(
                &input,
                Some(self.dropout_probability),
                Some(self.dropout_seed),
                sequence_length as i32,
                self.rnn_type,
                self.input_mode,
                self.direction_mode,
                RnnAlgorithm::Standard,
                hidden_size as i32,
                self.num_layers as i32,
                batch_size as i32,
            )
            .unwrap();

        let filter_dimensions: TensorDesc = backend
            .generate_rnn_weight_description(&config, batch_size as i32, input_size as i32)
            .unwrap();

        weights_data[0].write().unwrap().resize(&filter_dimensions).unwrap();
        weights_data[1].write().unwrap().resize(&(1, self.hidden_size)).unwrap();

        let filler = FillerType::Glorot {
            input_size: filter_dimensions.size(),
            output_size: batch_size * self.num_layers * self.hidden_size,
        };

        let bias_filler = FillerType::Constant { value: 1.0 };

        filler.fill(&mut weights_data[0].write().unwrap());
        bias_filler.fill(&mut weights_data[1].write().unwrap());

        weights_gradient[0].write().unwrap().resize(&filter_dimensions).unwrap();
        weights_gradient[1].write().unwrap().resize(&filter_dimensions).unwrap();

        self.rnn_config = Some(Rc::new(config));
    }

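    // Reuse the shared workspace when it is already large enough for this layer's
    // RNN descriptor; otherwise allocate a fresh one. Either way, keep a handle in
    // `self.workspace` so the compute methods below can use it.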
    fn resize_shared_workspace(
        &mut self,
        backend: Rc<B>,
        workspace: Option<ArcLock<SharedTensor<u8>>>,
    ) -> Option<ArcLock<SharedTensor<u8>>> {
        let required_size = self.rnn_config.as_ref().unwrap().workspace_size();

        if let Some(old_workspace) = workspace {
            let old_workspace_size = old_workspace.read().unwrap().capacity();
            if old_workspace_size >= required_size {
                self.workspace = Some(old_workspace.clone());
                return Some(old_workspace);
            }
        }
        self.workspace = Some(Arc::new(RwLock::new(SharedTensor::<u8>::new(&[required_size]))));
        self.workspace.clone()
    }
}

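// Forward pass: delegate to the backend's `rnn_forward`, using the descriptor and
// workspace cached during `reshape`/`resize_shared_workspace`.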
impl<B: IBackend + conn::Rnn<f32>> ComputeOutput<f32, B> for Rnn<B> {
    fn compute_output(
        &self,
        backend: &B,
        weights: &[&SharedTensor<f32>],
        input_data: &[&SharedTensor<f32>],
        output_data: &mut [&mut SharedTensor<f32>],
    ) {
        let rnn_config = self.rnn_config.as_ref().unwrap();
        let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();
        backend
            .rnn_forward(&input_data[0], output_data[0], rnn_config, weights[0], &mut workspace)
            .unwrap();
    }
}

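// Backward pass w.r.t. the input: delegate to the backend's `rnn_backward_data`.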
impl<B: IBackend + conn::Rnn<f32>> ComputeInputGradient<f32, B> for Rnn<B> {
    fn compute_input_gradient(
        &self,
        backend: &B,
        weights_data: &[&SharedTensor<f32>],
        output_data: &[&SharedTensor<f32>],
        output_gradients: &[&SharedTensor<f32>],
        input_data: &[&SharedTensor<f32>],
        input_gradients: &mut [&mut SharedTensor<f32>],
    ) {
        let rnn_config = self.rnn_config.as_ref().unwrap();
        let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();

        backend
            .rnn_backward_data(
                &input_data[0],
                input_gradients[0],
                &output_data[0],
                output_gradients[0],
                rnn_config,
                weights_data[0],
                &mut workspace,
            )
            .unwrap();
    }
}

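// Backward pass w.r.t. the parameters: delegate to the backend's
// `rnn_backward_weights` for each parameter gradient blob.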
impl<B: IBackend + conn::Rnn<f32>> ComputeParametersGradient<f32, B> for Rnn<B> {
    fn compute_parameters_gradient(
        &self,
        backend: &B,
        output_data: &[&SharedTensor<f32>],
        output_gradients: &[&SharedTensor<f32>],
        input_data: &[&SharedTensor<f32>],
        parameters_gradients: &mut [&mut SharedTensor<f32>],
    ) {
        let rnn_config = self.rnn_config.as_ref().unwrap();
        let mut workspace = self.workspace.as_ref().unwrap().write().unwrap();

        backend
            .rnn_backward_weights(
                &input_data[0],
                &output_data[0],
                &mut parameters_gradients[0],
                rnn_config,
                &mut workspace,
            )
            .unwrap();

        backend
            .rnn_backward_weights(
                &input_data[0],
                &output_data[0],
                &mut parameters_gradients[1],
                rnn_config,
                &mut workspace,
            )
            .unwrap();
    }
}

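/// Specifies configuration parameters for an [`Rnn`] layer.
///
/// A minimal construction sketch (the field values below are illustrative only):
///
/// ```rust,ignore
/// let cfg = RnnConfig {
///     hidden_size: 8,
///     num_layers: 2,
///     rnn_type: RnnNetworkMode::LSTM,
///     dropout_probability: 0.0,
///     dropout_seed: 0,
///     input_mode: RnnInputMode::LinearInput,
///     direction_mode: DirectionMode::UniDirectional,
/// };
/// let layer_type: LayerType = cfg.into();
/// ```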
#[derive(Debug, Clone, Copy)]
pub struct RnnConfig {
    /// Size of the hidden layer.
    pub hidden_size: usize,
    /// Number of stacked hidden layers.
    pub num_layers: usize,
    /// Type of RNN cell (ReLU, tanh, LSTM, or GRU).
    pub rnn_type: RnnNetworkMode,
    /// Probability of dropout applied between layers.
    pub dropout_probability: f32,
    /// Seed for the dropout random number generator.
    pub dropout_seed: u64,
    /// How the input is fed into the network (linear or skip input).
    pub input_mode: RnnInputMode,
    /// Whether the sequence is processed uni- or bidirectionally.
    pub direction_mode: DirectionMode,
}

impl Into<LayerType> for RnnConfig {
    fn into(self) -> LayerType {
        LayerType::Rnn(self)
    }
}

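// Cap'n Proto serialization and deserialization of the layer configuration.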
impl<'a> CapnpWrite<'a> for RnnConfig {
    type Builder = capnp_config::Builder<'a>;

    fn write_capnp(&self, builder: &mut Self::Builder) {
        builder.reborrow().set_num_layers(self.num_layers as u64);
        builder.reborrow().set_hidden_size(self.hidden_size as u64);
        builder.reborrow().set_rnn_type(&self.rnn_type.to_string());
        builder.reborrow().set_dropout_probability(self.dropout_probability);
        builder.reborrow().set_dropout_seed(self.dropout_seed);
        builder.reborrow().set_input_mode(&self.input_mode.to_string());
        builder.reborrow().set_direction_mode(&self.direction_mode.to_string());
    }
}

impl<'a> CapnpRead<'a> for RnnConfig {
    type Reader = capnp_config::Reader<'a>;

    fn read_capnp(reader: Self::Reader) -> Self {
        let read_num_layers = reader.get_num_layers() as usize;
        let read_hidden_size = reader.get_hidden_size() as usize;
        let read_dropout_probability = reader.get_dropout_probability();
        let read_dropout_seed = reader.get_dropout_seed();
        let read_rnn_type = RnnNetworkMode::from_string(reader.get_rnn_type().unwrap()).unwrap();
        let read_input_mode = RnnInputMode::from_string(reader.get_input_mode().unwrap()).unwrap();
        let read_direction_mode = DirectionMode::from_string(reader.get_direction_mode().unwrap()).unwrap();

        RnnConfig {
            hidden_size: read_hidden_size,
            num_layers: read_num_layers,
            rnn_type: read_rnn_type,
            dropout_seed: read_dropout_seed,
            dropout_probability: read_dropout_probability,
            input_mode: read_input_mode,
            direction_mode: read_direction_mode,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::rc::Rc;

    use conn::Rnn as coRnn;
    use conn::{DirectionMode, RnnAlgorithm, RnnInputMode, RnnNetworkMode};

    #[cfg(feature = "cuda")]
    use crate::co::frameworks::cuda::get_cuda_backend as cuda_backend;
    use crate::co::*;
    use crate::layer::{ComputeInputGradient, ComputeOutput, ComputeParametersGradient, ILayer};
    use crate::util::native_backend;
    use crate::weight::FillerType;

    use super::{Rnn, RnnConfig};

    fn sample_input_64() -> Vec<f32> {
        vec![0.5f32; 64]
    }

    fn sample_input_25() -> Vec<f32> {
        vec![0.5f32; 25]
    }

    fn sample_output() -> &'static [f32] {
        &[0.6639924, 0.5426032, 0.7527217, 0.3648719, 0.6244233]
    }

    #[test]
    #[cfg(feature = "cuda")]
    fn rnn_create_layer() {
        let cfg = RnnConfig {
            hidden_size: 8,
            num_layers: 2,
            dropout_probability: 0.5,
            dropout_seed: 0,
            rnn_type: RnnNetworkMode::LSTM,
            input_mode: RnnInputMode::LinearInput,
            direction_mode: DirectionMode::UniDirectional,
        };

        let native_backend = native_backend();
        let backend = cuda_backend();

        let batch_size = 5_usize;
        let sequence_length = 5_usize;
        let height = 1_usize;
        let width = 1_usize;

        let hidden_size = cfg.hidden_size;
        let num_layers = cfg.num_layers;

        let input_shape = &(batch_size, sequence_length, height, width);
        let mut layer = Rnn::<Backend<Cuda>>::from_config(&cfg);

        let mut input_data = SharedTensor::<f32>::new(input_shape);
        input_data
            .write_only(native_backend.device())
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&sample_input_25());

        let input_shape = input_data.desc();

        let output_shape = &[input_shape[0], input_shape[1], num_layers];
        let output_data = SharedTensor::<f32>::new(output_shape);

        layer.rnn_config = Some(Rc::from(
            backend
                .new_rnn_config(
                    &input_data,
                    None,
                    None,
                    sequence_length as i32,
                    RnnNetworkMode::LSTM,
                    RnnInputMode::LinearInput,
                    DirectionMode::UniDirectional,
                    RnnAlgorithm::Standard,
                    hidden_size as i32,
                    num_layers as i32,
                    input_shape[0] as i32,
                )
                .unwrap(),
        ));
    }

    #[test]
    #[cfg(feature = "cuda")]
    fn rnn_roundtrip_pass() {
        let _ = env_logger::builder()
            .is_test(true)
            .filter_level(log::LevelFilter::Trace)
            .try_init();

        let backend: Backend<Cuda> = cuda_backend();
        const SEQUENCE_LENGTH: usize = 7;
        const HIDDEN_SIZE: usize = 5;
        const NUM_LAYERS: usize = 3;
        const BATCH_SIZE: usize = 2;
        const INPUT_SIZE: usize = 11;

        let cfg = RnnConfig {
            hidden_size: HIDDEN_SIZE,
            num_layers: NUM_LAYERS,
            dropout_probability: 0.5,
            dropout_seed: 1337,
            rnn_type: RnnNetworkMode::LSTM,
            input_mode: RnnInputMode::LinearInput,
            direction_mode: DirectionMode::UniDirectional,
        };

        let native_backend = native_backend();
        let mut layer = Rnn::<Backend<Cuda>>::from_config(&cfg);

        let input_shape = vec![BATCH_SIZE, INPUT_SIZE, 1, 1];

        let mut input_data = SharedTensor::<f32>::new(&input_shape);
        let mut input_gradients = SharedTensor::<f32>::new(&input_shape);

        let data = std::iter::repeat(0.5_f32)
            .take(BATCH_SIZE * INPUT_SIZE)
            .collect::<Vec<f32>>();
        input_data
            .write_only(native_backend.device())
            .unwrap()
            .as_mut_slice()
            .copy_from_slice(&data);

        let output_shape = vec![BATCH_SIZE, HIDDEN_SIZE, 1];

        let mut output_data = SharedTensor::<f32>::new(&output_shape);

        let config = backend
            .new_rnn_config(
                &input_data,
                None,
                None,
                SEQUENCE_LENGTH as i32,
                RnnNetworkMode::LSTM,
                RnnInputMode::LinearInput,
                DirectionMode::UniDirectional,
                RnnAlgorithm::Standard,
                HIDDEN_SIZE as i32,
                NUM_LAYERS as i32,
                BATCH_SIZE as i32,
            )
            .unwrap();

        let filter_dimensions = <Backend<Cuda> as conn::Rnn<f32>>::generate_rnn_weight_description(
            &backend,
            &config,
            BATCH_SIZE as i32,
            INPUT_SIZE as i32,
        )
        .unwrap();

        layer.rnn_config = Some(Rc::from(config));

        let mut weights_data = vec![
            SharedTensor::<f32>::new(&filter_dimensions),
            SharedTensor::<f32>::new(&filter_dimensions),
        ];

        let weights_gradient = vec![
            SharedTensor::<f32>::new(&filter_dimensions),
            SharedTensor::<f32>::new(&(1, SEQUENCE_LENGTH)),
        ];

        let filler = FillerType::Constant { value: 0.02 };

        filler.fill(&mut weights_data[0]);
        filler.fill(&mut weights_data[1]);

        layer.resize_shared_workspace(Rc::from(cuda_backend()), None);

        layer.compute_output(
            &backend,
            &weights_data.iter().collect::<Vec<_>>(),
            &[&input_data],
            &mut [&mut output_data],
        );

        let mut output_gradients = SharedTensor::<f32>::new(&output_shape);
        filler.fill(&mut output_gradients);

        layer.compute_input_gradient(
            &backend,
            &weights_data.iter().collect::<Vec<_>>(),
            &[&output_data],
            &[&output_gradients],
            &[&input_data],
            &mut [&mut input_gradients],
        );

        layer.compute_parameters_gradient(
            &backend,
            &[&output_data],
            &[&output_gradients],
            &[&input_data],
            &mut weights_data.iter_mut().collect::<Vec<_>>(),
        );
    }
}