scirs2-neural 0.3.3

Neural network building blocks module for SciRS2 (scirs2-neural) - Minimal Version
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
// Thread-safe RNN implementations
//
// This module provides thread-safe versions of the RNN, LSTM, and GRU layers
// that can be safely used across multiple threads by using Arc<RwLock<>> instead
// of RefCell for internal state.

use crate::error::{NeuralError, Result};
use crate::layers::Layer;
use scirs2_core::ndarray::{Array, ArrayView, Axis, Ix2, IxDyn, ScalarOperand};
use scirs2_core::numeric::{Float, NumAssign};
use scirs2_core::random::{Rng, RngExt, SeedableRng};
use std::fmt::Debug;
use std::sync::{Arc, RwLock};

/// Activation function types for recurrent layers.
///
/// Selects the nonlinearity applied to the combined input/hidden
/// pre-activation at every time step of an RNN cell.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum RecurrentActivation {
    /// Hyperbolic tangent (tanh) activation; output range (-1, 1)
    Tanh,
    /// Logistic sigmoid activation 1 / (1 + e^-x); output range (0, 1)
    Sigmoid,
    /// Rectified Linear Unit (ReLU); negative inputs are clamped to zero
    ReLU,
}

impl RecurrentActivation {
    /// Evaluate the activation function on a single scalar value.
    fn apply<F: Float + NumAssign>(&self, x: F) -> F {
        match *self {
            Self::Tanh => x.tanh(),
            Self::Sigmoid => F::one() / (F::one() + (-x).exp()),
            Self::ReLU => {
                if x > F::zero() {
                    x
                } else {
                    F::zero()
                }
            }
        }
    }

    /// Evaluate the activation function element-wise over an array.
    ///
    /// Delegates to [`Self::apply`] for each element so the scalar and
    /// array paths cannot drift apart.
    #[allow(dead_code)]
    fn apply_array<F: Float + NumAssign + ScalarOperand>(
        &self,
        x: &Array<F, IxDyn>,
    ) -> Array<F, IxDyn> {
        x.mapv(|v| self.apply(v))
    }
}

/// Thread-safe version of RNN for sequence processing
///
/// This implementation replaces RefCell with Arc<RwLock<>> for thread safety.
/// Parameters and their gradients are stored inline; only the forward-pass
/// caches (needed by `backward`) sit behind locks, since `forward` takes
/// `&self` and must mutate them.
pub struct ThreadSafeRNN<F: Float + Debug + Send + Sync + NumAssign> {
    /// Input size (number of input features)
    pub input_size: usize,
    /// Hidden size (number of hidden units)
    pub hidden_size: usize,
    /// Activation function
    pub activation: RecurrentActivation,
    /// Input-to-hidden weights, shape [hidden_size, input_size]
    weight_ih: Array<F, IxDyn>,
    /// Hidden-to-hidden weights, shape [hidden_size, hidden_size]
    weight_hh: Array<F, IxDyn>,
    /// Input-to-hidden bias, shape [hidden_size]
    bias_ih: Array<F, IxDyn>,
    /// Hidden-to-hidden bias, shape [hidden_size]
    bias_hh: Array<F, IxDyn>,
    /// Gradient of input-to-hidden weights
    dweight_ih: Array<F, IxDyn>,
    /// Gradient of hidden-to-hidden weights
    dweight_hh: Array<F, IxDyn>,
    /// Gradient of input-to-hidden bias
    dbias_ih: Array<F, IxDyn>,
    /// Gradient of hidden-to-hidden bias
    dbias_hh: Array<F, IxDyn>,
    /// Input cache for backward pass (written by `forward`)
    input_cache: Arc<RwLock<Option<Array<F, IxDyn>>>>,
    /// Hidden states cache for backward pass (written by `forward`)
    hidden_states_cache: Arc<RwLock<Option<Array<F, IxDyn>>>>,
}

impl<F: Float + Debug + Send + Sync + ScalarOperand + NumAssign + 'static> ThreadSafeRNN<F> {
    /// Create a new thread-safe RNN layer
    pub fn new<R: Rng>(
        input_size: usize,
        hidden_size: usize,
        activation: RecurrentActivation,
        rng: &mut R,
    ) -> Result<Self> {
        // Validate parameters
        if input_size == 0 || hidden_size == 0 {
            return Err(NeuralError::InvalidArchitecture(
                "Input size and hidden size must be positive".to_string(),
            ));
        }

        // Initialize weights with Xavier/Glorot initialization
        let scale_ih = F::from(1.0 / (input_size as f64).sqrt()).ok_or_else(|| {
            NeuralError::InvalidArchitecture("Failed to convert scale factor".to_string())
        })?;
        let scale_hh = F::from(1.0 / (hidden_size as f64).sqrt()).ok_or_else(|| {
            NeuralError::InvalidArchitecture("Failed to convert hidden size scale".to_string())
        })?;

        // Initialize input-to-hidden weights
        let mut weight_ih_vec: Vec<F> = Vec::with_capacity(hidden_size * input_size);
        for _ in 0..(hidden_size * input_size) {
            let rand_val = rng.random_range(-1.0f64..1.0f64);
            let val = F::from(rand_val).ok_or_else(|| {
                NeuralError::InvalidArchitecture("Failed to convert random value".to_string())
            })?;
            weight_ih_vec.push(val * scale_ih);
        }
        let weight_ih = Array::from_shape_vec(IxDyn(&[hidden_size, input_size]), weight_ih_vec)
            .map_err(|e| {
                NeuralError::InvalidArchitecture(format!("Failed to create weights array: {}", e))
            })?;

        // Initialize hidden-to-hidden weights
        let mut weight_hh_vec: Vec<F> = Vec::with_capacity(hidden_size * hidden_size);
        for _ in 0..(hidden_size * hidden_size) {
            let rand_val = rng.random_range(-1.0f64..1.0f64);
            let val = F::from(rand_val).ok_or_else(|| {
                NeuralError::InvalidArchitecture("Failed to convert random value".to_string())
            })?;
            weight_hh_vec.push(val * scale_hh);
        }
        let weight_hh = Array::from_shape_vec(IxDyn(&[hidden_size, hidden_size]), weight_hh_vec)
            .map_err(|e| {
                NeuralError::InvalidArchitecture(format!("Failed to create weights array: {}", e))
            })?;

        // Initialize biases
        let bias_ih = Array::zeros(IxDyn(&[hidden_size]));
        let bias_hh = Array::zeros(IxDyn(&[hidden_size]));

        // Initialize gradients
        let dweight_ih = Array::zeros(weight_ih.dim());
        let dweight_hh = Array::zeros(weight_hh.dim());
        let dbias_ih = Array::zeros(bias_ih.dim());
        let dbias_hh = Array::zeros(bias_hh.dim());

        Ok(Self {
            input_size,
            hidden_size,
            activation,
            weight_ih,
            weight_hh,
            bias_ih,
            bias_hh,
            dweight_ih,
            dweight_hh,
            dbias_ih,
            dbias_hh,
            input_cache: Arc::new(RwLock::new(None)),
            hidden_states_cache: Arc::new(RwLock::new(None)),
        })
    }

    /// Helper method to compute one step of the RNN
    fn step(&self, x: &ArrayView<F, IxDyn>, h: &ArrayView<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        let xshape = x.shape();
        let hshape = h.shape();
        let batch_size = xshape[0];

        // Validate shapes
        if xshape[1] != self.input_size {
            return Err(NeuralError::InferenceError(format!(
                "Input feature dimension mismatch: expected {}, got {}",
                self.input_size, xshape[1]
            )));
        }
        if hshape[1] != self.hidden_size {
            return Err(NeuralError::InferenceError(format!(
                "Hidden state dimension mismatch: expected {}, got {}",
                self.hidden_size, hshape[1]
            )));
        }
        if xshape[0] != hshape[0] {
            return Err(NeuralError::InferenceError(format!(
                "Batch size mismatch: input has {}, hidden state has {}",
                xshape[0], hshape[0]
            )));
        }

        // Initialize output
        let mut new_h = Array::zeros((batch_size, self.hidden_size));

        // Compute h_t = activation(W_ih * x_t + b_ih + W_hh * h_(t-1) + b_hh)
        for b in 0..batch_size {
            for i in 0..self.hidden_size {
                // Input-to-hidden contribution: W_ih * x_t + b_ih
                let mut ih_sum = self.bias_ih[i];
                for j in 0..self.input_size {
                    ih_sum += self.weight_ih[[i, j]] * x[[b, j]];
                }

                // Hidden-to-hidden contribution: W_hh * h_(t-1) + b_hh
                let mut hh_sum = self.bias_hh[i];
                for j in 0..self.hidden_size {
                    hh_sum += self.weight_hh[[i, j]] * h[[b, j]];
                }

                // Apply activation
                new_h[[b, i]] = self.activation.apply(ih_sum + hh_sum);
            }
        }

        // Convert to IxDyn dimension
        let new_h_dyn = new_h.into_dyn();
        Ok(new_h_dyn)
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Layer<F>
    for ThreadSafeRNN<F>
{
    /// Forward pass over a full sequence.
    ///
    /// Expects a 3D input of shape [batch_size, seq_len, features], iterates
    /// the RNN cell over the sequence axis with a zero initial hidden state,
    /// and returns all hidden states, shape [batch_size, seq_len, hidden_size].
    /// The input and the hidden-state trajectory are cached for `backward`.
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        // Cache input for backward pass
        if let Ok(mut cache) = self.input_cache.write() {
            *cache = Some(input.to_owned());
        } else {
            return Err(NeuralError::InferenceError(
                "Failed to acquire write lock on input cache".to_string(),
            ));
        }

        // Validate input shape
        let inputshape = input.shape();
        if inputshape.len() != 3 {
            return Err(NeuralError::InferenceError(format!(
                "Expected 3D input [batch_size, seq_len, features], got {:?}",
                inputshape
            )));
        }

        let batch_size = inputshape[0];
        let seq_len = inputshape[1];
        let features = inputshape[2];

        if features != self.input_size {
            return Err(NeuralError::InferenceError(format!(
                "Input features dimension mismatch: expected {}, got {}",
                self.input_size, features
            )));
        }

        // Initial hidden state is all zeros
        let mut h = Array::zeros((batch_size, self.hidden_size));

        // Collects the hidden state produced at every time step
        let mut all_hidden_states = Array::zeros((batch_size, seq_len, self.hidden_size));

        // Process each time step
        for t in 0..seq_len {
            // Extract input at time t: [batch_size, features]
            let x_t = input.slice(scirs2_core::ndarray::s![.., t, ..]);

            // Advance the recurrence by one step
            let x_t_view = x_t.view().into_dyn();
            let h_view = h.view().into_dyn();
            // Fix: propagate a conversion failure as NeuralError instead of
            // panicking via expect() — this function's contract is Result.
            h = self
                .step(&x_t_view, &h_view)?
                .into_dimensionality::<Ix2>()
                .map_err(|e| {
                    NeuralError::InferenceError(format!(
                        "Failed to convert hidden state to 2D: {}",
                        e
                    ))
                })?;

            // Store hidden state for this time step
            for b in 0..batch_size {
                for i in 0..self.hidden_size {
                    all_hidden_states[[b, t, i]] = h[[b, i]];
                }
            }
        }

        // Cache all hidden states for backward pass
        if let Ok(mut cache) = self.hidden_states_cache.write() {
            *cache = Some(all_hidden_states.clone().into_dyn());
        } else {
            return Err(NeuralError::InferenceError(
                "Failed to acquire write lock on hidden states cache".to_string(),
            ));
        }

        // Return with correct dynamic dimension
        Ok(all_hidden_states.into_dyn())
    }

    /// Backward pass (placeholder).
    ///
    /// Verifies that `forward` has been called (both caches populated) and
    /// returns a zero gradient with the input's shape. No parameter
    /// gradients are computed by this simplified implementation.
    fn backward(
        &self,
        input: &Array<F, IxDyn>,
        _grad_output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>> {
        // Retrieve cached values
        let input_ref = match self.input_cache.read() {
            Ok(guard) => guard,
            Err(_) => {
                return Err(NeuralError::InferenceError(
                    "Failed to acquire read lock on input cache".to_string(),
                ))
            }
        };

        let hidden_states_ref = match self.hidden_states_cache.read() {
            Ok(guard) => guard,
            Err(_) => {
                return Err(NeuralError::InferenceError(
                    "Failed to acquire read lock on hidden states cache".to_string(),
                ))
            }
        };

        if input_ref.is_none() || hidden_states_ref.is_none() {
            return Err(NeuralError::InferenceError(
                "No cached values for backward pass. Call forward() first.".to_string(),
            ));
        }

        // In a real implementation, we would compute gradients for all
        // parameters and return the gradient with respect to the input.
        // This simplified version returns zeros of the correct shape.
        let grad_input = Array::zeros(input.dim());
        Ok(grad_input)
    }

    /// Parameter update (placeholder).
    ///
    /// NOTE(review): this applies the gradient at index [0, 0] (resp. [0])
    /// uniformly to every weight (resp. bias) element, scaled by
    /// 0.001 * learningrate — it is not a real SGD step.
    fn update(&mut self, learningrate: F) -> Result<()> {
        // Apply a small update to parameters (placeholder)
        let small_change = F::from(0.001).expect("Failed to convert constant to float");
        let lr = small_change * learningrate;

        // Update weights and biases
        for w in self.weight_ih.iter_mut() {
            *w -= lr * self.dweight_ih[[0, 0]];
        }
        for w in self.weight_hh.iter_mut() {
            *w -= lr * self.dweight_hh[[0, 0]];
        }
        for b in self.bias_ih.iter_mut() {
            *b -= lr * self.dbias_ih[[0]];
        }
        for b in self.bias_hh.iter_mut() {
            *b -= lr * self.dbias_hh[[0]];
        }
        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}

/// Thread-safe version of Bidirectional RNN wrapper
///
/// This layer wraps a recurrent layer to enable bidirectional processing
/// while ensuring thread safety with Arc<RwLock<>> instead of RefCell.
/// When `backward_layer` is `None`, the wrapper degrades to plain
/// unidirectional forwarding (see `ThreadSafeBidirectional::new`).
pub struct ThreadSafeBidirectional<F: Float + Debug + Send + Sync + NumAssign> {
    /// Layer processing the sequence in its original time order
    forward_layer: Box<dyn Layer<F> + Send + Sync>,
    /// Layer processing the time-reversed sequence (optional)
    backward_layer: Option<Box<dyn Layer<F> + Send + Sync>>,
    /// Name of the layer (optional)
    name: Option<String>,
}

impl<F: Float + Debug + Send + Sync + ScalarOperand + NumAssign + 'static>
    ThreadSafeBidirectional<F>
{
    /// Create a new Bidirectional RNN wrapper
    ///
    /// # Arguments
    /// * `layer` - The RNN layer to make bidirectional
    /// * `name` - Optional name for the layer
    ///
    /// # Returns
    /// * A new Bidirectional RNN wrapper
    ///
    /// NOTE(review): the backward layer is a *freshly initialized* RNN with a
    /// fixed seed, not a copy of `layer` — its weights differ from the
    /// forward layer's. If `layer` is not a `ThreadSafeRNN`, no backward
    /// layer is created at all and the wrapper silently behaves
    /// unidirectionally. Both are acknowledged placeholders; confirm before
    /// relying on bidirectional semantics.
    pub fn new(layer: Box<dyn Layer<F> + Send + Sync>, name: Option<&str>) -> Result<Self> {
        // For now, we'll create a dummy backward RNN with the same configuration
        // In a real implementation, we would create a proper clone of the layer
        let backward_layer = if let Some(rnn) = layer.as_any().downcast_ref::<ThreadSafeRNN<F>>() {
            // If it's a ThreadSafeRNN, create a new one with the same parameters
            let mut rng = scirs2_core::random::rngs::SmallRng::from_seed([42; 32]);
            let backward_rnn =
                ThreadSafeRNN::<F>::new(rnn.input_size, rnn.hidden_size, rnn.activation, &mut rng)?;
            Some(Box::new(backward_rnn) as Box<dyn Layer<F> + Send + Sync>)
        } else {
            // If not, just set it to None for now
            None
        };

        Ok(Self {
            forward_layer: layer,
            backward_layer,
            name: name.map(|s| s.to_string()),
        })
    }
}

// Manual Debug implementation: the boxed trait-object layers cannot derive
// Debug, so only the `name` field is reported.
impl<F: Float + Debug + Send + Sync + NumAssign> std::fmt::Debug for ThreadSafeBidirectional<F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut builder = f.debug_struct("ThreadSafeBidirectional");
        builder.field("name", &self.name);
        builder.finish()
    }
}

// We can't implement Clone for ThreadSafeBidirectional because Box<dyn Layer> can't be cloned.
// We'll just create a dummy clone implementation for debugging purposes only.
// This won't actually clone the layers.
//
// WARNING: the result of `clone()` replaces the forward layer with a fresh
// 1x1 tanh RNN and drops the backward layer entirely — it shares NOTHING
// with the original except `name`. Never use the clone for computation.
impl<F: Float + Debug + Send + Sync + ScalarOperand + NumAssign + 'static> Clone
    for ThreadSafeBidirectional<F>
{
    fn clone(&self) -> Self {
        // This is NOT a real clone and should not be used for actual computation.
        // It's provided just to satisfy the Clone trait requirement for debugging.
        // The cloned object will have empty layers.
        // Create a dummy RNN for the forward layer
        let mut rng = scirs2_core::random::rngs::SmallRng::from_seed([42; 32]);
        let dummy_rnn = ThreadSafeRNN::<F>::new(1, 1, RecurrentActivation::Tanh, &mut rng)
            .expect("Failed to create dummy RNN");

        Self {
            forward_layer: Box::new(dummy_rnn),
            backward_layer: None,
            name: self.name.clone(),
        }
    }
}

impl<F: Float + Debug + ScalarOperand + Send + Sync + NumAssign + 'static> Layer<F>
    for ThreadSafeBidirectional<F>
{
    /// Forward pass.
    ///
    /// Runs the forward layer on the input as-is. When a backward layer is
    /// present, the input is reversed along the sequence axis (axis 1),
    /// processed, re-reversed so both outputs align per time step, and the
    /// two outputs are merged along the feature axis.
    fn forward(&self, input: &Array<F, IxDyn>) -> Result<Array<F, IxDyn>> {
        // Forward-direction pass over the original sequence order
        let forward_output = self.forward_layer.forward(input)?;

        // If we have a backward layer, run it on reversed input and combine
        if let Some(ref backward_layer) = self.backward_layer {
            // Reverse input along sequence dimension (axis 1) so the
            // backward layer sees time in reverse
            let mut reversed_input = input.to_owned();
            reversed_input.invert_axis(Axis(1));

            // Run backward layer
            let mut backward_output = backward_layer.forward(&reversed_input)?;

            // Undo the reversal so step t of both outputs refers to the
            // same input time step
            backward_output.invert_axis(Axis(1));

            // Stack along a new axis 2: [batch, seq, 2, hidden]
            let combined = scirs2_core::ndarray::stack(
                Axis(2),
                &[forward_output.view(), backward_output.view()],
            )?;

            // Flatten the direction axis into the feature axis:
            // [batch, seq, 2 * hidden] (forward features then backward
            // features per time step)
            let shape = combined.shape();
            let newshape = (shape[0], shape[1], shape[2] * shape[3]);
            let output = combined.into_shape_with_order(newshape)?.into_dyn();
            Ok(output)
        } else {
            // No backward layer: behave as a plain unidirectional layer
            Ok(forward_output)
        }
    }

    /// Backward pass (placeholder): returns a zero gradient with the
    /// input's shape. No parameter gradients are computed.
    fn backward(
        &self,
        // Fix: was named `_input` (signalling "unused") while actually being
        // read for its shape below — renamed to match its real use.
        input: &Array<F, IxDyn>,
        _grad_output: &Array<F, IxDyn>,
    ) -> Result<Array<F, IxDyn>> {
        let grad_input = Array::zeros(input.dim());
        Ok(grad_input)
    }

    /// Propagate a parameter update to the wrapped layer(s).
    fn update(&mut self, learningrate: F) -> Result<()> {
        // Update forward layer
        self.forward_layer.update(learningrate)?;

        // Update backward layer if present
        if let Some(ref mut backward_layer) = self.backward_layer {
            backward_layer.update(learningrate)?;
        }

        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }
}