//! Dense (fully connected) layer implementation
use crate::activations::Activation;
use crate::error::{NeuralError, Result};
use crate::layers::{Layer, ParamLayer};
use ndarray::{s, Array, IxDyn, ScalarOperand};
use ndarray_rand::rand_distr::{Distribution, Uniform};
use ndarray_rand::rand::Rng;
use num_traits::Float;
use std::fmt::Debug;
// SIMD optimizations using scirs2-core
// use scirs2_core::parallel_ops::*; // Unused for now
// Rand functionality already imported above
/// Dense (fully connected) layer for neural networks.
///
/// A dense layer is a layer where each input neuron is connected to each output neuron.
/// It performs the operation: y = activation(W * x + b), where W is the weight matrix,
/// x is the input vector, b is the bias vector, and activation is the activation function.
///
/// # Examples
///
/// ```
/// use scirs2_neural::layers::{Dense, Layer};
/// use ndarray::{Array, Array2};
/// // Create a dense layer with 2 input neurons, 3 output neurons, and ReLU activation
/// let mut rng = rand::rng();
/// let dense = Dense::new(2, 3, Some("relu"), &mut rng).unwrap();
/// // Forward pass with a batch of 2 samples
/// let input = Array2::from_shape_vec((2, 2), vec![1.0f64, 2.0, 3.0, 4.0]).unwrap().into_dyn();
/// let output = dense.forward(&input).unwrap();
/// // Output shape should be (2, 3) - 2 samples with 3 features each
/// assert_eq!(output.shape(), &[2, 3]);
/// ```
// Can't derive Debug because of the Activation trait object
pub struct Dense<F: Float + Debug + Send + Sync> {
    /// Number of input features
    input_dim: usize,
    /// Number of output features
    output_dim: usize,
    /// Weight matrix
    weights: Array<F, IxDyn>,
    /// Bias vector
    biases: Array<F, IxDyn>,
    /// Gradient of the weights
    dweights: std::sync::RwLock<Array<F, IxDyn>>,
    /// Gradient of the biases
    dbiases: std::sync::RwLock<Array<F, IxDyn>>,
    /// Activation function, if any
    activation: Option<Box<dyn Activation<F> + Send + Sync>>,
    /// Input from the forward pass, needed in backward pass
    input: std::sync::RwLock<Option<Array<F, IxDyn>>>,
    /// Output before activation, needed in backward pass
    output_pre_activation: std::sync::RwLock<Option<Array<F, IxDyn>>>,
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> std::fmt::Debug for Dense<F> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Dense")
            .field("input_dim", &self.input_dim)
            .field("output_dim", &self.output_dim)
            .field("weights", &self.weights)
            .field("biases", &self.biases)
            .field("dweights", &"RwLock<Array>")
            .field("dbiases", &"RwLock<Array>")
            .field("has_activation", &self.activation.is_some())
            .field("input", &"RwLock<Option<Array>>")
            .field("output_pre_activation", &"RwLock<Option<Array>>")
            .finish()
    }
}
// Can't implement Clone for Box<dyn Activation<F>> without major changes
// Let's make a simplified Clone that doesn't try to clone the activation function
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> Clone for Dense<F> {
    fn clone(&self) -> Self {
        Self {
            input_dim: self.input_dim,
            output_dim: self.output_dim,
            weights: self.weights.clone(),
            biases: self.biases.clone(),
            dweights: std::sync::RwLock::new(self.dweights.read().unwrap().clone()),
            dbiases: std::sync::RwLock::new(self.dbiases.read().unwrap().clone()),
            // We can't clone trait objects directly, so the clone has no activation
            activation: None,
            input: std::sync::RwLock::new(self.input.read().unwrap().clone()),
            output_pre_activation: std::sync::RwLock::new(
                self.output_pre_activation.read().unwrap().clone(),
            ),
        }
    }
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> Dense<F> {
    /// Create a new dense layer.
    ///
    /// # Arguments
    ///
    /// * `input_dim` - Number of input features
    /// * `output_dim` - Number of output features
    /// * `activation_name` - Optional activation function name
    /// * `rng` - Random number generator for weight initialization
    ///
    /// # Returns
    ///
    /// * A new dense layer
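    ///
    /// # Examples
    ///
    /// A minimal construction sketch; it assumes the same `rand::rng()` setup used
    /// in the struct-level example above.
    ///
    /// ```
    /// use scirs2_neural::layers::Dense;
    ///
    /// let mut rng = rand::rng();
    /// let layer: Dense<f64> = Dense::new(4, 2, Some("sigmoid"), &mut rng).unwrap();
    /// assert_eq!(layer.input_dim(), 4);
    /// assert_eq!(layer.output_dim(), 2);
    /// ```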
    pub fn new<R: Rng>(
        input_dim: usize,
        output_dim: usize,
        activation_name: Option<&str>,
        rng: &mut R,
    ) -> Result<Self> {
        // Create activation function from name
        let activation = if let Some(name) = activation_name {
            match name.to_lowercase().as_str() {
                "relu" => Some(Box::new(crate::activations::ReLU::new())
                    as Box<dyn Activation<F> + Send + Sync>),
                "sigmoid" => Some(Box::new(crate::activations::Sigmoid::new())
                    as Box<dyn Activation<F> + Send + Sync>),
                "tanh" => Some(Box::new(crate::activations::Tanh::new())
                    as Box<dyn Activation<F> + Send + Sync>),
                "softmax" => Some(Box::new(crate::activations::Softmax::new(1))
                    as Box<dyn Activation<F> + Send + Sync>),
                "gelu" => Some(Box::new(crate::activations::GELU::new())
                    as Box<dyn Activation<F> + Send + Sync>),
                "swish" => Some(Box::new(crate::activations::Swish::new(1.0))
                    as Box<dyn Activation<F> + Send + Sync>),
                "mish" => Some(Box::new(crate::activations::Mish::new())
                    as Box<dyn Activation<F> + Send + Sync>),
                _ => None,
            }
        } else {
            None
        };
        // Initialize weights uniformly in [-scale, scale] with scale = 1/sqrt(input_dim)
        let scale = F::from(1.0 / f64::sqrt(input_dim as f64)).ok_or_else(|| {
            NeuralError::InvalidArchitecture("Failed to convert scale factor".to_string())
        })?;
        // Create a 2D weights array
        let uniform = Uniform::new(-1.0, 1.0);
        let weights_vec: Vec<F> = (0..(input_dim * output_dim))
            .map(|_| {
                let val = F::from(uniform.sample(rng)).ok_or_else(|| {
                    NeuralError::InvalidArchitecture("Failed to convert random value".to_string())
                });
                val.map(|v| v * scale).unwrap_or_else(|_| F::zero())
            })
            .collect();
        let weights =
            Array::from_shape_vec(IxDyn(&[input_dim, output_dim]), weights_vec).map_err(|e| {
                NeuralError::InvalidArchitecture(format!("Failed to create weights array: {}", e))
            })?;
        // Initialize biases with zeros
        let biases = Array::zeros(IxDyn(&[output_dim]));
        // Initialize gradient arrays with zeros
        let dweights = std::sync::RwLock::new(Array::zeros(weights.dim()));
        let dbiases = std::sync::RwLock::new(Array::zeros(biases.dim()));
        Ok(Self {
            input_dim,
            output_dim,
            weights,
            biases,
            dweights,
            dbiases,
            activation,
            input: std::sync::RwLock::new(None),
            output_pre_activation: std::sync::RwLock::new(None),
        })
    }

    /// Get the input dimension
    pub fn input_dim(&self) -> usize {
        self.input_dim
    }

    /// Get the output dimension
    pub fn output_dim(&self) -> usize {
        self.output_dim
    }
    /// Get the activation function name
    pub fn activation_name(&self) -> Option<&str> {
        if let Some(ref activation) = self.activation {
            // We can't use Debug or downcasting on the trait object, so probe the
            // activation with a single test value and infer its name from the output.
            // This heuristic is approximate and only recognizes a few activations.
            let test_input = Array::from_elem(IxDyn(&[1, 1]), F::one());
            let result = activation.forward(&test_input).ok();
            if let Some(output) = result {
                let val = output[[0, 0]];
                if val == F::one() {
                    // ReLU(1) = 1
                    Some("relu")
                } else if val > F::from(0.7).unwrap() && val < F::from(0.75).unwrap() {
                    // Sigmoid(1) ~= 0.73
                    Some("sigmoid")
                } else if val > F::from(0.75).unwrap() && val < F::from(0.8).unwrap() {
                    // tanh(1) ~= 0.76
                    Some("tanh")
                } else {
                    // Other activations are harder to identify from a single probe
                    None
                }
            } else {
                None
            }
        } else {
            None
        }
    }
    /// Matrix multiplication for the forward pass (SIMD/parallel hooks via scirs2-core
    /// are planned; the current implementation is scalar)
    fn simd_matmul_forward(
        &self,
        input: &Array<F, IxDyn>,
        output: &mut Array<F, IxDyn>,
    ) -> Result<()> {
        let batch_size = input.shape()[0];
        let input_dim = self.input_dim;
        let output_dim = self.output_dim;

        #[cfg(feature = "core")]
        {
            // Use sequential processing for now to avoid mutable borrow issues
            // in parallel closures
            for i in 0..batch_size {
                self.simd_compute_row(input, output, i, input_dim, output_dim);
            }
        }
        #[cfg(not(feature = "core"))]
        {
            // Fallback to sequential processing
            for i in 0..batch_size {
                self.simd_compute_row(input, output, i, input_dim, output_dim);
            }
        }

        // Add biases to each output row using element-wise operations
        for i in 0..batch_size {
            let mut output_row = output.slice_mut(s![i, ..]);
            let bias_slice = self.biases.slice(s![..]);
            ndarray::Zip::from(&mut output_row)
                .and(&bias_slice)
                .for_each(|out, &bias| {
                    *out = *out + bias;
                });
        }
        Ok(())
    }
    /// Compute a single output row (dot product of the input row with each weight column)
    fn simd_compute_row(
        &self,
        input: &Array<F, IxDyn>,
        output: &mut Array<F, IxDyn>,
        row_idx: usize,
        _input_dim: usize,
        output_dim: usize,
    ) {
        let input_row = input.slice(s![row_idx, ..]);
        for j in 0..output_dim {
            let weight_col = self.weights.slice(s![.., j]);
            // Use standard dot product of the input row with weight column j
            let dot_product = ndarray::Zip::from(&input_row)
                .and(&weight_col)
                .fold(F::zero(), |acc, &x, &w| acc + x * w);
            output[[row_idx, j]] = dot_product;
        }
    }
    /// Gradient computation for the backward pass (weight and bias gradients)
    fn simd_compute_gradients(
        &self,
        input: &Array<F, IxDyn>,
        grad_output: &Array<F, IxDyn>,
    ) -> Result<(Array<F, IxDyn>, Array<F, IxDyn>)> {
        let _batch_size = input.shape()[0];
        let input_dim = self.input_dim;
        let output_dim = self.output_dim;

        // Initialize gradient arrays
        let mut dweights = Array::zeros(IxDyn(&[input_dim, output_dim]));
        let mut dbiases = Array::zeros(IxDyn(&[output_dim]));

        // Weight gradient computation (dW = input.T @ grad_output)
        for i in 0..input_dim {
            for j in 0..output_dim {
                // Compute dot product of input column i with grad_output column j
                let input_col = input.slice(s![.., i]);
                let grad_col = grad_output.slice(s![.., j]);
                let gradient = ndarray::Zip::from(&input_col)
                    .and(&grad_col)
                    .fold(F::zero(), |acc, &x, &g| acc + x * g);
                dweights[[i, j]] = gradient;
            }
        }

        // Bias gradient computation (db = sum(grad_output, axis=0))
        for j in 0..output_dim {
            let grad_col = grad_output.slice(s![.., j]);
            // Sum the column over the batch dimension
            dbiases[[j]] = grad_col.fold(F::zero(), |acc, &x| acc + x);
        }

        Ok((dweights, dbiases))
    }
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> Layer<F> for Dense<F> {
    fn forward(&self, input: &Array<F, ndarray::IxDyn>) -> Result<Array<F, ndarray::IxDyn>> {
        // Cache input for backward pass
        let mut input_cache = self.input.write().unwrap();
        *input_cache = Some(input.clone());

        // Reshape input to 2-D (batch_size, input_dim) if needed
        let input_to_use = if input.ndim() == 1 {
            input
                .clone()
                .into_shape_with_order(IxDyn(&[1, self.input_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape input: {}", e))
                })?
        } else {
            let batch_size: usize = input.shape().iter().take(input.ndim() - 1).product();
            input
                .clone()
                .into_shape_with_order(IxDyn(&[batch_size, self.input_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape input: {}", e))
                })?
        };

        // Compute linear transformation: output = input @ weights + bias
        // (the bias is added inside simd_matmul_forward)
        let mut output = Array::zeros(IxDyn(&[input_to_use.shape()[0], self.output_dim]));
        self.simd_matmul_forward(&input_to_use, &mut output)?;

        // Cache pre-activation output
        let mut pre_activation_cache = self.output_pre_activation.write().unwrap();
        *pre_activation_cache = Some(output.clone());

        // Apply activation function if present
        if let Some(ref activation) = self.activation {
            activation.forward(&output)
        } else {
            Ok(output)
        }
    }
    fn backward(
        &self,
        input: &Array<F, ndarray::IxDyn>,
        grad_output: &Array<F, ndarray::IxDyn>,
    ) -> Result<Array<F, ndarray::IxDyn>> {
        // Get cached data
        let pre_activation = {
            let cache = self.output_pre_activation.read().unwrap();
            cache.clone().ok_or_else(|| {
                NeuralError::InferenceError(
                    "No cached pre-activation output for backward pass".to_string(),
                )
            })?
        };

        // Apply activation gradient if present
        let grad_pre_activation = if let Some(ref activation) = self.activation {
            activation.backward(&pre_activation, grad_output)?
        } else {
            grad_output.clone()
        };

        // Reshape input to 2-D (batch_size, input_dim) if needed
        let reshaped_input = if input.ndim() == 1 {
            input
                .clone()
                .into_shape_with_order(IxDyn(&[1, self.input_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape input: {}", e))
                })?
        } else {
            let batch_size: usize = input.shape().iter().take(input.ndim() - 1).product();
            input
                .clone()
                .into_shape_with_order(IxDyn(&[batch_size, self.input_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape input: {}", e))
                })?
        };

        // Reshape the activation gradient to 2-D (batch_size, output_dim) if needed
        let reshaped_grad = if grad_pre_activation.ndim() == 1 {
            grad_pre_activation
                .into_shape_with_order(IxDyn(&[1, self.output_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape gradient: {}", e))
                })?
        } else {
            let batch_size: usize = grad_pre_activation
                .shape()
                .iter()
                .take(grad_pre_activation.ndim() - 1)
                .product();
            grad_pre_activation
                .into_shape_with_order(IxDyn(&[batch_size, self.output_dim]))
                .map_err(|e| {
                    NeuralError::InferenceError(format!("Failed to reshape gradient: {}", e))
                })?
        };

        // Compute gradients
        let (dweights, dbiases) = self.simd_compute_gradients(&reshaped_input, &reshaped_grad)?;

        // Update internal gradients
        let mut dweights_guard = self.dweights.write().unwrap();
        *dweights_guard = dweights;
        let mut dbiases_guard = self.dbiases.write().unwrap();
        *dbiases_guard = dbiases;

        // Compute gradient w.r.t. input: grad_input = grad_output @ weights.T
        // Use manual matrix multiplication to avoid trait resolution recursion
        let batch_size = reshaped_grad.shape()[0];
        let mut grad_input = Array::zeros(IxDyn(&[batch_size, self.input_dim]));
        for b in 0..batch_size {
            for i in 0..self.input_dim {
                let mut sum = F::zero();
                for j in 0..self.output_dim {
                    sum = sum + reshaped_grad[[b, j]] * self.weights[[i, j]];
                }
                grad_input[[b, i]] = sum;
            }
        }

        // Reshape back to original input shape
        let original_shape: Vec<usize> = input.shape().to_vec();
        grad_input
            .into_shape_with_order(IxDyn(&original_shape))
            .map_err(|e| NeuralError::InferenceError(format!("Failed to reshape gradient: {}", e)))
    }
    fn update(&mut self, learning_rate: F) -> Result<()> {
        let dweights = {
            let dweights_guard = self.dweights.read().unwrap();
            dweights_guard.clone()
        };
        let dbiases = {
            let dbiases_guard = self.dbiases.read().unwrap();
            dbiases_guard.clone()
        };

        // Update weights and biases with a plain gradient-descent step
        self.weights = &self.weights - &(&dweights * learning_rate);
        self.biases = &self.biases - &(&dbiases * learning_rate);
        Ok(())
    }

    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }

    fn layer_type(&self) -> &str {
        "Dense"
    }

    fn parameter_count(&self) -> usize {
        // Count weights and biases
        self.weights.len() + self.biases.len()
    }

    fn layer_description(&self) -> String {
        format!(
            "type:Dense, input_dim:{}, output_dim:{}, activation:{}",
            self.input_dim,
            self.output_dim,
            self.activation_name().unwrap_or("None")
        )
    }
}
impl<F: Float + Debug + ScalarOperand + Send + Sync + 'static> ParamLayer<F> for Dense<F> {
    fn get_parameters(&self) -> Vec<&Array<F, ndarray::IxDyn>> {
        vec![&self.weights, &self.biases]
    }

    fn get_gradients(&self) -> Vec<&Array<F, ndarray::IxDyn>> {
        // The gradients are stored behind RwLocks, so plain references cannot be
        // returned with this signature. Until the trait is redesigned to return
        // owned arrays (or to use a callback-based interface), return an empty
        // vector as a placeholder.
        vec![]
    }

    fn set_parameters(&mut self, params: Vec<Array<F, ndarray::IxDyn>>) -> Result<()> {
        if params.len() != 2 {
            return Err(NeuralError::InvalidArchitecture(format!(
                "Expected 2 parameters (weights, biases), got {}",
                params.len()
            )));
        }
        let weights = &params[0];
        let biases = &params[1];
        if weights.shape() != self.weights.shape() {
            return Err(NeuralError::InvalidArchitecture(format!(
                "Weights shape mismatch: expected {:?}, got {:?}",
                self.weights.shape(),
                weights.shape()
            )));
        }
        if biases.shape() != self.biases.shape() {
            return Err(NeuralError::InvalidArchitecture(format!(
                "Biases shape mismatch: expected {:?}, got {:?}",
                self.biases.shape(),
                biases.shape()
            )));
        }
        self.weights = weights.clone();
        self.biases = biases.clone();
        Ok(())
    }
}
// Explicit Send + Sync implementations for Dense layer
unsafe impl<F: Float + Debug + Send + Sync> Send for Dense<F> {}
unsafe impl<F: Float + Debug + Send + Sync> Sync for Dense<F> {}
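
// A minimal test sketch exercising the public API above. The layer sizes, the
// use of `rand::rng()` (mirroring the doc example), and the shape-only
// assertions are illustrative assumptions rather than an existing test suite.
#[cfg(test)]
mod tests {
    use super::*;
    use ndarray::Array2;

    #[test]
    fn forward_produces_expected_output_shape() {
        let mut rng = rand::rng();
        let dense: Dense<f64> = Dense::new(4, 3, Some("relu"), &mut rng).unwrap();

        // Batch of 5 samples with 4 features each.
        let input = Array2::<f64>::ones((5, 4)).into_dyn();
        let output = dense.forward(&input).unwrap();
        assert_eq!(output.shape(), &[5, 3]);
    }

    #[test]
    fn backward_returns_gradient_with_input_shape() {
        let mut rng = rand::rng();
        let dense: Dense<f64> = Dense::new(4, 3, None, &mut rng).unwrap();

        let input = Array2::<f64>::ones((2, 4)).into_dyn();
        let output = dense.forward(&input).unwrap();

        // Propagate a gradient of ones with the same shape as the output.
        let grad_output = Array::<f64, _>::ones(output.raw_dim());
        let grad_input = dense.backward(&input, &grad_output).unwrap();
        assert_eq!(grad_input.shape(), input.shape());
    }
}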