use crate::error::ModelError;
use crate::neural_network::Tensor;
use crate::neural_network::layer::TrainingParameters;
use crate::neural_network::layer::helper_function::calculate_output_shape_1d_pooling;
use crate::neural_network::layer::layer_weight::LayerWeight;
use crate::neural_network::layer::pooling_layer::input_validation_function::{
validate_all_dims_positive, validate_input_shape_dims, validate_pool_size_1d,
validate_stride_1d,
};
use crate::neural_network::neural_network_trait::Layer;
use ndarray::Array3;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
// Work-size threshold handed to `execute_parallel_or_sequential!` below;
// presumably switches to rayon once the batch×channel grid exceeds this —
// confirm against the macro definition.
const AVERAGE_POOLING_1D_PARALLEL_THRESHOLD: usize = 32;
/// 1-D average pooling layer: slides a window of `pool_size` along the last
/// axis of a 3-D `(batch, channels, length)` input and emits the mean of
/// each window.
pub struct AveragePooling1D {
    // Width of the averaging window along the length axis.
    pool_size: usize,
    // Step between consecutive windows; defaults to `pool_size` in `new`.
    stride: usize,
    // Declared input shape; validated to have exactly 3 dims at construction.
    input_shape: Vec<usize>,
    // Input saved by `forward` so `backward` can size the input gradient.
    input_cache: Option<Tensor>,
}
impl AveragePooling1D {
    /// Builds a 1-D average pooling layer.
    ///
    /// `input_shape` must be `[batch, channels, length]`. When `stride` is
    /// `None`, the window size doubles as the step (non-overlapping pooling).
    ///
    /// # Errors
    /// Returns a `ModelError` when the input shape, pool size, or stride
    /// fails validation.
    pub fn new(
        pool_size: usize,
        input_shape: Vec<usize>,
        stride: Option<usize>,
    ) -> Result<Self, ModelError> {
        // Default: step by a full window, i.e. non-overlapping pooling.
        let effective_stride = stride.unwrap_or(pool_size);

        // Validation order matters: the shape checks guarantee that
        // `input_shape[2]` below is safe to index.
        validate_input_shape_dims(&input_shape, 3, "AveragePooling1D")?;
        validate_all_dims_positive(&input_shape)?;
        validate_pool_size_1d(pool_size, input_shape[2])?;
        validate_stride_1d(effective_stride)?;

        Ok(Self {
            pool_size,
            stride: effective_stride,
            input_shape,
            input_cache: None,
        })
    }
}
impl Layer for AveragePooling1D {
    /// Forward pass: computes the mean of each pooling window along the last
    /// axis, independently per batch element and channel, and caches the
    /// input so `backward` can size the input gradient.
    ///
    /// # Errors
    /// Returns `ModelError::InputValidationError` when the input is not 3-D
    /// or its length axis is shorter than the pooling window.
    fn forward(&mut self, input: &Tensor) -> Result<Tensor, ModelError> {
        if input.ndim() != 3 {
            return Err(ModelError::InputValidationError(
                "input tensor is not 3D".to_string(),
            ));
        }
        let batch_size = input.shape()[0];
        let channels = input.shape()[1];
        let length = input.shape()[2];
        // Guard the subtraction in the output-length formula: on `usize`,
        // `length - pool_size` panics in debug builds (and wraps in release)
        // when the input is shorter than the window. The constructor only
        // validated `input_shape`, not the tensor actually passed here.
        if length < self.pool_size {
            return Err(ModelError::InputValidationError(format!(
                "input length {} is smaller than pool size {}",
                length, self.pool_size
            )));
        }
        self.input_cache = Some(input.clone());
        // Number of full windows that fit along the length axis.
        let output_length = (length - self.pool_size) / self.stride + 1;
        let mut output = Array3::<f32>::zeros((batch_size, channels, output_length)).into_dyn();
        let pool_size = self.pool_size;
        let stride = self.stride;
        // Per-(batch, channel) worker: averages every window, recording the
        // output index alongside the value so results can be merged after
        // the (possibly parallel) run.
        let compute_pooling = |b: usize, c: usize| {
            // Exact size is known up front; avoid Vec regrowth in this hot path.
            let mut batch_channel_output = Vec::with_capacity(output_length);
            for i in 0..output_length {
                let start_idx = i * stride;
                let end_idx = start_idx + pool_size;
                let mut sum = 0.0;
                for j in start_idx..end_idx {
                    sum += input[[b, c, j]];
                }
                batch_channel_output.push((i, sum / (pool_size as f32)));
            }
            ((b, c), batch_channel_output)
        };
        // Dispatches the workers in parallel (rayon) or sequentially based on
        // the threshold — see the macro definition for the exact policy.
        let results: Vec<_> = execute_parallel_or_sequential!(
            batch_size,
            channels,
            AVERAGE_POOLING_1D_PARALLEL_THRESHOLD,
            compute_pooling
        );
        for ((b, c), outputs) in results {
            for (i, val) in outputs {
                output[[b, c, i]] = val;
            }
        }
        Ok(output)
    }

    /// Backward pass: distributes each output gradient uniformly over the
    /// `pool_size` inputs that produced it (overlapping windows accumulate).
    ///
    /// # Errors
    /// Returns `ModelError::ProcessingError` when `forward` was never called
    /// (no cached input), and `ModelError::InputValidationError` when
    /// `grad_output` is not 3-D.
    fn backward(&mut self, grad_output: &Tensor) -> Result<Tensor, ModelError> {
        let input = match &self.input_cache {
            Some(input) => input,
            None => {
                return Err(ModelError::ProcessingError(
                    "No cached input for AveragePooling1D".to_string(),
                ));
            }
        };
        // Validate rank before indexing `shape()[2]` below; mirrors the
        // explicit 3-D check done in `forward`.
        if grad_output.ndim() != 3 {
            return Err(ModelError::InputValidationError(
                "gradient tensor is not 3D".to_string(),
            ));
        }
        let batch_size = input.shape()[0];
        let channels = input.shape()[1];
        let length = input.shape()[2];
        // NOTE(review): assumes grad_output's batch/channel dims match the
        // cached input — confirm callers guarantee this.
        let output_length = grad_output.shape()[2];
        let mut grad_input = Array3::<f32>::zeros((batch_size, channels, length)).into_dyn();
        // Each input position in a window received 1/pool_size of that
        // window's output, so the gradient is scaled by the same factor.
        let scale_factor = 1.0 / (self.pool_size as f32);
        let pool_size = self.pool_size;
        let stride = self.stride;
        // Per-(batch, channel) worker: scatter-adds the scaled gradient back
        // over every input position each window covered.
        let compute_gradient = |b: usize, c: usize| {
            let mut channel_grad = vec![0.0f32; length];
            for i in 0..output_length {
                let start_idx = i * stride;
                let end_idx = start_idx + pool_size;
                let grad_val = grad_output[[b, c, i]] * scale_factor;
                for j in start_idx..end_idx {
                    channel_grad[j] += grad_val;
                }
            }
            ((b, c), channel_grad)
        };
        let results: Vec<_> = execute_parallel_or_sequential!(
            batch_size,
            channels,
            AVERAGE_POOLING_1D_PARALLEL_THRESHOLD,
            compute_gradient
        );
        merge_gradients_1d!(grad_input, results, length);
        Ok(grad_input)
    }

    fn layer_type(&self) -> &str {
        "AveragePooling1D"
    }

    // Fills in the remaining shared 1-D pooling methods; presumably backed by
    // the `calculate_output_shape_1d_pooling` / `LayerWeight` /
    // `TrainingParameters` imports at the top of the file — see the macro
    // definition to confirm.
    layer_functions_1d_pooling!();
}