use scivex_core::{Float, Tensor};
use crate::error::Result;
use crate::variable::Variable;
use super::Layer;
/// Batch normalisation layer for 2-D inputs laid out as `[batch, features]`.
///
/// Each feature column is normalised with a mean and variance, then scaled by
/// `gamma` and shifted by `beta`. Which statistics are used depends on the
/// `training` flag: batch statistics while training, the stored running
/// statistics otherwise.
pub struct BatchNorm1d<T>
where
    T: Float,
{
    /// Per-feature scale applied after normalisation; exposed via `parameters()`.
    gamma: Variable<T>,
    /// Per-feature shift applied after normalisation; exposed via `parameters()`.
    beta: Variable<T>,
    /// Per-feature mean used in place of the batch mean when not training.
    running_mean: Vec<T>,
    /// Per-feature variance used in place of the batch variance when not training.
    running_var: Vec<T>,
    /// Small constant added to the variance before taking the square root.
    eps: T,
    /// `true` selects batch statistics, `false` selects the running statistics.
    training: bool,
}
impl<T> BatchNorm1d<T>
where
    T: Float,
{
    /// Builds a layer for inputs with `num_features` columns.
    ///
    /// Initial state: `gamma` is all ones, `beta` is all zeros, the running
    /// mean is zero, the running variance is one, `eps` is `1e-5`, and the
    /// layer starts in training mode.
    pub fn new(num_features: usize) -> Self {
        // Both affine parameters share the same one-dimensional shape.
        let per_feature = vec![num_features];
        Self {
            gamma: Variable::new(Tensor::ones(per_feature.clone()), true),
            beta: Variable::new(Tensor::zeros(per_feature), true),
            running_mean: vec![T::zero(); num_features],
            running_var: vec![T::one(); num_features],
            eps: T::from_f64(1e-5),
            training: true,
        }
    }
}
/// Computes the per-feature mean and biased variance of a row-major
/// `[batch, features]` buffer.
///
/// Returns `(mean, var)`, each of length `features`. The variance divides by
/// `batch`, not `batch - 1`.
fn column_mean_and_variance<T: Float>(
    data: &[T],
    batch: usize,
    features: usize,
) -> (Vec<T>, Vec<T>) {
    let n = T::from_usize(batch);
    let mut mean = vec![T::zero(); features];
    let mut var = vec![T::zero(); features];
    for f in 0..features {
        let mut sum = T::zero();
        for b in 0..batch {
            sum += data[b * features + f];
        }
        mean[f] = sum / n;

        // Second pass: squared deviations from the mean just computed.
        let mut sq_sum = T::zero();
        for b in 0..batch {
            let diff = data[b * features + f] - mean[f];
            sq_sum += diff * diff;
        }
        var[f] = sq_sum / n;
    }
    (mean, var)
}

impl<T: Float> Layer<T> for BatchNorm1d<T> {
    /// Normalises `x` per feature column and applies the affine transform
    /// `gamma * x_norm + beta`.
    ///
    /// `x` is read as a row-major `[batch, features]` tensor: `shape[0]` is the
    /// batch size and `shape[1]` the feature count. In training mode the mean
    /// and (biased) variance are computed from the batch; otherwise the stored
    /// `running_mean` / `running_var` are used.
    ///
    /// The returned variable carries a backward closure producing gradients for
    /// `x`, `gamma` and `beta`, in that order. In training mode the input
    /// gradient accounts for the dependence of the batch statistics on `x`. In
    /// eval mode the statistics are constants, so the input gradient is simply
    /// `g * gamma * inv_std`.
    ///
    /// NOTE(review): this method takes `&self`, and nothing in this impl
    /// updates `running_mean` / `running_var`, so eval mode uses whatever values
    /// those fields currently hold.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `Tensor::from_vec` when building the
    /// output tensors.
    ///
    /// # Panics
    ///
    /// Panics if `x` has fewer than two dimensions, or if an index computed
    /// from `shape[0]` / `shape[1]` falls outside the input, parameter, or
    /// running-statistic buffers.
    fn forward(&self, x: &Variable<T>) -> Result<Variable<T>> {
        let x_data = x.data();
        let shape = x_data.shape().to_vec();
        let batch = shape[0];
        let features = shape[1];
        let x_slice = x_data.as_slice();

        let gamma_data = self.gamma.data();
        let beta_data = self.beta.data();
        let g_slice = gamma_data.as_slice();
        let b_slice = beta_data.as_slice();

        // Copy the flag so the backward closure knows which statistics were used.
        let training = self.training;
        let (mean, var) = if training {
            column_mean_and_variance(x_slice, batch, features)
        } else {
            (self.running_mean.clone(), self.running_var.clone())
        };

        let mut inv_std = vec![T::zero(); features];
        for f in 0..features {
            inv_std[f] = T::one() / (var[f] + self.eps).sqrt();
        }

        let mut out_data = vec![T::zero(); batch * features];
        let mut x_norm_data = vec![T::zero(); batch * features];
        for b_idx in 0..batch {
            for f in 0..features {
                let idx = b_idx * features + f;
                let xn = (x_slice[idx] - mean[f]) * inv_std[f];
                x_norm_data[idx] = xn;
                out_data[idx] = g_slice[f] * xn + b_slice[f];
            }
        }

        let out = Tensor::from_vec(out_data, shape.clone())?;
        let x_norm = Tensor::from_vec(x_norm_data, shape.clone())?;
        let inv_std_t = Tensor::from_vec(inv_std, vec![features])?;

        Ok(Variable::from_op(
            out,
            vec![x.clone(), self.gamma.clone(), self.beta.clone()],
            Box::new(move |g: &Tensor<T>| {
                let g_s = g.as_slice();
                let xn_s = x_norm.as_slice();
                let inv_s = inv_std_t.as_slice();
                let gs = gamma_data.as_slice();
                let n = T::from_usize(batch);

                let mut grad_gamma = vec![T::zero(); features];
                let mut grad_beta = vec![T::zero(); features];
                let mut grad_x = vec![T::zero(); batch * features];

                for f in 0..features {
                    // Column sums of g and g * x_norm; these are exactly the
                    // beta and gamma gradients and also feed the input gradient.
                    let mut g_sum = T::zero();
                    let mut gxn_sum = T::zero();
                    for b_idx in 0..batch {
                        let idx = b_idx * features + f;
                        g_sum += g_s[idx];
                        gxn_sum += g_s[idx] * xn_s[idx];
                    }
                    grad_beta[f] = g_sum;
                    grad_gamma[f] = gxn_sum;

                    let scale = gs[f] * inv_s[f];
                    for b_idx in 0..batch {
                        let idx = b_idx * features + f;
                        grad_x[idx] = if training {
                            // Batch statistics depend on x, so their
                            // contribution is subtracted out.
                            scale * (g_s[idx] - g_sum / n - xn_s[idx] * gxn_sum / n)
                        } else {
                            // Running statistics are constants w.r.t. x.
                            scale * g_s[idx]
                        };
                    }
                }

                vec![
                    Tensor::from_vec(grad_x, shape.clone())
                        .expect("grad shape matches forward pass"),
                    Tensor::from_vec(grad_gamma, vec![features])
                        .expect("gamma grad length matches features"),
                    Tensor::from_vec(grad_beta, vec![features])
                        .expect("beta grad length matches features"),
                ]
            }),
        ))
    }

    /// Returns clones of the two affine parameters, `gamma` then `beta`.
    fn parameters(&self) -> Vec<Variable<T>> {
        vec![self.gamma.clone(), self.beta.clone()]
    }

    /// Switches the layer to training mode (batch statistics).
    fn train(&mut self) {
        self.training = true;
    }

    /// Switches the layer to eval mode (running statistics).
    fn eval(&mut self) {
        self.training = false;
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use scivex_core::Tensor;

    /// The output keeps the `[batch, features]` shape of the input.
    #[test]
    fn test_batchnorm_output_shape() {
        let bn = BatchNorm1d::<f64>::new(3);
        let x = Variable::new(
            Tensor::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0], vec![2, 3]).unwrap(),
            true,
        );
        let y = bn.forward(&x).unwrap();
        assert_eq!(y.shape(), vec![2, 3]);
    }

    /// In training mode every feature column comes out with (approximately)
    /// zero mean and unit variance.
    #[test]
    fn test_batchnorm_normalized_output() {
        let bn = BatchNorm1d::<f64>::new(2);
        let x = Variable::new(
            Tensor::from_vec(vec![1.0, 10.0, 3.0, 20.0, 5.0, 30.0], vec![3, 2]).unwrap(),
            true,
        );
        let y = bn.forward(&x).unwrap();
        let y_data = y.data();
        let y_s = y_data.as_slice();
        for f in 0..2 {
            // Row-major layout: column `f` lives at offsets f, 2 + f, 4 + f.
            let column = [y_s[f], y_s[2 + f], y_s[4 + f]];
            let mean = column.iter().sum::<f64>() / 3.0;
            let var = column.iter().map(|v| (v - mean).powi(2)).sum::<f64>() / 3.0;
            assert!(mean.abs() < 1e-5, "feature {f} mean was {mean}");
            // Slightly below 1 because of the epsilon in the denominator.
            assert!((var - 1.0).abs() < 1e-4, "feature {f} variance was {var}");
        }
    }

    /// A fresh layer in eval mode uses mean 0 and variance 1, so the output is
    /// the input scaled by `1 / sqrt(1 + eps)`, i.e. almost unchanged.
    #[test]
    fn test_batchnorm_eval_uses_running_stats() {
        let mut bn = BatchNorm1d::<f64>::new(2);
        bn.eval();
        let input = vec![1.0, 10.0, 3.0, 20.0, 5.0, 30.0];
        let x = Variable::new(
            Tensor::from_vec(input.clone(), vec![3, 2]).unwrap(),
            true,
        );
        let y = bn.forward(&x).unwrap();
        let y_data = y.data();
        let y_s = y_data.as_slice();
        for (got, want) in y_s.iter().zip(input.iter()) {
            assert!((got - want).abs() < 1e-3, "got {got}, expected about {want}");
        }
    }

    /// Exactly two parameters are exposed.
    #[test]
    fn test_batchnorm_parameters() {
        let bn = BatchNorm1d::<f64>::new(5);
        assert_eq!(bn.parameters().len(), 2);
    }
}