use crate::error::{AnomalyError, AnomalyResult};
use crate::handle::LcgRng;
/// Added to the squared distance in the denominator of the loss term used for
/// negatively labelled samples (`eta / (dist² + INV_EPS)`), so the ratio stays
/// finite when a representation coincides with the centre.
const INV_EPS: f32 = 1e-3;
/// Maximum L2 norm allowed for each per-sample output-layer gradient; larger
/// gradients are rescaled to this norm before being back-propagated.
const GRAD_CLIP: f32 = 1.0e3;
/// Hyper-parameters for a [`DeepSad`] model.
#[derive(Debug, Clone)]
pub struct DeepSadConfig {
    /// Layer widths of the encoder: the first entry is the input width, the
    /// last entry is the representation width. The encoder rejects specs with
    /// fewer than two entries or with any zero entry.
    pub dims: Vec<usize>,
    /// Step size of the gradient-descent update.
    pub learning_rate: f32,
    /// Numerator of the inverse-distance penalty applied to samples whose
    /// label is negative.
    pub eta: f32,
    /// L2 penalty coefficient; applied to weights only, never to biases.
    pub weight_decay: f32,
    /// Seed of the pseudo-random generator used for weight initialisation.
    pub seed: u64,
}

impl DeepSadConfig {
    /// Creates a configuration for the given layer widths with the default
    /// hyper-parameters: learning rate `0.01`, `eta = 1.0`, no weight decay
    /// and seed `42`.
    ///
    /// The layer spec is copied verbatim; it is only validated when the model
    /// is constructed from this configuration.
    #[must_use]
    pub fn new(dims: &[usize]) -> Self {
        let layer_widths = Vec::from(dims);
        Self {
            seed: 42,
            weight_decay: 0.0,
            eta: 1.0,
            learning_rate: 0.01,
            dims: layer_widths,
        }
    }
}
/// Draws `fan_in * fan_out` initial weights for one dense layer.
///
/// Each draw is mapped as `u * 2 * limit - limit` with
/// `limit = sqrt(6 / (fan_in + fan_out))`, i.e. the Xavier/Glorot uniform
/// bound.
// NOTE(review): this yields values in `[-limit, limit)` only if
// `LcgRng::next_f32` returns values in `[0, 1)` — confirm against the RNG.
fn xavier_init(fan_in: usize, fan_out: usize, rng: &mut LcgRng) -> Vec<f32> {
    let bound = (6.0_f32 / (fan_in + fan_out) as f32).sqrt();
    let count = fan_in * fan_out;
    let mut weights = Vec::with_capacity(count);
    for _ in 0..count {
        let u = rng.next_f32();
        weights.push(u * 2.0 * bound - bound);
    }
    weights
}
/// Fully connected encoder: ReLU on every layer except the last, which is
/// linear.
#[derive(Debug, Clone)]
struct SadEncoder {
    /// Layer widths; `dims[l]` feeds layer `l`, `dims[l + 1]` is its output.
    dims: Vec<usize>,
    /// One flat matrix per layer, stored row-major as `[out_dim][in_dim]`.
    weights: Vec<Vec<f32>>,
    /// One bias vector per layer, initialised to zero.
    biases: Vec<Vec<f32>>,
}

impl SadEncoder {
    /// Builds an encoder for the given layer widths, drawing the weights from
    /// `rng` layer by layer.
    ///
    /// # Errors
    /// Returns [`AnomalyError::InvalidLayerDims`] when fewer than two widths
    /// are supplied or when any width is zero.
    fn new(dims: &[usize], rng: &mut LcgRng) -> AnomalyResult<Self> {
        if dims.len() < 2 {
            return Err(AnomalyError::InvalidLayerDims {
                msg: "need at least [input_dim, rep_dim]".into(),
            });
        }
        if dims.contains(&0) {
            return Err(AnomalyError::InvalidLayerDims {
                msg: "zero dimension in layer spec".into(),
            });
        }

        let layer_count = dims.len() - 1;
        let mut weights = Vec::with_capacity(layer_count);
        let mut biases = Vec::with_capacity(layer_count);
        // Consecutive width pairs describe one layer each.
        for pair in dims.windows(2) {
            let (fan_in, fan_out) = (pair[0], pair[1]);
            weights.push(xavier_init(fan_in, fan_out, rng));
            biases.push(vec![0.0_f32; fan_out]);
        }

        Ok(Self {
            dims: dims.to_vec(),
            weights,
            biases,
        })
    }

    /// Number of dense layers.
    #[inline]
    fn n_layers(&self) -> usize {
        self.weights.len()
    }

    /// Pre-activation of a single output unit: `bias + Σ row[i] * input[i]`,
    /// accumulated left to right starting from the bias.
    fn affine(row: &[f32], bias: f32, input: &[f32]) -> f32 {
        row.iter().zip(input).fold(bias, |acc, (w, a)| acc + w * a)
    }

    /// Runs the network on one sample and returns the final representation.
    ///
    /// # Errors
    /// Returns [`AnomalyError::DimensionMismatch`] when `x` does not have
    /// exactly `dims[0]` elements.
    fn forward(&self, x: &[f32]) -> AnomalyResult<Vec<f32>> {
        let expected = self.dims[0];
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }

        let n_layers = self.n_layers();
        let mut current = x.to_vec();
        for (layer, (w, b)) in self.weights.iter().zip(&self.biases).enumerate() {
            let in_dim = self.dims[layer];
            let out_dim = self.dims[layer + 1];
            let is_hidden = layer + 1 < n_layers;
            let input = &current[..in_dim];
            let next: Vec<f32> = (0..out_dim)
                .map(|o| {
                    let z = Self::affine(&w[o * in_dim..(o + 1) * in_dim], b[o], input);
                    if is_hidden {
                        z.max(0.0)
                    } else {
                        z
                    }
                })
                .collect();
            current = next;
        }
        Ok(current)
    }

    /// Forward pass that keeps the intermediates needed by [`Self::backward`].
    ///
    /// Returns `(acts, pre)`: `acts[0]` is a copy of `x` and `acts[l + 1]` is
    /// the output of layer `l`; `pre[l]` is the pre-activation of layer `l`.
    /// Unlike [`Self::forward`], the length of `x` is not checked here; an
    /// input shorter than `dims[0]` panics.
    fn forward_cache(&self, x: &[f32]) -> (Vec<Vec<f32>>, Vec<Vec<f32>>) {
        let n_layers = self.n_layers();
        let mut acts: Vec<Vec<f32>> = Vec::with_capacity(n_layers + 1);
        let mut pre: Vec<Vec<f32>> = Vec::with_capacity(n_layers);
        acts.push(x.to_vec());

        for layer in 0..n_layers {
            let in_dim = self.dims[layer];
            let out_dim = self.dims[layer + 1];
            let w = &self.weights[layer];
            let b = &self.biases[layer];
            let is_hidden = layer + 1 < n_layers;

            let input = &acts[layer][..in_dim];
            let z: Vec<f32> = (0..out_dim)
                .map(|o| Self::affine(&w[o * in_dim..(o + 1) * in_dim], b[o], input))
                .collect();
            let a: Vec<f32> = if is_hidden {
                z.iter().map(|&v| v.max(0.0)).collect()
            } else {
                z.clone()
            };

            pre.push(z);
            acts.push(a);
        }
        (acts, pre)
    }

    /// Back-propagates `grad_out` (gradient w.r.t. the network output) and
    /// **adds** the resulting parameter gradients into `grad_w` / `grad_b`.
    ///
    /// `acts` and `pre` must come from [`Self::forward_cache`] on the same
    /// sample. The bias gradient of the output layer is never accumulated.
    // NOTE(review): skipping the output-layer bias looks deliberate (the
    // optimiser step skips it as well) — presumably to keep that bias frozen
    // at zero; confirm the intent.
    fn backward(
        &self,
        acts: &[Vec<f32>],
        pre: &[Vec<f32>],
        grad_out: &[f32],
        grad_w: &mut [Vec<f32>],
        grad_b: &mut [Vec<f32>],
    ) {
        let n_layers = self.n_layers();
        let mut delta = grad_out.to_vec();

        for layer in (0..n_layers).rev() {
            let in_dim = self.dims[layer];
            let out_dim = self.dims[layer + 1];
            let is_hidden = layer + 1 < n_layers;
            let input = &acts[layer][..in_dim];

            // Parameter gradients of this layer: outer product delta ⊗ input.
            for (o, &go) in delta[..out_dim].iter().enumerate() {
                let row = &mut grad_w[layer][o * in_dim..(o + 1) * in_dim];
                for (g, a) in row.iter_mut().zip(input) {
                    *g += go * a;
                }
                if is_hidden {
                    grad_b[layer][o] += go;
                }
            }

            // Nothing to propagate below the first layer.
            if layer == 0 {
                continue;
            }

            // Wᵀ·delta, accumulated one output row at a time so every input
            // unit still sums its terms in ascending output order.
            let w = &self.weights[layer];
            let mut upstream = vec![0.0_f32; in_dim];
            for (o, &go) in delta[..out_dim].iter().enumerate() {
                let row = &w[o * in_dim..(o + 1) * in_dim];
                for (u, wv) in upstream.iter_mut().zip(row) {
                    *u += wv * go;
                }
            }

            // ReLU derivative of the layer below: pass the gradient only
            // where its pre-activation was strictly positive.
            let gate = &pre[layer - 1][..in_dim];
            for (u, &z) in upstream.iter_mut().zip(gate) {
                *u = if z > 0.0 { *u } else { 0.0 };
            }
            delta = upstream;
        }
    }
}
/// Semi-supervised anomaly detector in the style of Deep SAD.
///
/// An encoder maps each sample into a representation space. Samples whose
/// label is `>= 0` are pulled towards a fixed centre (loss = squared
/// distance); samples with a negative label are pushed away from it
/// (loss = `eta / (squared distance + INV_EPS)`). The anomaly score of a
/// point is its squared distance to the centre.
#[derive(Debug, Clone)]
pub struct DeepSad {
    /// Hyper-parameters supplied at construction.
    config: DeepSadConfig,
    /// Network mapping inputs to representations.
    encoder: SadEncoder,
    /// Centre of the representation space; `None` until the first `fit`.
    center: Option<Vec<f32>>,
    /// Number of features per input sample (first entry of `config.dims`).
    pub input_dim: usize,
    /// Width of the representation (last entry of `config.dims`).
    pub rep_dim: usize,
}

impl DeepSad {
    /// Smallest magnitude allowed for a centre coordinate after
    /// initialisation.
    const CENTER_EPS: f32 = 0.01;

    /// Builds an untrained model from `config`.
    ///
    /// # Errors
    /// Returns [`AnomalyError::InvalidLayerDims`] when `config.dims` has fewer
    /// than two entries or contains a zero.
    pub fn new(config: DeepSadConfig) -> AnomalyResult<Self> {
        let mut rng = LcgRng::new(config.seed);
        let encoder = SadEncoder::new(&config.dims, &mut rng)?;
        // `SadEncoder::new` succeeded, so `dims` holds at least two entries.
        let input_dim = config.dims[0];
        let rep_dim = config.dims[config.dims.len() - 1];
        Ok(Self {
            config,
            encoder,
            center: None,
            input_dim,
            rep_dim,
        })
    }

    /// Encodes a single sample into the representation space.
    ///
    /// # Errors
    /// Returns [`AnomalyError::DimensionMismatch`] when `x.len()` differs from
    /// `input_dim`.
    pub fn forward(&self, x: &[f32]) -> AnomalyResult<Vec<f32>> {
        self.encoder.forward(x)
    }

    /// Squared Euclidean distance between a representation and the centre.
    fn sq_dist(rep: &[f32], center: &[f32]) -> f32 {
        rep.iter()
            .zip(center)
            .map(|(r, ck)| (r - ck).powi(2))
            .sum()
    }

    /// Sample number `s` of the flat, row-major batch `x`.
    fn row<'a>(&self, x: &'a [f32], s: usize) -> &'a [f32] {
        let dim = self.input_dim;
        &x[s * dim..(s + 1) * dim]
    }

    /// Checks that `x` holds exactly `n_samples` rows of `input_dim` values
    /// and that there is one label per row.
    fn validate(&self, x: &[f32], n_samples: usize, labels: &[f32]) -> AnomalyResult<()> {
        if n_samples == 0 {
            return Err(AnomalyError::EmptyInput);
        }
        // Saturating: a wrapped product could otherwise match `x.len()` by
        // accident; `usize::MAX` can never equal the length of a real slice.
        let expected = n_samples.saturating_mul(self.input_dim);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        if labels.len() != n_samples {
            return Err(AnomalyError::DimensionMismatch {
                expected: n_samples,
                got: labels.len(),
            });
        }
        Ok(())
    }

    /// Sets the centre to the mean representation of all samples labelled
    /// `>= 0`, or of every sample when no such label exists.
    ///
    /// Coordinates whose magnitude is below [`Self::CENTER_EPS`] are moved to
    /// `±CENTER_EPS`, keeping their sign (an exact zero becomes
    /// `+CENTER_EPS`).
    fn init_center(&mut self, x: &[f32], labels: &[f32]) -> AnomalyResult<()> {
        if labels.is_empty() {
            // Guards the division below; `fit` validates before calling us.
            return Err(AnomalyError::EmptyInput);
        }
        let any_normal = labels.iter().any(|&eta| eta >= 0.0);

        let mut center = vec![0.0_f32; self.rep_dim];
        let mut count = 0_usize;
        for (s, &eta) in labels.iter().enumerate() {
            if eta >= 0.0 || !any_normal {
                let rep = self.encoder.forward(self.row(x, s))?;
                for (ck, rk) in center.iter_mut().zip(rep.iter()) {
                    *ck += rk;
                }
                count += 1;
            }
        }

        let inv = 1.0 / count as f32;
        for ck in &mut center {
            *ck *= inv;
            if ck.abs() < Self::CENTER_EPS {
                // Preserve the side of zero the mean fell on, as the
                // reference Deep SVDD / Deep SAD initialisation does.
                *ck = if *ck < 0.0 {
                    -Self::CENTER_EPS
                } else {
                    Self::CENTER_EPS
                };
            }
        }
        self.center = Some(center);
        Ok(())
    }

    /// Mean loss of the batch under the current weights and centre.
    ///
    /// Samples with label `>= 0` contribute their squared distance to the
    /// centre; all others contribute `eta / (squared distance + INV_EPS)`.
    ///
    /// # Errors
    /// * [`AnomalyError::EmptyInput`] / [`AnomalyError::DimensionMismatch`]
    ///   when the batch is malformed.
    /// * [`AnomalyError::NotFitted`] when no centre has been initialised.
    pub fn sad_loss(&self, x: &[f32], n_samples: usize, labels: &[f32]) -> AnomalyResult<f32> {
        self.validate(x, n_samples, labels)?;
        let c = self.center.as_deref().ok_or(AnomalyError::NotFitted)?;
        let mut total = 0.0_f32;
        for (s, &eta) in labels.iter().enumerate() {
            let rep = self.encoder.forward(self.row(x, s))?;
            let dsq = Self::sq_dist(&rep, c);
            total += if eta >= 0.0 {
                dsq
            } else {
                self.config.eta / (dsq + INV_EPS)
            };
        }
        Ok(total / n_samples as f32)
    }

    /// Performs one full-batch gradient-descent step and returns the mean
    /// loss measured *before* the update.
    ///
    /// The batch is not validated here; callers must pass `n_samples` rows in
    /// `x` and `n_samples` labels.
    ///
    /// # Errors
    /// Returns [`AnomalyError::NotFitted`] when no centre has been
    /// initialised.
    fn train_step(&mut self, x: &[f32], n_samples: usize, labels: &[f32]) -> AnomalyResult<f32> {
        // Borrowing is enough: the centre lives in a different field than the
        // encoder parameters mutated below.
        let c = self.center.as_deref().ok_or(AnomalyError::NotFitted)?;
        let n_layers = self.encoder.n_layers();

        let mut grad_w: Vec<Vec<f32>> = self
            .encoder
            .weights
            .iter()
            .map(|w| vec![0.0_f32; w.len()])
            .collect();
        let mut grad_b: Vec<Vec<f32>> = self
            .encoder
            .biases
            .iter()
            .map(|b| vec![0.0_f32; b.len()])
            .collect();

        let mut total_loss = 0.0_f32;
        for (s, &eta) in labels.iter().enumerate() {
            let (acts, pre) = self.encoder.forward_cache(self.row(x, s));
            let phi = &acts[n_layers];

            let diff: Vec<f32> = phi.iter().zip(c).map(|(pk, ck)| pk - ck).collect();
            let dsq = diff.iter().fold(0.0_f32, |acc, d| acc + d * d);

            // d(loss)/d(phi) = coef * (phi - c) for both kinds of sample.
            let coef = if eta >= 0.0 {
                total_loss += dsq;
                2.0
            } else {
                let denom = dsq + INV_EPS;
                total_loss += self.config.eta / denom;
                -2.0 * self.config.eta / (denom * denom)
            };
            let mut grad_out: Vec<f32> = diff.iter().map(|dk| coef * dk).collect();

            clip_gradient(&mut grad_out, GRAD_CLIP);
            self.encoder
                .backward(&acts, &pre, &grad_out, &mut grad_w, &mut grad_b);
        }

        let inv_n = 1.0 / n_samples as f32;
        let lr = self.config.learning_rate;
        let wd = self.config.weight_decay;
        for layer in 0..n_layers {
            let w = &mut self.encoder.weights[layer];
            for (wj, gj) in w.iter_mut().zip(grad_w[layer].iter()) {
                *wj -= lr * (gj * inv_n + wd * *wj);
            }
            // The output-layer bias is never updated, so it keeps its initial
            // value; hidden biases get a plain step without weight decay.
            if layer + 1 < n_layers {
                let b = &mut self.encoder.biases[layer];
                for (bj, gj) in b.iter_mut().zip(grad_b[layer].iter()) {
                    *bj -= lr * gj * inv_n;
                }
            }
        }
        Ok(total_loss * inv_n)
    }

    /// Trains the model for `n_steps` full-batch steps.
    ///
    /// `x` is a flat row-major batch of `n_samples * input_dim` values and
    /// `labels` holds one entry per sample (`>= 0`: pull towards the centre,
    /// negative: push away). The centre is computed from this batch only if
    /// the model has none yet; later calls keep training against the existing
    /// centre.
    ///
    /// Returns `n_steps + 1` losses: the pre-update loss of every step
    /// followed by the loss after the final update.
    ///
    /// # Errors
    /// [`AnomalyError::EmptyInput`] when `n_samples == 0`, and
    /// [`AnomalyError::DimensionMismatch`] when `x` or `labels` do not match
    /// `n_samples`.
    pub fn fit(
        &mut self,
        x: &[f32],
        n_samples: usize,
        labels: &[f32],
        n_steps: usize,
    ) -> AnomalyResult<Vec<f32>> {
        self.validate(x, n_samples, labels)?;
        if self.center.is_none() {
            self.init_center(x, labels)?;
        }
        let mut history = Vec::with_capacity(n_steps + 1);
        for _ in 0..n_steps {
            history.push(self.train_step(x, n_samples, labels)?);
        }
        history.push(self.sad_loss(x, n_samples, labels)?);
        Ok(history)
    }

    /// Anomaly score of one sample: squared distance of its representation to
    /// the centre (larger means more anomalous).
    ///
    /// # Errors
    /// [`AnomalyError::NotFitted`] before the first `fit`, and
    /// [`AnomalyError::DimensionMismatch`] when `x.len() != input_dim`.
    pub fn score(&self, x: &[f32]) -> AnomalyResult<f32> {
        let c = self.center.as_deref().ok_or(AnomalyError::NotFitted)?;
        let rep = self.encoder.forward(x)?;
        Ok(Self::sq_dist(&rep, c))
    }

    /// Scores `n` samples stored row-major in `x`; an empty batch yields an
    /// empty vector.
    ///
    /// # Errors
    /// [`AnomalyError::NotFitted`] before the first `fit`, and
    /// [`AnomalyError::DimensionMismatch`] when `x.len() != n * input_dim`.
    pub fn score_batch(&self, x: &[f32], n: usize) -> AnomalyResult<Vec<f32>> {
        let c = self.center.as_deref().ok_or(AnomalyError::NotFitted)?;
        // Saturating so an overflowing `n` is rejected instead of wrapping
        // around to a small product that happens to match `x.len()`.
        let expected = n.saturating_mul(self.input_dim);
        if x.len() != expected {
            return Err(AnomalyError::DimensionMismatch {
                expected,
                got: x.len(),
            });
        }
        let mut scores = Vec::with_capacity(n);
        for i in 0..n {
            let rep = self.encoder.forward(self.row(x, i))?;
            scores.push(Self::sq_dist(&rep, c));
        }
        Ok(scores)
    }

    /// `true` once a centre has been initialised by `fit`.
    #[inline]
    #[must_use]
    pub fn is_fitted(&self) -> bool {
        self.center.is_some()
    }
}
/// Rescales `g` in place so that its L2 norm does not exceed `max_norm`.
///
/// The slice is left untouched when its norm is already within the limit,
/// when the norm is zero, or when the norm is NaN (both comparisons are then
/// false).
fn clip_gradient(g: &mut [f32], max_norm: f32) {
    let squared_sum: f32 = g.iter().map(|v| v * v).sum();
    let norm = squared_sum.sqrt();

    let needs_clip = norm > max_norm && norm > 0.0;
    if needs_clip {
        let scale = max_norm / norm;
        g.iter_mut().for_each(|v| *v *= scale);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Configuration with `eta = 1.0` and no weight decay, as used by every
    /// training test below.
    fn config(dims: &[usize], learning_rate: f32, seed: u64) -> DeepSadConfig {
        DeepSadConfig {
            dims: dims.to_vec(),
            learning_rate,
            eta: 1.0,
            weight_decay: 0.0,
            seed,
        }
    }

    /// Builds a flat batch of `n_normal` samples near `+1` (label `1.0`)
    /// followed by `n_anom` samples near `-1` (label `-1.0`), each with `dim`
    /// features, and returns `(features, labels)`.
    fn toy_batch(dim: usize, n_normal: usize, n_anom: usize, seed: u64) -> (Vec<f32>, Vec<f32>) {
        let mut rng = LcgRng::new(seed);
        let total = n_normal + n_anom;
        let mut x: Vec<f32> = Vec::with_capacity(total * dim);
        let mut labels: Vec<f32> = Vec::with_capacity(total);
        for _ in 0..n_normal {
            x.extend((0..dim).map(|_| 1.0 + rng.next_f32() * 0.1));
            labels.push(1.0_f32);
        }
        for _ in 0..n_anom {
            x.extend((0..dim).map(|_| -1.0 - rng.next_f32() * 0.1));
            labels.push(-1.0_f32);
        }
        (x, labels)
    }

    /// The loss after training must be finite and lower than the first
    /// recorded loss.
    #[test]
    fn loss_decreases_during_training() {
        let mut model = DeepSad::new(config(&[3, 6, 2], 0.02, 1)).expect("new");
        let (x, labels) = toy_batch(3, 8, 2, 10);
        let history = model.fit(&x, labels.len(), &labels, 30).expect("fit");

        assert!(
            history.iter().all(|l| l.is_finite()),
            "loss must stay finite"
        );

        let last = history
            .last()
            .expect("loss history should have a final entry");
        let first = history
            .first()
            .expect("loss history should have an initial entry");
        assert!(
            last < first,
            "final loss {:?} should be below initial loss {:?}",
            history.last(),
            history.first()
        );
    }

    /// A point from the anomalous cluster must score higher than a point
    /// from the normal cluster once the model is trained.
    #[test]
    fn anomalies_score_higher_after_training() {
        let mut model = DeepSad::new(config(&[3, 8, 2], 0.03, 2)).expect("new");
        let (x, labels) = toy_batch(3, 10, 4, 20);
        model.fit(&x, labels.len(), &labels, 60).expect("fit");

        let normal_pt = [1.0_f32; 3];
        let anom_pt = [-1.0_f32; 3];
        let s_normal = model.score(&normal_pt).expect("score normal");
        let s_anom = model.score(&anom_pt).expect("score anom");
        assert!(
            s_anom > s_normal,
            "anomaly score {s_anom} should exceed normal score {s_normal}"
        );
    }

    /// Scores are finite and non-negative, also for inputs far from the
    /// training data.
    #[test]
    fn scores_finite() {
        let mut model = DeepSad::new(DeepSadConfig::new(&[4, 8, 3])).expect("new");
        let (x, labels) = toy_batch(4, 6, 3, 30);
        model.fit(&x, labels.len(), &labels, 15).expect("fit");

        let queries: [[f32; 4]; 3] = [[0.5, 0.5, 0.5, 0.5], [9.0, -9.0, 3.0, -3.0], [0.0; 4]];
        for q in &queries {
            let s = model.score(q).expect("score");
            assert!(s.is_finite() && s >= 0.0, "score={s}");
        }
    }

    /// One step with a positive label pulls the point towards the centre;
    /// one step with a negative label pushes it away.
    #[test]
    fn eta_sign_controls_gradient_direction() {
        let (x_train, labels_train) = toy_batch(3, 6, 0, 7);
        let p = [0.6_f32, 0.4, 0.7];

        // Fresh model whose centre is initialised but whose weights are
        // untrained (`fit` with zero steps).
        let make = || {
            let mut m = DeepSad::new(config(&[3, 6, 2], 0.05, 5)).expect("new");
            m.fit(&x_train, labels_train.len(), &labels_train, 0)
                .expect("init centre");
            m
        };
        // Distance of `p` to the centre before and after one step on `p`.
        let distance_change = |label: f32, what: &str| {
            let mut m = make();
            let before = m.score(&p).expect("score");
            m.train_step(&p, 1, &[label]).expect(what);
            let after = m.score(&p).expect("score");
            (before, after)
        };

        let (d_before_pull, d_after_pull) = distance_change(1.0, "pull step");
        assert!(
            d_after_pull < d_before_pull,
            "η=+1 should shrink distance: {d_before_pull} → {d_after_pull}"
        );

        let (d_before_push, d_after_push) = distance_change(-1.0, "push step");
        assert!(
            d_after_push > d_before_push,
            "η=−1 should grow distance: {d_before_push} → {d_after_push}"
        );
    }

    /// Malformed batches, an unfitted model and a too-short layer spec are
    /// all reported through the matching error variant.
    #[test]
    fn empty_and_dim_mismatch_errors() {
        let mut model = DeepSad::new(DeepSadConfig::new(&[3, 4, 2])).expect("new");

        let empty = model.fit(&[], 0, &[], 1);
        assert!(matches!(empty, Err(AnomalyError::EmptyInput)));

        let wrong_features = model.fit(&[1.0, 2.0, 3.0, 4.0], 2, &[1.0, 1.0], 1);
        assert!(matches!(
            wrong_features,
            Err(AnomalyError::DimensionMismatch { .. })
        ));

        let wrong_labels = model.fit(&[1.0, 2.0, 3.0], 1, &[1.0, -1.0], 1);
        assert!(matches!(
            wrong_labels,
            Err(AnomalyError::DimensionMismatch { .. })
        ));

        let unfitted = model.score(&[0.0, 0.0, 0.0]);
        assert!(matches!(unfitted, Err(AnomalyError::NotFitted)));

        let too_short = DeepSad::new(DeepSadConfig::new(&[4]));
        assert!(matches!(
            too_short,
            Err(AnomalyError::InvalidLayerDims { .. })
        ));
    }
}