#![allow(clippy::suboptimal_flops, clippy::neg_cmp_op_on_partial_ord)]
extern crate alloc;
use alloc::boxed::Box;
use alloc::vec;
#[derive(Debug, Clone)]
pub struct AdaGradF64 {
params: Box<[f64]>,
sum_sq_grad: Box<[f64]>,
learning_rate: f64,
epsilon: f64,
dims: usize,
count: u64,
}
#[derive(Debug, Clone)]
pub struct AdaGradF64Builder {
dimensions: Option<usize>,
learning_rate: Option<f64>,
epsilon: Option<f64>,
}
impl AdaGradF64 {
#[inline]
#[must_use]
pub fn builder() -> AdaGradF64Builder {
AdaGradF64Builder {
dimensions: Option::None,
learning_rate: Option::None,
epsilon: Option::None,
}
}
#[inline]
pub fn step(&mut self, gradient: &[f64]) -> Result<(), crate::DataError> {
assert_eq!(
gradient.len(),
self.dims,
"gradient length {} != dimensions {}",
gradient.len(),
self.dims,
);
for &g in gradient {
check_finite!(g);
}
for i in 0..self.dims {
self.sum_sq_grad[i] += gradient[i] * gradient[i];
self.params[i] -= self.learning_rate * gradient[i]
/ (crate::math::sqrt(self.sum_sq_grad[i]) + self.epsilon);
}
self.count += 1;
Ok(())
}
#[inline]
#[must_use]
pub fn parameters(&self) -> &[f64] {
&self.params
}
#[inline]
#[must_use]
pub fn parameter(&self, index: usize) -> f64 {
self.params[index]
}
#[inline]
pub fn set_parameters(&mut self, params: &[f64]) {
assert_eq!(
params.len(),
self.dims,
"params length {} != dimensions {}",
params.len(),
self.dims,
);
self.params.copy_from_slice(params);
}
#[inline]
#[must_use]
pub fn accumulated_gradients(&self) -> &[f64] {
&self.sum_sq_grad
}
#[inline]
#[must_use]
pub fn dimensions(&self) -> usize {
self.dims
}
#[inline]
#[must_use]
pub fn learning_rate(&self) -> f64 {
self.learning_rate
}
#[inline]
#[must_use]
pub fn epsilon(&self) -> f64 {
self.epsilon
}
#[inline]
#[must_use]
pub fn count(&self) -> u64 {
self.count
}
#[inline]
#[must_use]
pub fn is_primed(&self) -> bool {
self.count > 0
}
#[inline]
pub fn reset(&mut self) {
self.params.fill(0.0);
self.sum_sq_grad.fill(0.0);
self.count = 0;
}
}
impl AdaGradF64Builder {
#[inline]
#[must_use]
pub fn dimensions(mut self, dims: usize) -> Self {
self.dimensions = Option::Some(dims);
self
}
#[inline]
#[must_use]
pub fn learning_rate(mut self, lr: f64) -> Self {
self.learning_rate = Option::Some(lr);
self
}
#[inline]
#[must_use]
pub fn epsilon(mut self, eps: f64) -> Self {
self.epsilon = Option::Some(eps);
self
}
#[inline]
pub fn build(self) -> Result<AdaGradF64, crate::ConfigError> {
let dims = self
.dimensions
.ok_or(crate::ConfigError::Missing("dimensions"))?;
let lr = self
.learning_rate
.ok_or(crate::ConfigError::Missing("learning_rate"))?;
let eps = self.epsilon.unwrap_or(1e-8);
if dims < 1 {
return Err(crate::ConfigError::Invalid("dimensions must be >= 1"));
}
if !(lr > 0.0) {
return Err(crate::ConfigError::Invalid(
"learning_rate must be positive",
));
}
if !(eps > 0.0) {
return Err(crate::ConfigError::Invalid("epsilon must be positive"));
}
Ok(AdaGradF64 {
params: vec![0.0; dims].into_boxed_slice(),
sum_sq_grad: vec![0.0; dims].into_boxed_slice(),
learning_rate: lr,
epsilon: eps,
dims,
count: 0,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn minimizes_quadratic() {
let mut ag = AdaGradF64::builder()
.dimensions(1)
.learning_rate(1.0)
.build()
.unwrap();
ag.set_parameters(&[5.0]);
for _ in 0..500 {
let grad = [2.0 * ag.parameters()[0]];
ag.step(&grad).unwrap();
}
assert!(
ag.parameters()[0].abs() < 0.1,
"x = {}",
ag.parameters()[0]
);
}
#[test]
fn adapts_to_different_scales() {
let mut ag = AdaGradF64::builder()
.dimensions(2)
.learning_rate(1.0)
.build()
.unwrap();
ag.set_parameters(&[5.0, 5.0]);
for _ in 0..500 {
let p = ag.parameters();
let grad = [200.0 * p[0], 2.0 * p[1]];
ag.step(&grad).unwrap();
}
assert!(
ag.parameters()[0].abs() < 0.5,
"x0 = {}",
ag.parameters()[0]
);
assert!(
ag.parameters()[1].abs() < 0.5,
"x1 = {}",
ag.parameters()[1]
);
}
#[test]
fn accumulated_gradients_grow() {
let mut ag = AdaGradF64::builder()
.dimensions(2)
.learning_rate(0.1)
.build()
.unwrap();
ag.step(&[3.0, 4.0]).unwrap();
let acc = ag.accumulated_gradients();
assert!((acc[0] - 9.0).abs() < f64::EPSILON);
assert!((acc[1] - 16.0).abs() < f64::EPSILON);
ag.step(&[3.0, 4.0]).unwrap();
let acc = ag.accumulated_gradients();
assert!((acc[0] - 18.0).abs() < f64::EPSILON);
assert!((acc[1] - 32.0).abs() < f64::EPSILON);
}
#[test]
fn rejects_nan_gradient() {
let mut ag = AdaGradF64::builder()
.dimensions(2)
.learning_rate(0.1)
.build()
.unwrap();
assert_eq!(
ag.step(&[1.0, f64::NAN]),
Err(crate::DataError::NotANumber)
);
assert_eq!(ag.count(), 0);
}
#[test]
fn rejects_inf_gradient() {
let mut ag = AdaGradF64::builder()
.dimensions(1)
.learning_rate(0.1)
.build()
.unwrap();
assert_eq!(
ag.step(&[f64::INFINITY]),
Err(crate::DataError::Infinite)
);
assert_eq!(ag.count(), 0);
}
#[test]
#[should_panic(expected = "gradient length")]
fn dimension_mismatch_panics() {
let mut ag = AdaGradF64::builder()
.dimensions(3)
.learning_rate(0.1)
.build()
.unwrap();
let _ = ag.step(&[1.0]);
}
#[test]
fn reset_clears_all() {
let mut ag = AdaGradF64::builder()
.dimensions(2)
.learning_rate(0.1)
.build()
.unwrap();
ag.set_parameters(&[1.0, 2.0]);
ag.step(&[1.0, 1.0]).unwrap();
ag.reset();
assert_eq!(ag.count(), 0);
assert!(ag.parameters().iter().all(|&p| p == 0.0));
assert!(ag.accumulated_gradients().iter().all(|&g| g == 0.0));
}
#[test]
fn builder_rejects_zero_dimensions() {
let result = AdaGradF64::builder()
.dimensions(0)
.learning_rate(0.1)
.build();
assert!(result.is_err());
}
#[test]
fn builder_rejects_negative_epsilon() {
let result = AdaGradF64::builder()
.dimensions(2)
.learning_rate(0.1)
.epsilon(-1.0)
.build();
assert!(result.is_err());
}
#[test]
fn default_epsilon() {
let ag = AdaGradF64::builder()
.dimensions(1)
.learning_rate(0.1)
.build()
.unwrap();
assert!((ag.epsilon() - 1e-8).abs() < 1e-15);
}
#[test]
fn count_tracks_steps() {
let mut ag = AdaGradF64::builder()
.dimensions(1)
.learning_rate(0.1)
.build()
.unwrap();
assert_eq!(ag.count(), 0);
assert!(!ag.is_primed());
ag.step(&[1.0]).unwrap();
assert_eq!(ag.count(), 1);
assert!(ag.is_primed());
}
#[test]
fn builder_requires_dimensions() {
let result = AdaGradF64::builder().learning_rate(0.1).build();
assert!(matches!(
result,
Err(crate::ConfigError::Missing("dimensions"))
));
}
#[test]
fn builder_requires_learning_rate() {
let result = AdaGradF64::builder().dimensions(2).build();
assert!(matches!(
result,
Err(crate::ConfigError::Missing("learning_rate"))
));
}
}