use std::collections::VecDeque;
use trustformers_core::errors::{Result, TrustformersError};
use trustformers_core::tensor::Tensor;
use trustformers_core::traits::Optimizer;
// NOTE(review): the name suggests a self-scaling BFGS method, but this struct
// stores no curvature/Hessian information — confirm the intended algorithm.
/// State for the `SSBFGS` optimizer defined in this file.
///
/// Holds scalar hyper-parameters plus a bounded history of the scaling
/// factors that have been applied to the learning rate.
#[derive(Debug)]
pub struct SSBFGS {
/// Base step size; `update` multiplies it by the current scaling factor.
pub learning_rate: f32,
/// Number of entries `scale_history` is trimmed back to after each update.
pub history_size: usize,
/// Scaling factor used (before clamping) while `scale_history` is empty.
pub scaling_factor: f32,
/// Weight given to the mean of `scale_history` when it is blended with the
/// gradient-driven adaptation term; the term itself gets `1.0 - momentum`.
pub momentum: f32,
/// Number of `update` calls applied to an `F32` parameter/gradient pair.
pub step: usize,
/// Previously computed scaling factors, oldest at the front.
pub scale_history: VecDeque<f32>,
}
/// Hyper-parameters consumed by `SSBFGS::from_config`.
///
/// Each field is copied verbatim into the field of the same name on `SSBFGS`.
#[derive(Debug, Clone)]
pub struct SSBFGSConfig {
/// Base step size before self-scaling is applied.
pub learning_rate: f32,
/// Number of past scaling factors the optimizer keeps.
pub history_size: usize,
/// Scaling factor used for the first update, while no history exists.
pub scaling_factor: f32,
/// Weight of the historical mean when a new scaling factor is blended.
pub momentum: f32,
}
impl Default for SSBFGSConfig {
    /// Returns the baseline configuration: learning rate `1.0`, a history of
    /// `10` scaling factors, initial scaling factor `1.0` and momentum `0.9`.
    fn default() -> Self {
        let learning_rate = 1.0;
        let history_size = 10;
        let scaling_factor = 1.0;
        let momentum = 0.9;

        Self {
            learning_rate,
            history_size,
            scaling_factor,
            momentum,
        }
    }
}
impl Default for SSBFGS {
fn default() -> Self {
Self::new()
}
}
impl SSBFGS {
    /// Smallest scaling factor `compute_self_scaling_factor` will return.
    const MIN_SCALE: f32 = 0.1;
    /// Largest scaling factor `compute_self_scaling_factor` will return.
    const MAX_SCALE: f32 = 10.0;
    /// Scaling factor reported or used when nothing has been recorded yet.
    const NEUTRAL_SCALE: f32 = 1.0;

    /// Creates an optimizer using `SSBFGSConfig::default()`.
    pub fn new() -> Self {
        Self::from_config(SSBFGSConfig::default())
    }

    /// Creates an optimizer from `config`, starting at step `0` with an empty
    /// scaling-factor history.
    pub fn from_config(config: SSBFGSConfig) -> Self {
        Self {
            learning_rate: config.learning_rate,
            history_size: config.history_size,
            scaling_factor: config.scaling_factor,
            momentum: config.momentum,
            step: 0,
            scale_history: VecDeque::new(),
        }
    }

    /// Preset named for physics-informed workloads: learning rate `0.8`,
    /// history size `15`, initial scaling factor `1.2`, momentum `0.95`.
    pub fn for_physics_informed() -> Self {
        Self::from_config(SSBFGSConfig {
            learning_rate: 0.8,
            history_size: 15,
            scaling_factor: 1.2,
            momentum: 0.95,
        })
    }

    /// Preset named for non-convex problems: learning rate `0.5`, history
    /// size `20`, initial scaling factor `0.8`, momentum `0.85`.
    pub fn for_non_convex() -> Self {
        Self::from_config(SSBFGSConfig {
            learning_rate: 0.5,
            history_size: 20,
            scaling_factor: 0.8,
            momentum: 0.85,
        })
    }

    /// Arithmetic mean of the recorded scaling factors, or `None` when the
    /// history is empty.
    fn mean_scale(&self) -> Option<f32> {
        if self.scale_history.is_empty() {
            None
        } else {
            let total: f32 = self.scale_history.iter().sum();
            Some(total / self.scale_history.len() as f32)
        }
    }

    /// Computes the scaling factor for the current update and records it.
    ///
    /// With an empty history the configured `scaling_factor` is used.
    /// Otherwise the result is
    /// `momentum * mean(history) + (1 - momentum) * (1 + 0.1 * tanh(grad_norm))`.
    /// The value is clamped to `[0.1, 10.0]`, appended to `scale_history`,
    /// and the history is trimmed to at most `history_size` entries.
    ///
    /// A NaN result (for example from a NaN `grad_norm`) is never recorded:
    /// the most recently recorded factor is returned instead, or `1.0` when
    /// the history is empty, so a single bad gradient cannot poison every
    /// later mean.
    fn compute_self_scaling_factor(&mut self, grad_norm: f32) -> f32 {
        let candidate = match self.mean_scale() {
            None => self.scaling_factor,
            Some(mean_scale) => {
                let adaptation_factor = 1.0 + 0.1 * grad_norm.tanh();
                self.momentum * mean_scale + (1.0 - self.momentum) * adaptation_factor
            }
        };

        // `f32::clamp` returns NaN for a NaN input, so guard before clamping;
        // infinities are still handled by the clamp itself.
        if candidate.is_nan() {
            return self
                .scale_history
                .back()
                .copied()
                .unwrap_or(Self::NEUTRAL_SCALE);
        }

        let scale = candidate.clamp(Self::MIN_SCALE, Self::MAX_SCALE);
        self.scale_history.push_back(scale);
        // `history_size` is a public field and may have been lowered since the
        // last call, so drop as many old entries as needed, not just one.
        while self.scale_history.len() > self.history_size {
            self.scale_history.pop_front();
        }
        scale
    }

    /// Returns a snapshot of the step counter, the most recently recorded
    /// scaling factor and the mean of the recorded factors.
    ///
    /// Both factors are reported as `1.0` while the history is empty.
    pub fn get_stats(&self) -> SSBFGSStats {
        SSBFGSStats {
            step: self.step,
            current_scaling_factor: self
                .scale_history
                .back()
                .copied()
                .unwrap_or(Self::NEUTRAL_SCALE),
            average_scaling_factor: self.mean_scale().unwrap_or(Self::NEUTRAL_SCALE),
        }
    }
}
/// Snapshot of `SSBFGS` progress returned by `SSBFGS::get_stats`.
#[derive(Debug, Clone)]
pub struct SSBFGSStats {
/// Value of the optimizer's `step` counter when the snapshot was taken.
pub step: usize,
/// Most recently recorded scaling factor, or `1.0` if none was recorded.
pub current_scaling_factor: f32,
/// Mean of the recorded scaling factors, or `1.0` if none was recorded.
pub average_scaling_factor: f32,
}
impl Optimizer for SSBFGS {
    /// Applies one scaled gradient step to `parameter`.
    ///
    /// For an `F32` parameter/gradient pair the step counter is incremented,
    /// a scaling factor is derived from the gradient's Euclidean norm, and
    /// `parameter` is replaced by `parameter - grad * learning_rate * scale`.
    ///
    /// # Errors
    ///
    /// Returns a tensor-op error when either tensor is not the `F32` variant;
    /// nothing is modified in that case.
    fn update(&mut self, parameter: &mut Tensor, grad: &Tensor) -> Result<()> {
        if let (Tensor::F32(param), Tensor::F32(grad_arr)) = (parameter, grad) {
            self.step += 1;

            let sum_of_squares = grad_arr.iter().map(|g| g * g).sum::<f32>();
            let scale = self.compute_self_scaling_factor(sum_of_squares.sqrt());
            let effective_lr = self.learning_rate * scale;

            let delta = grad_arr.clone() * effective_lr;
            *param = &*param - &delta;
            Ok(())
        } else {
            Err(TrustformersError::tensor_op_error(
                "Unsupported tensor types for SSBFGS",
                "ssbfgs_update",
            ))
        }
    }

    /// No-op: this implementation does not clear anything here.
    fn zero_grad(&mut self) {}

    /// No-op: the step counter is advanced inside `update` instead.
    fn step(&mut self) {}

    /// Returns the base learning rate, without the self-scaling factor.
    fn get_lr(&self) -> f32 {
        self.learning_rate
    }

    /// Replaces the base learning rate with `lr`.
    fn set_lr(&mut self, lr: f32) {
        self.learning_rate = lr;
    }
}
// NOTE(review): the name suggests a self-scaling Broyden method, but this
// struct stores no Jacobian/secant information — confirm the intended algorithm.
/// State for the `SSBroyden` optimizer defined in this file.
///
/// Holds scalar hyper-parameters plus a bounded history of the scaling
/// factors that have been applied to the learning rate.
#[derive(Debug)]
pub struct SSBroyden {
/// Base step size; `update` multiplies it by the current scaling factor.
pub learning_rate: f32,
/// Number of entries `scale_history` is trimmed back to after each update.
pub history_size: usize,
/// Scaling factor used (before clamping) while `scale_history` is empty.
pub scaling_factor: f32,
/// Weight given to the mean of `scale_history` when it is blended with the
/// gradient-driven adaptation term; the term itself gets `1.0 - momentum`.
pub momentum: f32,
/// Number of `update` calls applied to an `F32` parameter/gradient pair.
pub step: usize,
/// Previously computed scaling factors, oldest at the front.
pub scale_history: VecDeque<f32>,
}
/// Hyper-parameters consumed by `SSBroyden::from_config`.
///
/// Each field is copied verbatim into the field of the same name on
/// `SSBroyden`.
#[derive(Debug, Clone)]
pub struct SSBroydenConfig {
/// Base step size before self-scaling is applied.
pub learning_rate: f32,
/// Number of past scaling factors the optimizer keeps.
pub history_size: usize,
/// Scaling factor used for the first update, while no history exists.
pub scaling_factor: f32,
/// Weight of the historical mean when a new scaling factor is blended.
pub momentum: f32,
}
impl Default for SSBroydenConfig {
    /// Returns the baseline configuration: learning rate `1.0`, a history of
    /// `15` scaling factors, initial scaling factor `1.0` and momentum `0.9`.
    fn default() -> Self {
        let learning_rate = 1.0;
        let history_size = 15;
        let scaling_factor = 1.0;
        let momentum = 0.9;

        Self {
            learning_rate,
            history_size,
            scaling_factor,
            momentum,
        }
    }
}
impl Default for SSBroyden {
fn default() -> Self {
Self::new()
}
}
impl SSBroyden {
    /// Smallest scaling factor `compute_self_scaling_factor` will return.
    const MIN_SCALE: f32 = 0.1;
    /// Largest scaling factor `compute_self_scaling_factor` will return.
    const MAX_SCALE: f32 = 5.0;
    /// Scaling factor used when a NaN result occurs before anything has been
    /// recorded.
    const NEUTRAL_SCALE: f32 = 1.0;

    /// Creates an optimizer using `SSBroydenConfig::default()`.
    pub fn new() -> Self {
        Self::from_config(SSBroydenConfig::default())
    }

    /// Creates an optimizer from `config`, starting at step `0` with an empty
    /// scaling-factor history.
    pub fn from_config(config: SSBroydenConfig) -> Self {
        Self {
            learning_rate: config.learning_rate,
            history_size: config.history_size,
            scaling_factor: config.scaling_factor,
            momentum: config.momentum,
            step: 0,
            scale_history: VecDeque::new(),
        }
    }

    /// Preset named for physics-informed workloads: learning rate `0.7`,
    /// history size `20`, initial scaling factor `1.1`, momentum `0.95`.
    pub fn for_physics_informed() -> Self {
        Self::from_config(SSBroydenConfig {
            learning_rate: 0.7,
            history_size: 20,
            scaling_factor: 1.1,
            momentum: 0.95,
        })
    }

    /// Computes the scaling factor for the current update and records it.
    ///
    /// With an empty history the configured `scaling_factor` is used.
    /// Otherwise the result is
    /// `momentum * mean(history) + (1 - momentum) * (1 + 0.1 * tanh(grad_norm))`.
    /// The value is clamped to `[0.1, 5.0]`, appended to `scale_history`,
    /// and the history is trimmed to at most `history_size` entries.
    ///
    /// A NaN result (for example from a NaN `grad_norm`) is never recorded:
    /// the most recently recorded factor is returned instead, or `1.0` when
    /// the history is empty, so a single bad gradient cannot poison every
    /// later mean.
    fn compute_self_scaling_factor(&mut self, grad_norm: f32) -> f32 {
        let candidate = if self.scale_history.is_empty() {
            self.scaling_factor
        } else {
            let total: f32 = self.scale_history.iter().sum();
            let mean_scale = total / self.scale_history.len() as f32;
            let adaptation_factor = 1.0 + 0.1 * grad_norm.tanh();
            self.momentum * mean_scale + (1.0 - self.momentum) * adaptation_factor
        };

        // `f32::clamp` returns NaN for a NaN input, so guard before clamping;
        // infinities are still handled by the clamp itself.
        if candidate.is_nan() {
            return self
                .scale_history
                .back()
                .copied()
                .unwrap_or(Self::NEUTRAL_SCALE);
        }

        let scale = candidate.clamp(Self::MIN_SCALE, Self::MAX_SCALE);
        self.scale_history.push_back(scale);
        // `history_size` is a public field and may have been lowered since the
        // last call, so drop as many old entries as needed, not just one.
        while self.scale_history.len() > self.history_size {
            self.scale_history.pop_front();
        }
        scale
    }
}
impl Optimizer for SSBroyden {
    /// Applies one scaled gradient step to `parameter`.
    ///
    /// For an `F32` parameter/gradient pair the step counter is incremented,
    /// a scaling factor is derived from the gradient's Euclidean norm, and
    /// `parameter` is replaced by `parameter - grad * learning_rate * scale`.
    ///
    /// # Errors
    ///
    /// Returns a tensor-op error when either tensor is not the `F32` variant;
    /// nothing is modified in that case.
    fn update(&mut self, parameter: &mut Tensor, grad: &Tensor) -> Result<()> {
        if let (Tensor::F32(param), Tensor::F32(grad_arr)) = (parameter, grad) {
            self.step += 1;

            let sum_of_squares = grad_arr.iter().map(|g| g * g).sum::<f32>();
            let scale = self.compute_self_scaling_factor(sum_of_squares.sqrt());
            let effective_lr = self.learning_rate * scale;

            let delta = grad_arr.clone() * effective_lr;
            *param = &*param - &delta;
            Ok(())
        } else {
            Err(TrustformersError::tensor_op_error(
                "Unsupported tensor types for SSBroyden",
                "ssbroyden_update",
            ))
        }
    }

    /// No-op: this implementation does not clear anything here.
    fn zero_grad(&mut self) {}

    /// No-op: the step counter is advanced inside `update` instead.
    fn step(&mut self) {}

    /// Returns the base learning rate, without the self-scaling factor.
    fn get_lr(&self) -> f32 {
        self.learning_rate
    }

    /// Replaces the base learning rate with `lr`.
    fn set_lr(&mut self, lr: f32) {
        self.learning_rate = lr;
    }
}