use optirs_core::optimizers::{AdaBound, AdaDelta};
use scirs2_core::ndarray_ext::array;
/// Final loss at or below which a run is reported as converged.
///
/// The true minimum of the demo objective is 0.0, so this is an absolute
/// tolerance. NOTE(review): the value is a judgement call for this demo;
/// adjust it if a looser or tighter notion of "converged" is wanted.
const CONVERGENCE_TOLERANCE: f64 = 1e-3;

/// Number of AdaDelta iterations and how often progress is printed.
const ADADELTA_STEPS: usize = 100;
const ADADELTA_LOG_EVERY: usize = 20;

/// Number of AdaBound/AMSBound iterations and how often progress is printed.
const BOUND_STEPS: usize = 200;
const BOUND_LOG_EVERY: usize = 40;

/// Top and bottom rules of the box drawn around each section title.
const BOX_TOP: &str = "┌─────────────────────────────────────────────────────────┐";
const BOX_BOTTOM: &str = "└─────────────────────────────────────────────────────────┘";

/// Evaluates the demo objective f(x, y) = x² + y².
fn sphere_loss(x: f64, y: f64) -> f64 {
    x * x + y * y
}

/// Prints a boxed section title followed by a blank line.
fn print_section(title: &str) {
    println!("{}", BOX_TOP);
    println!("│ {} │", title);
    println!("{}\n", BOX_BOTTOM);
}

/// Prints one progress line (step index, both parameters and the current loss).
fn print_progress(step: usize, x: f64, y: f64) {
    println!(
        " Step {:3}: params = [{:.4}, {:.4}], loss = {:.6}",
        step,
        x,
        y,
        sphere_loss(x, y)
    );
}

/// Prints the parameters and loss reached at the end of a run.
fn print_final_state(x: f64, y: f64, final_loss: f64) {
    println!("\nFinal parameters: [{:.6}, {:.6}]", x, y);
    println!("Final loss: {:.6}", final_loss);
}

/// Reports whether a run actually reached the optimum.
///
/// The success banner is only printed when `final_loss` is within
/// `CONVERGENCE_TOLERANCE`; a NaN loss fails the comparison and is therefore
/// reported as a failure as well.
fn report_outcome(name: &str, final_loss: f64, steps: usize) {
    if final_loss <= CONVERGENCE_TOLERANCE {
        println!("✓ {} successfully converged!\n", name);
    } else {
        println!(
            "✗ {} did not converge within {} steps (final loss {:.6} > tolerance {:e})\n",
            name, steps, final_loss, CONVERGENCE_TOLERANCE
        );
    }
}

/// Prints the closing feature/use-case comparison of the three optimizers.
fn print_summary() {
    print_section("Optimizer Comparison Summary");
    println!("Key Features:");
    println!(" • AdaDelta:");
    println!(" - No learning rate parameter needed");
    println!(" - Uses adaptive rates based on gradient history");
    println!(" - Robust to hyperparameter choices");
    println!(" - Warmup boost helps overcome cold-start problem\n");
    println!(" • AdaBound:");
    println!(" - Smooth transition from Adam to SGD");
    println!(" - Dynamic learning rate bounds");
    println!(" - Better generalization than pure Adam");
    println!(" - Prevents LR from becoming too large/small\n");
    println!(" • AMSBound:");
    println!(" - AdaBound with max velocity");
    println!(" - More stable than standard AdaBound");
    println!(" - Uses max(v_t) like AMSGrad\n");
    println!("Use Cases:");
    println!(" • AdaDelta: When you want to avoid LR tuning");
    println!(" • AdaBound: For production models needing good generalization");
    println!(" • AMSBound: When training stability is critical\n");
    println!("═══════════════════════════════════════════════════════════");
    println!("✓ All new optimizers demonstrated successfully!");
    println!("═══════════════════════════════════════════════════════════\n");
}

/// Runs AdaDelta, AdaBound and AMSBound on f(x, y) = x² + y² starting from
/// (5.0, 5.0), printing periodic progress, the final state and an honest
/// convergence verdict for each optimizer, followed by a comparison summary.
///
/// # Errors
///
/// Propagates any error returned by the optimizer constructors or by their
/// `step` calls.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("╔═══════════════════════════════════════════════════════════╗");
    println!("║ OptiRS v0.1.0 - New Optimizers Demonstration ║");
    println!("╚═══════════════════════════════════════════════════════════╝\n");
    println!("Problem: Minimize f(x,y) = x² + y²");
    println!("Starting point: (5.0, 5.0)");
    println!("Optimal solution: (0.0, 0.0)\n");

    // ---- 1. AdaDelta -------------------------------------------------------
    print_section("1. AdaDelta: No Learning Rate Required");
    // Presumably (rho, epsilon) — TODO confirm against `AdaDelta::new`.
    let mut adadelta = AdaDelta::<f64>::new(0.95, 1e-6)?;
    let mut params = array![5.0, 5.0];
    println!("Initial parameters: {:?}", params);
    for step in 0..ADADELTA_STEPS {
        // Analytic gradient of x² + y².
        let grads = array![2.0 * params[0], 2.0 * params[1]];
        params = adadelta.step(params.view(), grads.view())?;
        if step % ADADELTA_LOG_EVERY == 0 {
            print_progress(step, params[0], params[1]);
        }
    }
    let final_loss = sphere_loss(params[0], params[1]);
    print_final_state(params[0], params[1], final_loss);
    report_outcome("AdaDelta", final_loss, ADADELTA_STEPS);

    // ---- 2. AdaBound -------------------------------------------------------
    print_section("2. AdaBound: Smooth Transition Adam → SGD");
    // The second argument is the final learning rate (0.1, as announced below)
    // and the trailing flag selects the AMSBound variant. The remaining
    // positions presumably follow (lr, final_lr, beta1, beta2, epsilon, gamma,
    // weight_decay, amsbound) — TODO confirm against `AdaBound::new`.
    let mut adabound = AdaBound::<f64>::new(0.001, 0.1, 0.9, 0.999, 1e-8, 1e-3, 0.0, false)?;
    let mut params = array![5.0, 5.0];
    println!("Initial parameters: {:?}", params);
    println!("Dynamic bounds will converge from adaptive to final_lr = 0.1\n");
    for step in 0..BOUND_STEPS {
        let grads = array![2.0 * params[0], 2.0 * params[1]];
        params = adabound.step(params.view(), grads.view())?;
        if step % BOUND_LOG_EVERY == 0 {
            let (lower_bound, upper_bound) = adabound.current_bounds();
            print_progress(step, params[0], params[1]);
            println!(
                " LR bounds: [{:.6}, {:.6}]",
                lower_bound, upper_bound
            );
        }
    }
    let final_loss = sphere_loss(params[0], params[1]);
    let (final_lower, final_upper) = adabound.current_bounds();
    print_final_state(params[0], params[1], final_loss);
    println!("Final LR bounds: [{:.6}, {:.6}]", final_lower, final_upper);
    report_outcome("AdaBound", final_loss, BOUND_STEPS);

    // ---- 3. AMSBound -------------------------------------------------------
    print_section("3. AMSBound: AdaBound with Max Velocity");
    // Same hyperparameters as the AdaBound run; only the trailing flag differs.
    let mut amsbound = AdaBound::<f64>::new(0.001, 0.1, 0.9, 0.999, 1e-8, 1e-3, 0.0, true)?;
    let mut params = array![5.0, 5.0];
    println!("Initial parameters: {:?}", params);
    println!("AMSBound uses max(v_t) for more stable updates\n");
    for step in 0..BOUND_STEPS {
        let grads = array![2.0 * params[0], 2.0 * params[1]];
        params = amsbound.step(params.view(), grads.view())?;
        if step % BOUND_LOG_EVERY == 0 {
            print_progress(step, params[0], params[1]);
        }
    }
    let final_loss = sphere_loss(params[0], params[1]);
    print_final_state(params[0], params[1], final_loss);
    report_outcome("AMSBound", final_loss, BOUND_STEPS);

    print_summary();
    Ok(())
}