use optirs_core::optimizers::{Adam, Optimizer, SGD};
use optirs_core::schedulers::{ExponentialDecay, LearningRateScheduler, StepDecay};
use scirs2_core::ndarray::{array, Array1, Ix1};
use scirs2_core::random::{thread_rng, Distribution, Normal};
fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("=== Advanced Optimization Techniques ===\n");
println!("1. Learning Rate Scheduling");
println!("----------------------------");
learning_rate_scheduling()?;
println!("\n2. Parameter Groups with Different Learning Rates");
println!("-------------------------------------------------");
parameter_groups()?;
println!("\n3. L1 and L2 Regularization");
println!("---------------------------");
regularization_example()?;
println!("\n4. Gradient Clipping");
println!("--------------------");
gradient_clipping()?;
println!("\n5. Multi-layer Network Optimization");
println!("------------------------------------");
multi_layer_optimization()?;
Ok(())
}
/// Demonstrates driving an `SGD` optimizer's learning rate from a scheduler.
///
/// Two runs are shown, both starting from the same four parameters and using a
/// constant gradient of 0.1:
///
/// 1. `ExponentialDecay::new(1.0, 0.9, 1)` for 5 epochs.
/// 2. `StepDecay::new(1.0, 0.5, 2)` for 6 epochs.
///
/// On every epoch the value returned by `scheduler.step()` is installed on the
/// optimizer, one optimizer step is taken, and the rate is printed.
///
/// # Errors
///
/// Propagates any error returned by `optimizer.step`.
fn learning_rate_scheduling() -> Result<(), Box<dyn std::error::Error>> {
    let mut params = Array1::from_vec(vec![5.0, 3.0, 8.0, 2.0]);
    let gradients = Array1::from_elem(4, 0.1);

    println!("Exponential Decay (decay_rate=0.9, decay_steps=1):");
    let mut optimizer = SGD::new(1.0);
    let mut scheduler = ExponentialDecay::new(1.0, 0.9, 1);
    for epoch in 0..5 {
        let lr = scheduler.step();
        // Install the scheduled rate *before* stepping, so the update taken in
        // this epoch uses the same rate that is reported on the line below.
        Optimizer::<f64, Ix1>::set_learning_rate(&mut optimizer, lr);
        params = optimizer.step(&params, &gradients)?;
        println!(" Epoch {}: LR = {:.4}", epoch, lr);
    }

    println!("\nStep Decay (drop by 0.5 every 2 epochs):");
    // Fresh parameters, optimizer and scheduler: the second run is independent
    // of the first.
    let mut params = Array1::from_vec(vec![5.0, 3.0, 8.0, 2.0]);
    let mut optimizer = SGD::new(1.0);
    let mut scheduler = StepDecay::new(1.0, 0.5, 2);
    for epoch in 0..6 {
        let lr = scheduler.step();
        // Same ordering as above: set the rate first, then step.
        Optimizer::<f64, Ix1>::set_learning_rate(&mut optimizer, lr);
        params = optimizer.step(&params, &gradients)?;
        println!(" Epoch {}: LR = {:.4}", epoch, lr);
    }
    Ok(())
}
/// Shows two parameter groups, each trained by its own `Adam` optimizer.
///
/// The first group (4 values) uses a learning rate of 0.0001 and the second
/// group (2 values) uses 0.01. Both receive a constant gradient of 0.1. The
/// groups are printed, advanced by a single optimizer step, and printed again.
///
/// # Errors
///
/// Returns the error of the first `step` call that fails.
fn parameter_groups() -> Result<(), Box<dyn std::error::Error>> {
    // Group 1: four parameters, small learning rate.
    let slow_params = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
    let slow_grads = Array1::from_elem(4, 0.1);

    // Group 2: two parameters, learning rate 100x larger.
    let fast_params = Array1::from_vec(vec![5.0, 6.0]);
    let fast_grads = Array1::from_elem(2, 0.1);

    let mut slow_optimizer = Adam::new(0.0001);
    let mut fast_optimizer = Adam::new(0.01);

    println!("Before optimization:");
    println!(" Layer 1 params: {:?}", slow_params);
    println!(" Layer 2 params: {:?}", fast_params);

    // One update per group, each with its own optimizer state.
    let slow_after = slow_optimizer.step(&slow_params, &slow_grads)?;
    let fast_after = fast_optimizer.step(&fast_params, &fast_grads)?;

    println!("\nAfter 1 step:");
    println!(" Layer 1 params: {:?}", slow_after);
    println!(" Layer 2 params: {:?}", fast_after);
    println!("\nObservation: Layer 2 (faster LR) changed more than Layer 1 (slower LR)");
    Ok(())
}
/// Demonstrates L2 and L1 regularization applied by hand to the gradients.
///
/// Starting from the same four parameters and a constant gradient of 0.1:
///
/// * L2: adds `weight_decay * p` to each gradient component.
/// * L1: adds `alpha * signum(p)` to each gradient component.
///
/// Each regularized gradient is then fed to a fresh `SGD` optimizer (learning
/// rate 0.1) for a single step, and the penalty and result are printed.
///
/// # Errors
///
/// Propagates any error returned by `optimizer.step`.
fn regularization_example() -> Result<(), Box<dyn std::error::Error>> {
    let params = Array1::from_vec(vec![2.0, -3.0, 1.5, -0.5]);
    let gradients = Array1::from_elem(4, 0.1);

    println!("L2 Regularization (Weight Decay = 0.01):");
    let weight_decay = 0.01;
    // The L2 penalty pulls each weight towards zero in proportion to its size.
    let l2_penalty: Array1<f64> = params.mapv(|p| weight_decay * p);
    let regularized_grads = &gradients + &l2_penalty;
    let mut optimizer = SGD::new(0.1);
    let updated = optimizer.step(&params, &regularized_grads)?;
    println!(" Original params: {:?}", params);
    println!(" L2 penalty: {:?}", l2_penalty);
    println!(" Updated params: {:?}", updated);

    println!("\nL1 Regularization (alpha = 0.01):");
    let alpha = 0.01;
    // The L1 penalty has constant magnitude `alpha` and the sign of the weight.
    // Note: `f64::signum` returns 1.0 for +0.0; no parameter here is zero.
    let l1_penalty: Array1<f64> = params.mapv(|p| alpha * p.signum());
    let l1_regularized_grads = &gradients + &l1_penalty;
    // A new optimizer so the L1 step does not reuse the L2 run's instance.
    let mut optimizer = SGD::new(0.1);
    let updated_l1 = optimizer.step(&params, &l1_regularized_grads)?;
    println!(" Original params: {:?}", params);
    println!(" L1 penalty: {:?}", l1_penalty);
    println!(" Updated params: {:?}", updated_l1);

    println!("\nNote: L1 encourages sparsity, L2 encourages small weights");
    Ok(())
}
/// Demonstrates clipping a large gradient by its Euclidean norm.
///
/// Prints the original gradient and its norm, rescales it with
/// `clip_gradient_norm` to a maximum norm of 10.0, prints the clipped gradient
/// and its norm, and finally takes one `Adam` step (learning rate 0.01) with
/// the clipped gradient.
///
/// # Errors
///
/// Propagates any error returned by `optimizer.step`.
fn gradient_clipping() -> Result<(), Box<dyn std::error::Error>> {
    let params = Array1::from_vec(vec![1.0, 2.0, 3.0, 4.0]);
    let large_gradients = Array1::from_vec(vec![100.0, -150.0, 200.0, -80.0]);
    println!("Original gradients: {:?}", large_gradients);
    println!("Gradient norm: {:.2}", gradient_norm(&large_gradients));

    let max_norm = 10.0;
    let clipped = clip_gradient_norm(&large_gradients, max_norm);
    println!("\nAfter clipping to norm {:.1}:", max_norm);
    println!(" Clipped gradients: {:?}", clipped);
    println!(" New norm: {:.2}", gradient_norm(&clipped));

    // The optimizer only ever sees the clipped gradient.
    let mut optimizer = Adam::new(0.01);
    let updated = optimizer.step(&params, &clipped)?;
    println!("\nUpdated params: {:?}", updated);
    Ok(())
}
fn multi_layer_optimization() -> Result<(), Box<dyn std::error::Error>> {
let mut rng = thread_rng();
let normal = Normal::new(0.0, 0.1)?;
let mut w1 = Array1::from_iter((0..32).map(|_| normal.sample(&mut rng))); let mut w2 = Array1::from_iter((0..16).map(|_| normal.sample(&mut rng)));
println!("Network architecture: 4 -> 8 -> 2");
println!("Layer 1 params: {} weights", w1.len());
println!("Layer 2 params: {} weights", w2.len());
let mut opt1 = Adam::new(0.001); let mut opt2 = SGD::new(0.01);
println!("\nTraining for 10 iterations...");
for iter in 0..10 {
let grad1 = Array1::from_iter((0..32).map(|_| normal.sample(&mut rng)));
let grad2 = Array1::from_iter((0..16).map(|_| normal.sample(&mut rng)));
let grad1_clipped = clip_gradient_norm(&grad1, 5.0);
let grad2_clipped = clip_gradient_norm(&grad2, 5.0);
w1 = opt1.step(&w1, &grad1_clipped)?;
w2 = opt2.step(&w2, &grad2_clipped)?;
if iter % 2 == 0 {
let loss = simulate_loss(&w1, &w2);
println!(" Iteration {}: Loss = {:.4}", iter, loss);
}
}
println!("\nOptimization complete!");
println!(
"Final layer 1 mean: {:.4}",
w1.mean().expect("unwrap failed")
);
println!(
"Final layer 2 mean: {:.4}",
w2.mean().expect("unwrap failed")
);
Ok(())
}
/// Returns the Euclidean (L2) norm of `grads`: the square root of the sum of
/// the squared components.
fn gradient_norm(grads: &Array1<f64>) -> f64 {
    let sum_of_squares: f64 = grads.iter().map(|component| component * component).sum();
    sum_of_squares.sqrt()
}
/// Rescales `grads` so that its Euclidean norm does not exceed `max_norm`.
///
/// When the current norm is greater than `max_norm`, every component is
/// multiplied by `max_norm` and divided by the current norm. Otherwise an
/// unchanged copy of `grads` is returned.
fn clip_gradient_norm(grads: &Array1<f64>, max_norm: f64) -> Array1<f64> {
    let current_norm = gradient_norm(grads);
    let exceeds_limit = current_norm > max_norm;
    if exceeds_limit {
        grads.map(|&component| component * max_norm / current_norm)
    } else {
        grads.to_owned()
    }
}
/// Produces a stand-in loss value from the two weight vectors.
///
/// The result is 0.1 times the square root of the combined sum of squared
/// weights of `w1` and `w2`.
fn simulate_loss(w1: &Array1<f64>, w2: &Array1<f64>) -> f64 {
    // Sum of squared entries of a single weight vector.
    let squared_sum = |weights: &Array1<f64>| weights.iter().map(|&w| w * w).sum::<f64>();
    let combined = squared_sum(w1) + squared_sum(w2);
    combined.sqrt() * 0.1
}