pub use self::OptMethod::{GaussNewton, GradientDescent, LevenbergMarquardt};
use self::OptOption::{InitParam, MaxIter};
use crate::fuga::ConvToMat;
use crate::numerical::utils::jacobian;
use crate::structure::ad::{ADVec, JetVec, AD};
use crate::structure::matrix::Matrix;
use crate::traits::matrix::{LinearAlgebra, MatrixTrait};
use crate::util::useful::max;
use std::collections::HashMap;
#[derive(Debug, Clone, Copy)]
pub enum OptMethod {
GradientDescent,
GaussNewton,
LevenbergMarquardt,
}
#[derive(Debug, Clone, Copy, PartialOrd, PartialEq, Eq, Hash)]
pub enum OptOption {
InitParam,
MaxIter,
}
pub struct Optimizer<F>
where
F: Fn(&Vec<f64>, Vec<AD>) -> Option<Vec<AD>>,
{
domain: Vec<f64>,
observed: Vec<f64>,
func: Box<F>,
param: Vec<AD>,
max_iter: usize,
error: f64,
method: OptMethod,
option: HashMap<OptOption, bool>,
hyperparams: HashMap<String, f64>,
}
impl<F> Optimizer<F>
where
F: Fn(&Vec<f64>, Vec<AD>) -> Option<Vec<AD>>,
{
pub fn new(data: Matrix, func: F) -> Self {
let mut default_option: HashMap<OptOption, bool> = HashMap::new();
default_option.insert(InitParam, false);
default_option.insert(MaxIter, false);
Optimizer {
domain: data.col(0),
observed: data.col(1),
func: Box::new(func),
param: vec![],
max_iter: 0,
error: 0f64,
method: LevenbergMarquardt,
option: default_option,
hyperparams: HashMap::new(),
}
}
pub fn get_domain(&self) -> Vec<f64> {
self.domain.clone()
}
pub fn get_error(&self) -> f64 {
self.error
}
pub fn get_hyperparam(&self, key: &str) -> Option<&f64> {
self.hyperparams.get(key)
}
pub fn set_init_param(&mut self, p: Vec<f64>) -> &mut Self {
if let Some(x) = self.option.get_mut(&InitParam) {
*x = true
}
self.param = p.to_ad_vec();
self
}
pub fn set_max_iter(&mut self, n: usize) -> &mut Self {
if let Some(x) = self.option.get_mut(&MaxIter) {
*x = true
}
self.max_iter = n;
self
}
pub fn set_method(&mut self, method: OptMethod) -> &mut Self {
self.method = method;
self
}
pub fn set_lr(&mut self, lr: f64) -> &mut Self {
self.hyperparams.insert("lr".to_string(), lr);
self
}
pub fn set_lambda_init(&mut self, lambda_init: f64) -> &mut Self {
self.hyperparams
.insert("lambda_init".to_string(), lambda_init);
self
}
pub fn set_lambda_max(&mut self, lambda_max: f64) -> &mut Self {
self.hyperparams
.insert("lambda_max".to_string(), lambda_max);
self
}
pub fn optimize(&mut self) -> Vec<f64> {
let (x_vec, y_vec) = (self.domain.clone(), self.observed.clone());
let (p_init, max_iter) = (self.param.clone(), self.max_iter);
let safe_f = |p: &Vec<AD>| (self.func)(&x_vec, p.clone()).unwrap();
let unsafe_f = |p: Vec<AD>| (self.func)(&x_vec, p);
let p_init_vec = p_init.to_f64_vec();
let y = y_vec.to_col();
let mut p: Matrix = p_init_vec.clone().into();
let mut j = jacobian(safe_f, &p_init_vec);
let mut y_hat: Matrix = safe_f(&p_init).to_f64_vec().into();
let mut jtj = &j.t() * &j;
let mut valid_p = p.clone();
let mut err_stack = 0usize;
match self.method {
GradientDescent => {
let alpha = *self.hyperparams.get("lr").unwrap_or(&1e-3);
for i in 0..max_iter {
let h = alpha * j.t() * (&y - &y_hat);
let p_cand = &p + &h;
match unsafe_f(p_cand.data.to_ad_vec()) {
Some(value) => {
p = p_cand;
valid_p = p.clone();
err_stack = 0;
j = jacobian(safe_f, &p.data);
y_hat = value.to_f64_vec().into();
}
None => {
if i < max_iter - 1 && err_stack < 3 {
p = p_cand;
err_stack += 1;
} else {
p = valid_p;
break;
}
}
}
}
}
GaussNewton => unimplemented!(),
LevenbergMarquardt => {
let mut chi2 = ((&y - &y_hat).t() * (&y - &y_hat))[(0, 0)];
let mut nu = 2f64;
let lambda_0 = *self.hyperparams.get("lambda_init").unwrap_or(&1e-3);
let lambda_max = *self
.hyperparams
.get("lambda_max")
.unwrap_or(&f64::MAX.sqrt());
let mut lambda = lambda_0 * max(jtj.diag());
for i in 0..max_iter {
if lambda > lambda_max {
println!(
"Caution: At {}-th iter, lambda exceeds max value: {}",
i + 1,
lambda
);
break;
}
let b_lu = (jtj.clone() + lambda * jtj.to_diag()).lu();
if b_lu.det() == 0f64 {
break;
}
let b = b_lu.inv();
let h: Matrix = b * j.t() * (&y - &y_hat);
let p_temp = &p + &h;
match unsafe_f(p_temp.data.to_ad_vec()) {
Some(value) => {
let j_temp = jacobian(safe_f, &p.data);
let y_hat_temp: Matrix = value.to_f64_vec().into();
let chi2_temp = ((&y - &y_hat_temp).t() * (&y - &y_hat_temp))[(0, 0)];
let rho = (chi2 - chi2_temp)
/ (h.t()
* (lambda * jtj.to_diag() * h.clone() + j.t() * (&y - &y_hat)))
[(0, 0)];
if rho > 0f64 {
p = p_temp;
valid_p = p.clone();
err_stack = 0;
j = j_temp;
jtj = &j.t() * &j;
y_hat = y_hat_temp;
chi2 = chi2_temp;
lambda *=
max(vec![1f64 / 3f64, 1f64 - (2f64 * rho - 1f64).powi(3)]);
nu = 2f64;
} else {
lambda *= nu;
nu *= 2f64;
}
}
None => {
if i < max_iter - 1 && err_stack < 3 {
p = p_temp;
err_stack += 1;
} else {
p = valid_p;
break;
}
}
}
}
}
}
let error_temp = &y - &y_hat;
self.error = ((error_temp.t() * error_temp)[(0, 0)] / y.row as f64).sqrt();
p.data
}
}