#[derive(Debug)]
pub enum Status {
Converged((f64, f64)),
Abnormal,
TooManyIterations,
}
pub trait Callback {
fn eval(&self, x: &Vec<f64>) -> f64;
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>);
fn ndim(&self) -> usize;
}
pub fn optimize<F: Callback>(func: &F, p: &mut Vec<f64>, itmax: i32, toll: f64) -> Status {
let n: usize = func.ndim();
assert_eq!(n, p.len(), "dimension not matching for the input vector!");
let mut dg: Vec<f64> = vec![0.0; n];
let mut g: Vec<f64> = vec![0.0; n];
let mut hdg: Vec<f64> = vec![0.0; n];
let mut xi: Vec<f64> = vec![0.0; n];
let mut hessian: Vec<Vec<f64>> = (0..n)
.map(|i| (0..n).map(|j| if i == j { 1.0 } else { 0.0 }).collect())
.collect();
let mut fp = func.eval(p);
func.grad(p, &mut g);
let mut delta_f = 0.0;
let mut g0norm = nrm2(&g);
xi.clone_from_slice(&g);
xi = scal(-1.0 / g0norm, &xi);
let mut pnorm = nrm2(&xi);
let mut fp0 = -g0norm;
let mut alpha;
for _ in 0..itmax {
if pnorm.abs() <= std::f64::EPSILON
|| g0norm.abs() <= std::f64::EPSILON
|| fp.abs() <= std::f64::EPSILON
{
return Status::Abnormal;
}
if delta_f < 0.0 {
let del = { -1.0_f64 * delta_f }.max(10.0 * std::f64::EPSILON * fp.abs());
alpha = 1.0_f64.min(2.0 * del / (-fp0));
} else {
alpha = STPMX.abs();
}
if !search(func, p, &mut xi, &mut alpha) {
return Status::Abnormal;
}
let pnew: Vec<f64> = axay(1.0, &p, alpha, &xi);
let fret = func.eval(&pnew);
delta_f = fret - fp;
fp = fret;
xi = axay(1.0, &pnew, -1.0, &p);
p.clone_from_slice(&pnew);
if delta_f.abs() < TOLX {
return Status::Converged((fret, g0norm));
}
let mut test = xi
.iter()
.zip(p.iter())
.map(|(&xi_i, &p_i)| xi_i.abs() / 1.0_f64.max(p_i.abs()))
.reduce(f64::max)
.unwrap();
if test < TOLX {
return Status::Converged((fret, g0norm));
}
dg.clone_from_slice(&g);
func.grad(&p, &mut g);
let den = 1.0_f64.max(fret.abs());
test = g
.iter()
.zip(p.iter())
.map(|(&g_i, &p_i)| g_i.abs() * 1.0_f64.max(p_i.abs()) / den)
.reduce(f64::max)
.unwrap();
if test < toll {
return Status::Converged((fret, nrm2(&g)));
}
dg = axay(1.0, &g, -1.0, &dg);
hdg.iter_mut().zip(hessian.iter()).for_each(|(hdg_i, row)| {
*hdg_i = dot(row, &dg);
});
let fac = dot(&dg, &xi);
let fae = dot(&dg, &hdg);
let sumdg = dot(&dg, &dg);
let sumxi = dot(&xi, &xi);
if fac > (std::f64::EPSILON * sumdg * sumxi).sqrt() {
let fac = 1.0 / fac;
let fad = 1.0 / fae;
dg = axay(fac, &xi, -fad, &hdg);
(0..n).for_each(|i| {
(0..n).skip(i).for_each(|j| {
hessian[i][j] +=
fac * xi[i] * xi[j] - fad * hdg[i] * hdg[j] + fae * dg[i] * dg[j];
hessian[j][i] = hessian[i][j];
})
});
xi = hessian.iter().map(|row| -dot(&row, &g)).collect();
}
g0norm = nrm2(&g);
pnorm = nrm2(&xi);
xi = scal(1.0 / pnorm, &xi);
pnorm = nrm2(&xi);
fp0 = dot(&xi, &g);
}
Status::TooManyIterations
}
fn search<F: Callback>(
func: &F,
xold: &mut Vec<f64>,
p: &mut Vec<f64>,
alpha_new: &mut f64,
) -> bool {
let n = func.ndim();
let mut alpha = *alpha_new;
let mut alpha_prev: f64 = 0.0;
let mut x = xold.clone();
let mut df = vec![0_f64; n];
let f0: f64 = func.eval(&x);
func.grad(&x, &mut df);
let fp0: f64 = dot(&df, p);
let mut falpha_prev = f0;
let mut fpalpha_prev = fp0;
let mut a: f64 = 0.0;
let mut b = alpha;
let mut fa = f0;
let mut fb: f64 = 0.0;
let mut fpa = fp0;
let mut fpb: f64 = 0.0;
for _ in 0..BRACKET_ITERS {
x = axay(1.0, &xold, alpha, &p);
let falpha = func.eval(&x);
if falpha > (f0 + alpha * RHO * fp0) || falpha >= falpha_prev {
a = alpha_prev;
fa = falpha_prev;
fpa = fpalpha_prev;
b = alpha;
fb = falpha;
fpb = std::f64::NAN;
break;
}
func.grad(&x, &mut df);
let fpalpha = dot(&df, p);
if fpalpha.abs() <= -SIGMA * fp0 {
*alpha_new = alpha;
return true;
}
if fpalpha >= 0.0 {
a = alpha;
fa = falpha;
fpa = fpalpha;
b = alpha_prev;
fb = falpha_prev;
fpb = fpalpha_prev;
break;
}
let delta = alpha - alpha_prev;
let lower = alpha + delta;
let upper = alpha + TAU1 * delta;
let alpha_next = interpolate(
alpha_prev,
falpha_prev,
fpalpha_prev,
alpha,
falpha,
fpalpha,
lower,
upper,
ORDER,
);
alpha_prev = alpha;
falpha_prev = falpha;
fpalpha_prev = fpalpha;
alpha = alpha_next;
}
for _ in 0..SECTION_ITERS {
let delta = b - a;
{
let lower = a + TAU2 * delta;
let upper = b - TAU3 * delta;
alpha = interpolate(a, fa, fpa, b, fb, fpb, lower, upper, ORDER);
}
x = axay(1.0, &xold, alpha, &p);
let falpha = func.eval(&x);
if (a - alpha) * fpa <= std::f64::EPSILON {
return false;
}
if falpha > f0 + RHO * alpha * fp0 || falpha >= fa {
b = alpha;
fb = falpha;
fpb = std::f64::NAN;
} else {
func.grad(&x, &mut df);
let fpalpha = dot(&df, p);
if fpalpha.abs() <= -SIGMA * fp0 {
*alpha_new = alpha;
return true;
}
if ((b - a) >= 0.0 && fpalpha >= 0.0) || ((b - a) <= 0.0 && fpalpha <= 0.0) {
b = a;
fb = fa;
fpb = fpa;
a = alpha;
fa = falpha;
fpa = fpalpha;
} else {
a = alpha;
fa = falpha;
fpa = fpalpha;
}
}
}
*alpha_new = alpha;
true
}
fn solve_quadratic(a: f64, b: f64, c: f64) -> (Option<f64>, Option<f64>) {
let disc = b * b - 4_f64 * a * c;
if a.abs() <= std::f64::EPSILON {
if b.abs() <= std::f64::EPSILON {
return (None, None);
} else {
let x0 = -c / b;
return (Some(x0), None);
}
}
if disc > 0_f64 {
if b.abs() <= std::f64::EPSILON {
let r = (0.5 * disc.sqrt() / a).abs();
return (Some(-r), Some(r));
} else {
let sgnb = {
if b > 0_f64 {
0_f64
} else {
-1_f64
}
};
let temp = -0.5 * (b + sgnb + disc.sqrt());
let r1 = temp / a;
let r2 = c / temp;
if r1 < r2 {
return (Some(r1), Some(r2));
} else {
return (Some(r2), Some(r1));
}
}
} else if disc.abs() <= std::f64::EPSILON {
return (Some(-0.5 * b / a), Some(-0.5 * b / a));
} else {
return (None, None);
}
}
fn interp_quad(f0: f64, fp0: f64, f1: f64, zl: f64, zh: f64) -> f64 {
let fl = f0 + zl * (fp0 + zl * (f1 - f0 - fp0));
let fh = f0 + zh * (fp0 + zh * (f1 - f0 - fp0));
let c = 2_f64 * (f1 - f0 - fp0);
let mut zmin = zl;
let mut _fmin = fl;
if fh < _fmin {
zmin = zh;
_fmin = fh;
}
if c > 0_f64 {
let z = -fp0 / c;
if z > zl && z < zh {
let f = f0 + z * (fp0 + z * (f1 - f0 - fp0));
if f < _fmin {
zmin = z;
_fmin = f;
}
}
}
zmin
}
fn cubic(c0: f64, c1: f64, c2: f64, c3: f64, z: f64) -> f64 {
c0 + z * (c1 + z * (c2 + z * c3))
}
fn check_extremum(c0: f64, c1: f64, c2: f64, c3: f64, z: f64, zmin: &mut f64, fmin: &mut f64) {
let y = cubic(c0, c1, c2, c3, z);
if y < *fmin {
*zmin = z;
*fmin = y;
}
}
fn interp_cubic(f0: f64, fp0: f64, f1: f64, fp1: f64, zl: f64, zh: f64) -> f64 {
let eta = 3_f64 * (f1 - f0) - 2_f64 * fp0 - fp1;
let xi = fp0 + fp1 - 2_f64 * (f1 - f0);
let c0 = f0;
let c1 = fp0;
let c2 = eta;
let c3 = xi;
let mut zmin = zl;
let mut fmin = cubic(c0, c1, c2, c3, zl);
check_extremum(c0, c1, c2, c3, zh, &mut zmin, &mut fmin);
let sols = solve_quadratic(3_f64 * c3, 2_f64 * c2, c1);
if let Some(z0) = sols.0 {
match sols.1 {
Some(z1) => {
if z0 > zl && z0 < zh {
check_extremum(c0, c1, c2, c3, z0, &mut zmin, &mut fmin);
}
if z1 > zl && z1 < zh {
check_extremum(c0, c1, c2, c3, z1, &mut zmin, &mut fmin);
}
}
None => {
if z0 > zl && z0 < zh {
check_extremum(c0, c1, c2, c3, z0, &mut zmin, &mut fmin);
}
}
}
}
zmin
}
fn interpolate(
a: f64,
fa: f64,
fpa: f64,
b: f64,
fb: f64,
fpb: f64,
xmin: f64,
xmax: f64,
order: i32,
) -> f64 {
let mut zmin = (xmin - a) / (b - a);
let mut zmax = (xmax - a) / (b - a);
if zmin > zmax {
let tmp = zmin;
zmin = zmax;
zmax = tmp;
}
let z: f64;
if order > 2 && !fpb.is_nan() {
z = interp_cubic(fa, fpa * (b - a), fb, fpb * (b - a), zmin, zmax);
} else {
z = interp_quad(fa, fpa * (b - a), fb, zmin, zmax);
}
a + z * (b - a)
}
fn dot(x: &Vec<f64>, y: &Vec<f64>) -> f64 {
x.iter().zip(y.iter()).map(|(x, y)| x * y).sum()
}
fn nrm2(x: &Vec<f64>) -> f64 {
x.iter().map(|a| a * a).sum::<f64>().sqrt()
}
fn scal(fac: f64, x: &Vec<f64>) -> Vec<f64> {
x.iter().map(|a| fac * a).collect()
}
fn axay(alpha: f64, x: &Vec<f64>, beta: f64, y: &Vec<f64>) -> Vec<f64> {
x.iter()
.zip(y.iter())
.map(|(a, b)| alpha * a + beta * b)
.collect()
}
const ORDER: i32 = 3;
const RHO: f64 = 0.01;
const SIGMA: f64 = 0.1;
const TAU1: f64 = 9.0;
const TAU2: f64 = 0.05;
const TAU3: f64 = 0.5;
const STPMX: f64 = 1.0;
const TOLX: f64 = 4.0 * std::f64::EPSILON;
const BRACKET_ITERS: i32 = 100;
const SECTION_ITERS: i32 = 100;
#[cfg(test)]
mod tests {
const PASS_LEVEL: f64 = 1e-04;
const TOLL: f64 = 1e-15;
const ITERMAX: i32 = 5000;
use crate::{optimize, Callback};
use rand::Rng;
fn euclidean_distance(v1: &Vec<f64>, v2: &Vec<f64>) -> f64 {
v1.iter()
.zip(v2.iter())
.map(|(a, b)| (a - b).powi(2))
.sum::<f64>()
.sqrt()
}
fn test_optimization_for<T: Callback>(func: T, global_minimum: Vec<f64>) {
let mut rng = rand::thread_rng();
let mut p: Vec<f64> = (0..func.ndim())
.map(|_| rng.gen_range(-0.25..0.25))
.collect();
optimize(&func, &mut p, ITERMAX, TOLL);
assert!(euclidean_distance(&p, &global_minimum) < PASS_LEVEL);
}
struct Rosenbrock;
struct Booth;
struct McCormick;
struct Rastrigin;
struct Sphere;
impl Callback for Rosenbrock {
fn eval(&self, x: &Vec<f64>) -> f64 {
(1.0 - x[0]).powi(2) + 100.0 * (x[1] - x[0].powi(2)).powi(2)
}
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>) {
g[0] = 400.0 * x[0].powi(3) - 400.0 * x[0] * x[1] + 2.0 * x[0] - 2.0;
g[1] = 200.0 * (x[1] - x[0].powi(2));
}
fn ndim(&self) -> usize {
2
}
}
impl Callback for Booth {
fn eval(&self, x: &Vec<f64>) -> f64 {
(x[0] + 2.0 * x[1] - 7.0).powi(2) + (2.0 * x[0] + x[1] - 5.0).powi(2)
}
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>) {
g[0] = 2.0 * (x[0] + 2.0 * x[1] - 7.0) + 4.0 * (2.0 * x[0] + x[1] - 5.0);
g[1] = 4.0 * (x[0] + 2.0 * x[1] - 7.0) + 2.0 * (2.0 * x[0] + x[1] - 5.0);
}
fn ndim(&self) -> usize {
2
}
}
impl Callback for McCormick {
fn eval(&self, x: &Vec<f64>) -> f64 {
(x[0] + x[1]).sin() + (x[0] - x[1]).powi(2) - 1.5 * x[0] + 2.5 * x[1] + 1.0
}
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>) {
g[0] = (x[0] + x[1]).cos() + 2.0 * x[0] - 2.0 * x[1] - 1.5;
g[1] = (x[0] + x[1]).cos() - 2.0 * x[0] + 2.0 * x[1] + 2.5;
}
fn ndim(&self) -> usize {
2
}
}
impl Callback for Rastrigin {
fn eval(&self, x: &Vec<f64>) -> f64 {
let fac = 2.0 * std::f64::consts::PI;
x.iter()
.map(|a| a.powi(2) - 10.0 * (fac * a).cos())
.sum::<f64>()
}
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>) {
let fac = 2.0 * std::f64::consts::PI;
x.iter().zip(g.iter_mut()).for_each(|(a, b)| {
*b = *a * 2.0 + 10.0 * fac * (*a * fac).sin();
});
}
fn ndim(&self) -> usize {
100
}
}
impl Callback for Sphere {
fn eval(&self, x: &Vec<f64>) -> f64 {
x.iter().map(|a| a.powi(2)).sum::<f64>()
}
fn grad(&self, x: &Vec<f64>, g: &mut Vec<f64>) {
x.iter().zip(g.iter_mut()).for_each(|(a, b)| {
*b = *a * 2.0;
});
}
fn ndim(&self) -> usize {
100
}
}
#[test]
fn test_for_rosenbrock() {
let global_minimum: Vec<f64> = vec![1.0, 1.0];
test_optimization_for(Rosenbrock, global_minimum);
}
#[test]
fn test_for_booth() {
let global_minimum: Vec<f64> = vec![1.0, 3.0];
test_optimization_for(Booth, global_minimum);
}
#[test]
fn test_for_mccormick() {
let global_minimum: Vec<f64> = vec![-0.54719, -1.54719];
test_optimization_for(McCormick, global_minimum);
}
#[test]
fn test_for_rastrigin() {
let global_minimum: Vec<f64> = vec![0.0; 100];
test_optimization_for(Rastrigin, global_minimum);
}
#[test]
fn test_for_sphere() {
let global_minimum: Vec<f64> = vec![0.0; 100];
test_optimization_for(Sphere, global_minimum);
}
}